clang 22.0.0git
avx512fintrin.h
Go to the documentation of this file.
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
16typedef char __v64qi __attribute__((__vector_size__(64)));
17typedef short __v32hi __attribute__((__vector_size__(64)));
18typedef double __v8df __attribute__((__vector_size__(64)));
19typedef float __v16sf __attribute__((__vector_size__(64)));
20typedef long long __v8di __attribute__((__vector_size__(64)));
21typedef int __v16si __attribute__((__vector_size__(64)));
22
23/* Unsigned types */
24typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
25typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
26typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
27typedef unsigned int __v16su __attribute__((__vector_size__(64)));
28
29/* We need an explicitly signed variant for char. Note that this shouldn't
30 * appear in the interface though. */
31typedef signed char __v64qs __attribute__((__vector_size__(64)));
32
33typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
34typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
35typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
36
37typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
38typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
39typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
40
41typedef unsigned char __mmask8;
42typedef unsigned short __mmask16;
43
/* Rounding mode macros (SAE/embedded rounding-control immediates). */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
50
/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* predicate 3 is reserved ("false") */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE /* Greater than */
} _MM_CMPINT_ENUM;
63
64typedef enum
65{
153
/* Mantissa normalization intervals for the getmant intrinsics. */
typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
/* Mantissa sign control for the getmant intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,      /* sign = sign(SRC)                  */
  _MM_MANT_SIGN_zero,     /* sign = 0                          */
  _MM_MANT_SIGN_nan       /* DEST = NaN if sign(SRC) = 1       */
} _MM_MANTISSA_SIGN_ENUM;
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

/* In C++11 and later, intrinsics implemented purely with vector extensions
 * can additionally be constexpr; in C the attribute sets are identical. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
188
189/* Create vectors with repeated elements */
190
191static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
193 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
194}
195
196#define _mm512_setzero_epi32 _mm512_setzero_si512
197
198static __inline__ __m512d __DEFAULT_FN_ATTRS512
200{
201 return (__m512d)__builtin_ia32_undef512();
202}
203
204static __inline__ __m512 __DEFAULT_FN_ATTRS512
206{
207 return (__m512)__builtin_ia32_undef512();
208}
209
210static __inline__ __m512 __DEFAULT_FN_ATTRS512
212{
213 return (__m512)__builtin_ia32_undef512();
214}
215
216static __inline__ __m512i __DEFAULT_FN_ATTRS512
218{
219 return (__m512i)__builtin_ia32_undef512();
220}
221
222static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
224 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
226}
227
228static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
229_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) {
230 return (__m512i)__builtin_ia32_selectd_512(__M,
231 (__v16si) _mm512_broadcastd_epi32(__A),
232 (__v16si) __O);
233}
234
235static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
237 return (__m512i)__builtin_ia32_selectd_512(__M,
238 (__v16si) _mm512_broadcastd_epi32(__A),
239 (__v16si) _mm512_setzero_si512());
240}
241
242static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
244 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
245 0, 0, 0, 0, 0, 0, 0, 0);
246}
247
248static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
249_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
250 return (__m512i)__builtin_ia32_selectq_512(
251 __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O);
252}
253
254static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
256 return (__m512i)__builtin_ia32_selectq_512(__M,
257 (__v8di) _mm512_broadcastq_epi64(__A),
258 (__v8di) _mm512_setzero_si512());
259}
260
262 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
263 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
264}
265
266#define _mm512_setzero _mm512_setzero_ps
267
268static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
270 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
271}
272
273static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
275{
276 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
277 __w, __w, __w, __w, __w, __w, __w, __w };
278}
279
280static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
281_mm512_set1_pd(double __w)
282{
283 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
284}
285
286static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
288{
289 return __extension__ (__m512i)(__v64qi){
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w,
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w };
298}
299
300static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
302{
303 return __extension__ (__m512i)(__v32hi){
304 __w, __w, __w, __w, __w, __w, __w, __w,
305 __w, __w, __w, __w, __w, __w, __w, __w,
306 __w, __w, __w, __w, __w, __w, __w, __w,
307 __w, __w, __w, __w, __w, __w, __w, __w };
308}
309
310static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
312{
313 return __extension__ (__m512i)(__v16si){
314 __s, __s, __s, __s, __s, __s, __s, __s,
315 __s, __s, __s, __s, __s, __s, __s, __s };
316}
317
318static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
320 return (__m512i)__builtin_ia32_selectd_512(__M,
321 (__v16si)_mm512_set1_epi32(__A),
322 (__v16si)_mm512_setzero_si512());
323}
324
325static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
326_mm512_set1_epi64(long long __d)
327{
328 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
329}
330
331static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
333 return (__m512i)__builtin_ia32_selectq_512(__M,
334 (__v8di)_mm512_set1_epi64(__A),
335 (__v8di)_mm512_setzero_si512());
336}
337
338static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
340 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
342}
343
344static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
345_mm512_set4_epi32(int __A, int __B, int __C, int __D) {
346 return __extension__ (__m512i)(__v16si)
347 { __D, __C, __B, __A, __D, __C, __B, __A,
348 __D, __C, __B, __A, __D, __C, __B, __A };
349}
350
351static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
352_mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D) {
353 return __extension__ (__m512i) (__v8di)
354 { __D, __C, __B, __A, __D, __C, __B, __A };
355}
356
357static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
358_mm512_set4_pd(double __A, double __B, double __C, double __D) {
359 return __extension__ (__m512d)
360 { __D, __C, __B, __A, __D, __C, __B, __A };
361}
362
363static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
364_mm512_set4_ps(float __A, float __B, float __C, float __D) {
365 return __extension__ (__m512)
366 { __D, __C, __B, __A, __D, __C, __B, __A,
367 __D, __C, __B, __A, __D, __C, __B, __A };
368}
369
370static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
371_mm512_setr4_epi32(int e0, int e1, int e2, int e3) {
372 return _mm512_set4_epi32(e3, e2, e1, e0);
373}
374
375static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
376_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) {
377 return _mm512_set4_epi64(e3, e2, e1, e0);
378}
379
380static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
381_mm512_setr4_pd(double e0, double e1, double e2, double e3) {
382 return _mm512_set4_pd(e3, e2, e1, e0);
383}
384
385static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
386_mm512_setr4_ps(float e0, float e1, float e2, float e3) {
387 return _mm512_set4_ps(e3, e2, e1, e0);
388}
389
390static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
392 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
393 0, 0, 0, 0, 0, 0, 0, 0);
394}
395
396/* Cast between vector types */
397
398static __inline __m512d __DEFAULT_FN_ATTRS512
400{
401 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
402 1, 2, 3, 4, 5, 6, 7);
403}
404
405static __inline __m512 __DEFAULT_FN_ATTRS512
407{
408 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
409 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
410}
411
412static __inline __m128d __DEFAULT_FN_ATTRS512
414{
415 return __builtin_shufflevector(__a, __a, 0, 1);
416}
417
418static __inline __m256d __DEFAULT_FN_ATTRS512
420{
421 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
422}
423
424static __inline __m128 __DEFAULT_FN_ATTRS512
426{
427 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
428}
429
430static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
432 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
433}
434
435static __inline __m512 __DEFAULT_FN_ATTRS512
436_mm512_castpd_ps (__m512d __A)
437{
438 return (__m512) (__A);
439}
440
441static __inline __m512i __DEFAULT_FN_ATTRS512
443{
444 return (__m512i) (__A);
445}
446
447static __inline__ __m512d __DEFAULT_FN_ATTRS512
449{
450 __m256d __B = __builtin_nondeterministic_value(__B);
451 return __builtin_shufflevector(
452 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
453 __B, 0, 1, 2, 3, 4, 5, 6, 7);
454}
455
456static __inline __m512d __DEFAULT_FN_ATTRS512
458{
459 return (__m512d) (__A);
460}
461
462static __inline __m512i __DEFAULT_FN_ATTRS512
464{
465 return (__m512i) (__A);
466}
467
468static __inline__ __m512 __DEFAULT_FN_ATTRS512
470{
471 __m256 __B = __builtin_nondeterministic_value(__B);
472 return __builtin_shufflevector(
473 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
474 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
475}
476
477static __inline__ __m512i __DEFAULT_FN_ATTRS512
479{
480 __m256i __B = __builtin_nondeterministic_value(__B);
481 return __builtin_shufflevector(
482 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
483 __B, 0, 1, 2, 3, 4, 5, 6, 7);
484}
485
486static __inline__ __m512i __DEFAULT_FN_ATTRS512
488{
489 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
490}
491
492static __inline __m512 __DEFAULT_FN_ATTRS512
494{
495 return (__m512) (__A);
496}
497
498static __inline __m512d __DEFAULT_FN_ATTRS512
500{
501 return (__m512d) (__A);
502}
503
504static __inline __m128i __DEFAULT_FN_ATTRS512
506{
507 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
508}
509
510static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR
512 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
513}
514
517 return (__mmask16)__a;
518}
519
520static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
522 return (int)__a;
523}
524
525/// Constructs a 512-bit floating-point vector of [8 x double] from a
526/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
527/// contain the value of the source vector. The upper 384 bits are set
528/// to zero.
529///
530/// \headerfile <x86intrin.h>
531///
532/// This intrinsic has no corresponding instruction.
533///
534/// \param __a
535/// A 128-bit vector of [2 x double].
536/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
537/// contain the value of the parameter. The upper 384 bits are set to zero.
538static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
540 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
541}
542
543/// Constructs a 512-bit floating-point vector of [8 x double] from a
544/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
545/// contain the value of the source vector. The upper 256 bits are set
546/// to zero.
547///
548/// \headerfile <x86intrin.h>
549///
550/// This intrinsic has no corresponding instruction.
551///
552/// \param __a
553/// A 256-bit vector of [4 x double].
554/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
555/// contain the value of the parameter. The upper 256 bits are set to zero.
556static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
558 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
559}
560
561/// Constructs a 512-bit floating-point vector of [16 x float] from a
562/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
563/// the value of the source vector. The upper 384 bits are set to zero.
564///
565/// \headerfile <x86intrin.h>
566///
567/// This intrinsic has no corresponding instruction.
568///
569/// \param __a
570/// A 128-bit vector of [4 x float].
571/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
572/// contain the value of the parameter. The upper 384 bits are set to zero.
573static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
575 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
576}
577
578/// Constructs a 512-bit floating-point vector of [16 x float] from a
579/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
580/// the value of the source vector. The upper 256 bits are set to zero.
581///
582/// \headerfile <x86intrin.h>
583///
584/// This intrinsic has no corresponding instruction.
585///
586/// \param __a
587/// A 256-bit vector of [8 x float].
588/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
589/// contain the value of the parameter. The upper 256 bits are set to zero.
590static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
592 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
593}
594
595/// Constructs a 512-bit integer vector from a 128-bit integer vector.
596/// The lower 128 bits contain the value of the source vector. The upper
597/// 384 bits are set to zero.
598///
599/// \headerfile <x86intrin.h>
600///
601/// This intrinsic has no corresponding instruction.
602///
603/// \param __a
604/// A 128-bit integer vector.
605/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
606/// the parameter. The upper 384 bits are set to zero.
607static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
609 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
610}
611
612/// Constructs a 512-bit integer vector from a 256-bit integer vector.
613/// The lower 256 bits contain the value of the source vector. The upper
614/// 256 bits are set to zero.
615///
616/// \headerfile <x86intrin.h>
617///
618/// This intrinsic has no corresponding instruction.
619///
620/// \param __a
621/// A 256-bit integer vector.
622/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
623/// the parameter. The upper 256 bits are set to zero.
624static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
626 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
627}
628
629/* Bitwise operators */
630static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
631_mm512_and_epi32(__m512i __a, __m512i __b)
632{
633 return (__m512i)((__v16su)__a & (__v16su)__b);
634}
635
636static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
637_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) {
638 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
639 (__v16si) _mm512_and_epi32(__a, __b),
640 (__v16si) __src);
641}
642
643static __inline__ __m512i __DEFAULT_FN_ATTRS512
645{
647 __k, __a, __b);
648}
649
650static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
651_mm512_and_epi64(__m512i __a, __m512i __b)
652{
653 return (__m512i)((__v8du)__a & (__v8du)__b);
654}
655
656static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
657_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) {
658 return (__m512i)__builtin_ia32_selectq_512(
659 (__mmask8)__k, (__v8di)_mm512_and_epi64(__a, __b), (__v8di)__src);
660}
661
662static __inline__ __m512i __DEFAULT_FN_ATTRS512
664{
666 __k, __a, __b);
667}
668
669static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
670_mm512_andnot_si512 (__m512i __A, __m512i __B)
671{
672 return (__m512i)(~(__v8du)__A & (__v8du)__B);
673}
674
675static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
676_mm512_andnot_epi32 (__m512i __A, __m512i __B)
677{
678 return (__m512i)(~(__v16su)__A & (__v16su)__B);
679}
680
681static __inline__ __m512i __DEFAULT_FN_ATTRS512
682_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
683{
684 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
685 (__v16si)_mm512_andnot_epi32(__A, __B),
686 (__v16si)__W);
687}
688
689static __inline__ __m512i __DEFAULT_FN_ATTRS512
690_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
691{
693 __U, __A, __B);
694}
695
696static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
697_mm512_andnot_epi64(__m512i __A, __m512i __B)
698{
699 return (__m512i)(~(__v8du)__A & (__v8du)__B);
700}
701
702static __inline__ __m512i __DEFAULT_FN_ATTRS512
703_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
704{
705 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
706 (__v8di)_mm512_andnot_epi64(__A, __B),
707 (__v8di)__W);
708}
709
710static __inline__ __m512i __DEFAULT_FN_ATTRS512
711_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
712{
714 __U, __A, __B);
715}
716
717static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
718_mm512_or_epi32(__m512i __a, __m512i __b)
719{
720 return (__m512i)((__v16su)__a | (__v16su)__b);
721}
722
723static __inline__ __m512i __DEFAULT_FN_ATTRS512
724_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
725{
726 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
727 (__v16si)_mm512_or_epi32(__a, __b),
728 (__v16si)__src);
729}
730
731static __inline__ __m512i __DEFAULT_FN_ATTRS512
733{
734 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
735}
736
737static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
738_mm512_or_epi64(__m512i __a, __m512i __b)
739{
740 return (__m512i)((__v8du)__a | (__v8du)__b);
741}
742
743static __inline__ __m512i __DEFAULT_FN_ATTRS512
744_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
745{
746 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
747 (__v8di)_mm512_or_epi64(__a, __b),
748 (__v8di)__src);
749}
750
751static __inline__ __m512i __DEFAULT_FN_ATTRS512
752_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
753{
754 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
755}
756
757static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
758_mm512_xor_epi32(__m512i __a, __m512i __b)
759{
760 return (__m512i)((__v16su)__a ^ (__v16su)__b);
761}
762
763static __inline__ __m512i __DEFAULT_FN_ATTRS512
764_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
765{
766 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
767 (__v16si)_mm512_xor_epi32(__a, __b),
768 (__v16si)__src);
769}
770
771static __inline__ __m512i __DEFAULT_FN_ATTRS512
773{
774 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
775}
776
777static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
778_mm512_xor_epi64(__m512i __a, __m512i __b)
779{
780 return (__m512i)((__v8du)__a ^ (__v8du)__b);
781}
782
783static __inline__ __m512i __DEFAULT_FN_ATTRS512
784_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
785{
786 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
787 (__v8di)_mm512_xor_epi64(__a, __b),
788 (__v8di)__src);
789}
790
791static __inline__ __m512i __DEFAULT_FN_ATTRS512
793{
794 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
795}
796
797static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
798_mm512_and_si512(__m512i __a, __m512i __b)
799{
800 return (__m512i)((__v8du)__a & (__v8du)__b);
801}
802
803static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
804_mm512_or_si512(__m512i __a, __m512i __b)
805{
806 return (__m512i)((__v8du)__a | (__v8du)__b);
807}
808
809static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
810_mm512_xor_si512(__m512i __a, __m512i __b)
811{
812 return (__m512i)((__v8du)__a ^ (__v8du)__b);
813}
814
815/* Arithmetic */
816
817static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
818_mm512_add_pd(__m512d __a, __m512d __b) {
819 return (__m512d)((__v8df)__a + (__v8df)__b);
820}
821
822static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
823_mm512_add_ps(__m512 __a, __m512 __b) {
824 return (__m512)((__v16sf)__a + (__v16sf)__b);
825}
826
827static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
828_mm512_mul_pd(__m512d __a, __m512d __b) {
829 return (__m512d)((__v8df)__a * (__v8df)__b);
830}
831
832static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
833_mm512_mul_ps(__m512 __a, __m512 __b) {
834 return (__m512)((__v16sf)__a * (__v16sf)__b);
835}
836
837static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
838_mm512_sub_pd(__m512d __a, __m512d __b) {
839 return (__m512d)((__v8df)__a - (__v8df)__b);
840}
841
842static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
843_mm512_sub_ps(__m512 __a, __m512 __b) {
844 return (__m512)((__v16sf)__a - (__v16sf)__b);
845}
846
847static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
848_mm512_add_epi64(__m512i __A, __m512i __B) {
849 return (__m512i) ((__v8du) __A + (__v8du) __B);
850}
851
852static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
853_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
854 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
855 (__v8di)_mm512_add_epi64(__A, __B),
856 (__v8di)__W);
857}
858
859static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
860_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
861 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
862 (__v8di)_mm512_add_epi64(__A, __B),
863 (__v8di)_mm512_setzero_si512());
864}
865
866static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
867_mm512_sub_epi64(__m512i __A, __m512i __B) {
868 return (__m512i) ((__v8du) __A - (__v8du) __B);
869}
870
871static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
872_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
873 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
874 (__v8di)_mm512_sub_epi64(__A, __B),
875 (__v8di)__W);
876}
877
878static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
879_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
880 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
881 (__v8di)_mm512_sub_epi64(__A, __B),
882 (__v8di)_mm512_setzero_si512());
883}
884
885static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
886_mm512_add_epi32(__m512i __A, __m512i __B) {
887 return (__m512i) ((__v16su) __A + (__v16su) __B);
888}
889
890static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
891_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
892 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
893 (__v16si)_mm512_add_epi32(__A, __B),
894 (__v16si)__W);
895}
896
897static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
898_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
899 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
900 (__v16si)_mm512_add_epi32(__A, __B),
901 (__v16si)_mm512_setzero_si512());
902}
903
904static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
905_mm512_sub_epi32(__m512i __A, __m512i __B) {
906 return (__m512i) ((__v16su) __A - (__v16su) __B);
907}
908
909static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
910_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
911 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
912 (__v16si)_mm512_sub_epi32(__A, __B),
913 (__v16si)__W);
914}
915
916static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
917_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
918 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
919 (__v16si)_mm512_sub_epi32(__A, __B),
920 (__v16si)_mm512_setzero_si512());
921}
922
/* max with explicit SAE/rounding immediate R (macros: R must be a constant). */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                  (__v8df)(W)))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                  (__v8df)_mm512_setzero_pd()))
936
937static __inline__ __m512d __DEFAULT_FN_ATTRS512
938_mm512_max_pd(__m512d __A, __m512d __B)
939{
940 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
942}
943
944static __inline__ __m512d __DEFAULT_FN_ATTRS512
945_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
946{
947 return (__m512d)__builtin_ia32_selectpd_512(__U,
948 (__v8df)_mm512_max_pd(__A, __B),
949 (__v8df)__W);
950}
951
952static __inline__ __m512d __DEFAULT_FN_ATTRS512
953_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
954{
955 return (__m512d)__builtin_ia32_selectpd_512(__U,
956 (__v8df)_mm512_max_pd(__A, __B),
957 (__v8df)_mm512_setzero_pd());
958}
959
/* Single-precision max with explicit SAE/rounding immediate R. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
973
974static __inline__ __m512 __DEFAULT_FN_ATTRS512
975_mm512_max_ps(__m512 __A, __m512 __B)
976{
977 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
979}
980
981static __inline__ __m512 __DEFAULT_FN_ATTRS512
982_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
983{
984 return (__m512)__builtin_ia32_selectps_512(__U,
985 (__v16sf)_mm512_max_ps(__A, __B),
986 (__v16sf)__W);
987}
988
989static __inline__ __m512 __DEFAULT_FN_ATTRS512
990_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
991{
992 return (__m512)__builtin_ia32_selectps_512(__U,
993 (__v16sf)_mm512_max_ps(__A, __B),
994 (__v16sf)_mm512_setzero_ps());
995}
996
997static __inline__ __m128 __DEFAULT_FN_ATTRS128
998_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
999 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1000 (__v4sf) __B,
1001 (__v4sf) __W,
1002 (__mmask8) __U,
1004}
1005
1006static __inline__ __m128 __DEFAULT_FN_ATTRS128
1007_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1008 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1009 (__v4sf) __B,
1010 (__v4sf) _mm_setzero_ps (),
1011 (__mmask8) __U,
1013}
1014
1015#define _mm_max_round_ss(A, B, R) \
1016 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1017 (__v4sf)(__m128)(B), \
1018 (__v4sf)_mm_setzero_ps(), \
1019 (__mmask8)-1, (int)(R)))
1020
1021#define _mm_mask_max_round_ss(W, U, A, B, R) \
1022 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1023 (__v4sf)(__m128)(B), \
1024 (__v4sf)(__m128)(W), (__mmask8)(U), \
1025 (int)(R)))
1026
1027#define _mm_maskz_max_round_ss(U, A, B, R) \
1028 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1029 (__v4sf)(__m128)(B), \
1030 (__v4sf)_mm_setzero_ps(), \
1031 (__mmask8)(U), (int)(R)))
1032
1033static __inline__ __m128d __DEFAULT_FN_ATTRS128
1034_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1035 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1036 (__v2df) __B,
1037 (__v2df) __W,
1038 (__mmask8) __U,
1040}
1041
1042static __inline__ __m128d __DEFAULT_FN_ATTRS128
1043_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1044 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1045 (__v2df) __B,
1046 (__v2df) _mm_setzero_pd (),
1047 (__mmask8) __U,
1049}
1050
1051#define _mm_max_round_sd(A, B, R) \
1052 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1053 (__v2df)(__m128d)(B), \
1054 (__v2df)_mm_setzero_pd(), \
1055 (__mmask8)-1, (int)(R)))
1056
1057#define _mm_mask_max_round_sd(W, U, A, B, R) \
1058 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1059 (__v2df)(__m128d)(B), \
1060 (__v2df)(__m128d)(W), \
1061 (__mmask8)(U), (int)(R)))
1062
1063#define _mm_maskz_max_round_sd(U, A, B, R) \
1064 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1065 (__v2df)(__m128d)(B), \
1066 (__v2df)_mm_setzero_pd(), \
1067 (__mmask8)(U), (int)(R)))
1068
1069static __inline __m512i
1071 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1072}
1073
1074static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1075_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1076 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1077 (__v16si)_mm512_max_epi32(__A, __B),
1078 (__v16si)__W);
1079}
1080
1081static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1082_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1083 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1084 (__v16si)_mm512_max_epi32(__A, __B),
1085 (__v16si)_mm512_setzero_si512());
1086}
1087
1088static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1089_mm512_max_epu32(__m512i __A, __m512i __B) {
1090 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1091}
1092
1093static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1094_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1095 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1096 (__v16si)_mm512_max_epu32(__A, __B),
1097 (__v16si)__W);
1098}
1099
1100static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1101_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1102 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1103 (__v16si)_mm512_max_epu32(__A, __B),
1104 (__v16si)_mm512_setzero_si512());
1105}
1106
1107static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1108_mm512_max_epi64(__m512i __A, __m512i __B) {
1109 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1110}
1111
1112static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1113_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1114 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1115 (__v8di)_mm512_max_epi64(__A, __B),
1116 (__v8di)__W);
1117}
1118
1119static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1120_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1121 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1122 (__v8di)_mm512_max_epi64(__A, __B),
1123 (__v8di)_mm512_setzero_si512());
1124}
1125
1126static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1127_mm512_max_epu64(__m512i __A, __m512i __B) {
1128 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1129}
1130
1131static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1132_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1133 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1134 (__v8di)_mm512_max_epu64(__A, __B),
1135 (__v8di)__W);
1136}
1137
1138static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1139_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1140 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1141 (__v8di)_mm512_max_epu64(__A, __B),
1142 (__v8di)_mm512_setzero_si512());
1143}
1144
1145#define _mm512_min_round_pd(A, B, R) \
1146 ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1147 (__v8df)(__m512d)(B), (int)(R)))
1148
1149#define _mm512_mask_min_round_pd(W, U, A, B, R) \
1150 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1151 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1152 (__v8df)(W)))
1153
1154#define _mm512_maskz_min_round_pd(U, A, B, R) \
1155 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1156 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1157 (__v8df)_mm512_setzero_pd()))
1158
1159static __inline__ __m512d __DEFAULT_FN_ATTRS512
1160_mm512_min_pd(__m512d __A, __m512d __B)
1161{
1162 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1164}
1165
1166static __inline__ __m512d __DEFAULT_FN_ATTRS512
1167_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1168{
1169 return (__m512d)__builtin_ia32_selectpd_512(__U,
1170 (__v8df)_mm512_min_pd(__A, __B),
1171 (__v8df)__W);
1172}
1173
1174static __inline__ __m512d __DEFAULT_FN_ATTRS512
1175_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1176{
1177 return (__m512d)__builtin_ia32_selectpd_512(__U,
1178 (__v8df)_mm512_min_pd(__A, __B),
1179 (__v8df)_mm512_setzero_pd());
1180}
1181
1182#define _mm512_min_round_ps(A, B, R) \
1183 ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1184 (__v16sf)(__m512)(B), (int)(R)))
1185
1186#define _mm512_mask_min_round_ps(W, U, A, B, R) \
1187 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1188 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1189 (__v16sf)(W)))
1190
1191#define _mm512_maskz_min_round_ps(U, A, B, R) \
1192 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1193 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1194 (__v16sf)_mm512_setzero_ps()))
1195
1196static __inline__ __m512 __DEFAULT_FN_ATTRS512
1197_mm512_min_ps(__m512 __A, __m512 __B)
1198{
1199 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1201}
1202
1203static __inline__ __m512 __DEFAULT_FN_ATTRS512
1204_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1205{
1206 return (__m512)__builtin_ia32_selectps_512(__U,
1207 (__v16sf)_mm512_min_ps(__A, __B),
1208 (__v16sf)__W);
1209}
1210
1211static __inline__ __m512 __DEFAULT_FN_ATTRS512
1212_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1213{
1214 return (__m512)__builtin_ia32_selectps_512(__U,
1215 (__v16sf)_mm512_min_ps(__A, __B),
1216 (__v16sf)_mm512_setzero_ps());
1217}
1218
1219static __inline__ __m128 __DEFAULT_FN_ATTRS128
1220_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1221 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1222 (__v4sf) __B,
1223 (__v4sf) __W,
1224 (__mmask8) __U,
1226}
1227
1228static __inline__ __m128 __DEFAULT_FN_ATTRS128
1229_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1230 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1231 (__v4sf) __B,
1232 (__v4sf) _mm_setzero_ps (),
1233 (__mmask8) __U,
1235}
1236
1237#define _mm_min_round_ss(A, B, R) \
1238 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1239 (__v4sf)(__m128)(B), \
1240 (__v4sf)_mm_setzero_ps(), \
1241 (__mmask8)-1, (int)(R)))
1242
1243#define _mm_mask_min_round_ss(W, U, A, B, R) \
1244 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1245 (__v4sf)(__m128)(B), \
1246 (__v4sf)(__m128)(W), (__mmask8)(U), \
1247 (int)(R)))
1248
1249#define _mm_maskz_min_round_ss(U, A, B, R) \
1250 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1251 (__v4sf)(__m128)(B), \
1252 (__v4sf)_mm_setzero_ps(), \
1253 (__mmask8)(U), (int)(R)))
1254
1255static __inline__ __m128d __DEFAULT_FN_ATTRS128
1256_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1257 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1258 (__v2df) __B,
1259 (__v2df) __W,
1260 (__mmask8) __U,
1262}
1263
1264static __inline__ __m128d __DEFAULT_FN_ATTRS128
1265_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1266 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1267 (__v2df) __B,
1268 (__v2df) _mm_setzero_pd (),
1269 (__mmask8) __U,
1271}
1272
1273#define _mm_min_round_sd(A, B, R) \
1274 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1275 (__v2df)(__m128d)(B), \
1276 (__v2df)_mm_setzero_pd(), \
1277 (__mmask8)-1, (int)(R)))
1278
1279#define _mm_mask_min_round_sd(W, U, A, B, R) \
1280 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1281 (__v2df)(__m128d)(B), \
1282 (__v2df)(__m128d)(W), \
1283 (__mmask8)(U), (int)(R)))
1284
1285#define _mm_maskz_min_round_sd(U, A, B, R) \
1286 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1287 (__v2df)(__m128d)(B), \
1288 (__v2df)_mm_setzero_pd(), \
1289 (__mmask8)(U), (int)(R)))
1290
1291static __inline __m512i
1293 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1294}
1295
1296static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1297_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1298 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1299 (__v16si)_mm512_min_epi32(__A, __B),
1300 (__v16si)__W);
1301}
1302
1303static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1304_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1305 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1306 (__v16si)_mm512_min_epi32(__A, __B),
1307 (__v16si)_mm512_setzero_si512());
1308}
1309
1310static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1311_mm512_min_epu32(__m512i __A, __m512i __B) {
1312 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1313}
1314
1315static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1316_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1317 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1318 (__v16si)_mm512_min_epu32(__A, __B),
1319 (__v16si)__W);
1320}
1321
1322static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1323_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1324 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1325 (__v16si)_mm512_min_epu32(__A, __B),
1326 (__v16si)_mm512_setzero_si512());
1327}
1328
1329static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1330_mm512_min_epi64(__m512i __A, __m512i __B) {
1331 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1332}
1333
1334static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1335_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1336 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1337 (__v8di)_mm512_min_epi64(__A, __B),
1338 (__v8di)__W);
1339}
1340
1341static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1342_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1343 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1344 (__v8di)_mm512_min_epi64(__A, __B),
1345 (__v8di)_mm512_setzero_si512());
1346}
1347
1348static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1349_mm512_min_epu64(__m512i __A, __m512i __B) {
1350 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1351}
1352
1353static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1354_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1355 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1356 (__v8di)_mm512_min_epu64(__A, __B),
1357 (__v8di)__W);
1358}
1359
1360static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1361_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1362 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1363 (__v8di)_mm512_min_epu64(__A, __B),
1364 (__v8di)_mm512_setzero_si512());
1365}
1366
1367static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1368_mm512_mul_epi32(__m512i __X, __m512i __Y) {
1369 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1370}
1371
1372static __inline __m512i __DEFAULT_FN_ATTRS512
1373_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1374{
1375 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1376 (__v8di)_mm512_mul_epi32(__X, __Y),
1377 (__v8di)__W);
1378}
1379
1380static __inline __m512i __DEFAULT_FN_ATTRS512
1381_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1382{
1383 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1384 (__v8di)_mm512_mul_epi32(__X, __Y),
1385 (__v8di)_mm512_setzero_si512 ());
1386}
1387
1388static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1389_mm512_mul_epu32(__m512i __X, __m512i __Y) {
1390 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1391}
1392
1393static __inline __m512i __DEFAULT_FN_ATTRS512
1394_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1395{
1396 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1397 (__v8di)_mm512_mul_epu32(__X, __Y),
1398 (__v8di)__W);
1399}
1400
1401static __inline __m512i __DEFAULT_FN_ATTRS512
1402_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1403{
1404 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1405 (__v8di)_mm512_mul_epu32(__X, __Y),
1406 (__v8di)_mm512_setzero_si512 ());
1407}
1408
1409static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1410_mm512_mullo_epi32(__m512i __A, __m512i __B) {
1411 return (__m512i) ((__v16su) __A * (__v16su) __B);
1412}
1413
1414static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1415_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1416 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1417 (__v16si)_mm512_mullo_epi32(__A, __B),
1418 (__v16si)_mm512_setzero_si512());
1419}
1420
1421static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1422_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1423 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1424 (__v16si)_mm512_mullo_epi32(__A, __B),
1425 (__v16si)__W);
1426}
1427
1428static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1429_mm512_mullox_epi64(__m512i __A, __m512i __B) {
1430 return (__m512i) ((__v8du) __A * (__v8du) __B);
1431}
1432
1433static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1434_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1435 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1436 (__v8di)_mm512_mullox_epi64(__A, __B),
1437 (__v8di)__W);
1438}
1439
1440#define _mm512_sqrt_round_pd(A, R) \
1441 ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1442
1443#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1444 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1445 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1446 (__v8df)(__m512d)(W)))
1447
1448#define _mm512_maskz_sqrt_round_pd(U, A, R) \
1449 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1450 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1451 (__v8df)_mm512_setzero_pd()))
1452
1453static __inline__ __m512d __DEFAULT_FN_ATTRS512
1454_mm512_sqrt_pd(__m512d __A)
1455{
1456 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1458}
1459
1460static __inline__ __m512d __DEFAULT_FN_ATTRS512
1461_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1462{
1463 return (__m512d)__builtin_ia32_selectpd_512(__U,
1464 (__v8df)_mm512_sqrt_pd(__A),
1465 (__v8df)__W);
1466}
1467
1468static __inline__ __m512d __DEFAULT_FN_ATTRS512
1470{
1471 return (__m512d)__builtin_ia32_selectpd_512(__U,
1472 (__v8df)_mm512_sqrt_pd(__A),
1473 (__v8df)_mm512_setzero_pd());
1474}
1475
1476#define _mm512_sqrt_round_ps(A, R) \
1477 ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1478
1479#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1480 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1481 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1482 (__v16sf)(__m512)(W)))
1483
1484#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1485 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1486 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1487 (__v16sf)_mm512_setzero_ps()))
1488
1489static __inline__ __m512 __DEFAULT_FN_ATTRS512
1491{
1492 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1494}
1495
1496static __inline__ __m512 __DEFAULT_FN_ATTRS512
1497_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1498{
1499 return (__m512)__builtin_ia32_selectps_512(__U,
1500 (__v16sf)_mm512_sqrt_ps(__A),
1501 (__v16sf)__W);
1502}
1503
1504static __inline__ __m512 __DEFAULT_FN_ATTRS512
1506{
1507 return (__m512)__builtin_ia32_selectps_512(__U,
1508 (__v16sf)_mm512_sqrt_ps(__A),
1509 (__v16sf)_mm512_setzero_ps());
1510}
1511
1512static __inline__ __m512d __DEFAULT_FN_ATTRS512
1514{
1515 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1516 (__v8df)
1518 (__mmask8) -1);}
1519
1520static __inline__ __m512d __DEFAULT_FN_ATTRS512
1521_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1522{
1523 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1524 (__v8df) __W,
1525 (__mmask8) __U);
1526}
1527
1528static __inline__ __m512d __DEFAULT_FN_ATTRS512
1530{
1531 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1532 (__v8df)
1534 (__mmask8) __U);
1535}
1536
1537static __inline__ __m512 __DEFAULT_FN_ATTRS512
1539{
1540 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1541 (__v16sf)
1543 (__mmask16) -1);
1544}
1545
1546static __inline__ __m512 __DEFAULT_FN_ATTRS512
1547_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1548{
1549 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1550 (__v16sf) __W,
1551 (__mmask16) __U);
1552}
1553
1554static __inline__ __m512 __DEFAULT_FN_ATTRS512
1556{
1557 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1558 (__v16sf)
1560 (__mmask16) __U);
1561}
1562
1563static __inline__ __m128 __DEFAULT_FN_ATTRS128
1564_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1565{
1566 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1567 (__v4sf) __B,
1568 (__v4sf)
1569 _mm_setzero_ps (),
1570 (__mmask8) -1);
1571}
1572
1573static __inline__ __m128 __DEFAULT_FN_ATTRS128
1574_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1575{
1576 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1577 (__v4sf) __B,
1578 (__v4sf) __W,
1579 (__mmask8) __U);
1580}
1581
1582static __inline__ __m128 __DEFAULT_FN_ATTRS128
1583_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1584{
1585 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1586 (__v4sf) __B,
1587 (__v4sf) _mm_setzero_ps (),
1588 (__mmask8) __U);
1589}
1590
1591static __inline__ __m128d __DEFAULT_FN_ATTRS128
1592_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1593{
1594 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1595 (__v2df) __B,
1596 (__v2df)
1597 _mm_setzero_pd (),
1598 (__mmask8) -1);
1599}
1600
1601static __inline__ __m128d __DEFAULT_FN_ATTRS128
1602_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1603{
1604 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1605 (__v2df) __B,
1606 (__v2df) __W,
1607 (__mmask8) __U);
1608}
1609
1610static __inline__ __m128d __DEFAULT_FN_ATTRS128
1611_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1612{
1613 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1614 (__v2df) __B,
1615 (__v2df) _mm_setzero_pd (),
1616 (__mmask8) __U);
1617}
1618
1619static __inline__ __m512d __DEFAULT_FN_ATTRS512
1621{
1622 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1623 (__v8df)
1625 (__mmask8) -1);
1626}
1627
1628static __inline__ __m512d __DEFAULT_FN_ATTRS512
1629_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1630{
1631 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1632 (__v8df) __W,
1633 (__mmask8) __U);
1634}
1635
1636static __inline__ __m512d __DEFAULT_FN_ATTRS512
1638{
1639 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1640 (__v8df)
1642 (__mmask8) __U);
1643}
1644
1645static __inline__ __m512 __DEFAULT_FN_ATTRS512
1647{
1648 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1649 (__v16sf)
1651 (__mmask16) -1);
1652}
1653
1654static __inline__ __m512 __DEFAULT_FN_ATTRS512
1655_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1656{
1657 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1658 (__v16sf) __W,
1659 (__mmask16) __U);
1660}
1661
1662static __inline__ __m512 __DEFAULT_FN_ATTRS512
1664{
1665 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1666 (__v16sf)
1668 (__mmask16) __U);
1669}
1670
1671static __inline__ __m128 __DEFAULT_FN_ATTRS128
1672_mm_rcp14_ss(__m128 __A, __m128 __B)
1673{
1674 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1675 (__v4sf) __B,
1676 (__v4sf)
1677 _mm_setzero_ps (),
1678 (__mmask8) -1);
1679}
1680
1681static __inline__ __m128 __DEFAULT_FN_ATTRS128
1682_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1683{
1684 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1685 (__v4sf) __B,
1686 (__v4sf) __W,
1687 (__mmask8) __U);
1688}
1689
1690static __inline__ __m128 __DEFAULT_FN_ATTRS128
1691_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1692{
1693 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1694 (__v4sf) __B,
1695 (__v4sf) _mm_setzero_ps (),
1696 (__mmask8) __U);
1697}
1698
1699static __inline__ __m128d __DEFAULT_FN_ATTRS128
1700_mm_rcp14_sd(__m128d __A, __m128d __B)
1701{
1702 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1703 (__v2df) __B,
1704 (__v2df)
1705 _mm_setzero_pd (),
1706 (__mmask8) -1);
1707}
1708
1709static __inline__ __m128d __DEFAULT_FN_ATTRS128
1710_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1711{
1712 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1713 (__v2df) __B,
1714 (__v2df) __W,
1715 (__mmask8) __U);
1716}
1717
1718static __inline__ __m128d __DEFAULT_FN_ATTRS128
1719_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1720{
1721 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1722 (__v2df) __B,
1723 (__v2df) _mm_setzero_pd (),
1724 (__mmask8) __U);
1725}
1726
1727static __inline __m512 __DEFAULT_FN_ATTRS512
1729{
1730 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1732 (__v16sf) __A, (unsigned short)-1,
1734}
1735
1736static __inline__ __m512 __DEFAULT_FN_ATTRS512
1737_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1738{
1739 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1741 (__v16sf) __W, __U,
1743}
1744
1745static __inline __m512d __DEFAULT_FN_ATTRS512
1747{
1748 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1750 (__v8df) __A, (unsigned char)-1,
1752}
1753
1754static __inline__ __m512d __DEFAULT_FN_ATTRS512
1755_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1756{
1757 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1759 (__v8df) __W, __U,
1761}
1762
1763static __inline__ __m512 __DEFAULT_FN_ATTRS512
1764_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1765{
1766 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1768 (__v16sf) __W, __U,
1770}
1771
1772static __inline __m512 __DEFAULT_FN_ATTRS512
1774{
1775 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1777 (__v16sf) __A, (unsigned short)-1,
1779}
1780
1781static __inline __m512d __DEFAULT_FN_ATTRS512
1782_mm512_ceil_pd(__m512d __A)
1783{
1784 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1786 (__v8df) __A, (unsigned char)-1,
1788}
1789
1790static __inline__ __m512d __DEFAULT_FN_ATTRS512
1791_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1792{
1793 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1795 (__v8df) __W, __U,
1797}
1798
1799static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1800_mm512_abs_epi64(__m512i __A) {
1801 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1802}
1803
1804static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1805_mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
1806 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1807 (__v8di)_mm512_abs_epi64(__A),
1808 (__v8di)__W);
1809}
1810
1811static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1813 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1814 (__v8di)_mm512_abs_epi64(__A),
1815 (__v8di)_mm512_setzero_si512());
1816}
1817
1818static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1819_mm512_abs_epi32(__m512i __A) {
1820 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1821}
1822
1823static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1824_mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
1825 return (__m512i)__builtin_ia32_selectd_512(__U,
1826 (__v16si)_mm512_abs_epi32(__A),
1827 (__v16si)__W);
1828}
1829
1830static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1832 return (__m512i)__builtin_ia32_selectd_512(__U,
1833 (__v16si)_mm512_abs_epi32(__A),
1834 (__v16si)_mm512_setzero_si512());
1835}
1836
1837static __inline__ __m128 __DEFAULT_FN_ATTRS128
1838_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1839 __A = _mm_add_ss(__A, __B);
1840 return __builtin_ia32_selectss_128(__U, __A, __W);
1841}
1842
1843static __inline__ __m128 __DEFAULT_FN_ATTRS128
1844_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1845 __A = _mm_add_ss(__A, __B);
1846 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1847}
1848
1849#define _mm_add_round_ss(A, B, R) \
1850 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1851 (__v4sf)(__m128)(B), \
1852 (__v4sf)_mm_setzero_ps(), \
1853 (__mmask8)-1, (int)(R)))
1854
1855#define _mm_mask_add_round_ss(W, U, A, B, R) \
1856 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1857 (__v4sf)(__m128)(B), \
1858 (__v4sf)(__m128)(W), (__mmask8)(U), \
1859 (int)(R)))
1860
1861#define _mm_maskz_add_round_ss(U, A, B, R) \
1862 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1863 (__v4sf)(__m128)(B), \
1864 (__v4sf)_mm_setzero_ps(), \
1865 (__mmask8)(U), (int)(R)))
1866
1867static __inline__ __m128d __DEFAULT_FN_ATTRS128
1868_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1869 __A = _mm_add_sd(__A, __B);
1870 return __builtin_ia32_selectsd_128(__U, __A, __W);
1871}
1872
1873static __inline__ __m128d __DEFAULT_FN_ATTRS128
1874_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1875 __A = _mm_add_sd(__A, __B);
1876 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1877}
1878#define _mm_add_round_sd(A, B, R) \
1879 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1880 (__v2df)(__m128d)(B), \
1881 (__v2df)_mm_setzero_pd(), \
1882 (__mmask8)-1, (int)(R)))
1883
1884#define _mm_mask_add_round_sd(W, U, A, B, R) \
1885 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1886 (__v2df)(__m128d)(B), \
1887 (__v2df)(__m128d)(W), \
1888 (__mmask8)(U), (int)(R)))
1889
1890#define _mm_maskz_add_round_sd(U, A, B, R) \
1891 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1892 (__v2df)(__m128d)(B), \
1893 (__v2df)_mm_setzero_pd(), \
1894 (__mmask8)(U), (int)(R)))
1895
1896static __inline__ __m512d __DEFAULT_FN_ATTRS512
1897_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1898 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1899 (__v8df)_mm512_add_pd(__A, __B),
1900 (__v8df)__W);
1901}
1902
1903static __inline__ __m512d __DEFAULT_FN_ATTRS512
1904_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1905 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1906 (__v8df)_mm512_add_pd(__A, __B),
1907 (__v8df)_mm512_setzero_pd());
1908}
1909
1910static __inline__ __m512 __DEFAULT_FN_ATTRS512
1911_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1912 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1913 (__v16sf)_mm512_add_ps(__A, __B),
1914 (__v16sf)__W);
1915}
1916
1917static __inline__ __m512 __DEFAULT_FN_ATTRS512
1918_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1919 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1920 (__v16sf)_mm512_add_ps(__A, __B),
1921 (__v16sf)_mm512_setzero_ps());
1922}
1923
1924#define _mm512_add_round_pd(A, B, R) \
1925 ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1926 (__v8df)(__m512d)(B), (int)(R)))
1927
1928#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1929 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1930 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1931 (__v8df)(__m512d)(W)))
1932
1933#define _mm512_maskz_add_round_pd(U, A, B, R) \
1934 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1935 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1936 (__v8df)_mm512_setzero_pd()))
1937
1938#define _mm512_add_round_ps(A, B, R) \
1939 ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
1940 (__v16sf)(__m512)(B), (int)(R)))
1941
1942#define _mm512_mask_add_round_ps(W, U, A, B, R) \
1943 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1944 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1945 (__v16sf)(__m512)(W)))
1946
1947#define _mm512_maskz_add_round_ps(U, A, B, R) \
1948 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1949 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1950 (__v16sf)_mm512_setzero_ps()))
1951
1952static __inline__ __m128 __DEFAULT_FN_ATTRS128
1953_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1954 __A = _mm_sub_ss(__A, __B);
1955 return __builtin_ia32_selectss_128(__U, __A, __W);
1956}
1957
1958static __inline__ __m128 __DEFAULT_FN_ATTRS128
1959_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1960 __A = _mm_sub_ss(__A, __B);
1961 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1962}
1963#define _mm_sub_round_ss(A, B, R) \
1964 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1965 (__v4sf)(__m128)(B), \
1966 (__v4sf)_mm_setzero_ps(), \
1967 (__mmask8)-1, (int)(R)))
1968
1969#define _mm_mask_sub_round_ss(W, U, A, B, R) \
1970 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1971 (__v4sf)(__m128)(B), \
1972 (__v4sf)(__m128)(W), (__mmask8)(U), \
1973 (int)(R)))
1974
1975#define _mm_maskz_sub_round_ss(U, A, B, R) \
1976 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1977 (__v4sf)(__m128)(B), \
1978 (__v4sf)_mm_setzero_ps(), \
1979 (__mmask8)(U), (int)(R)))
1980
1981static __inline__ __m128d __DEFAULT_FN_ATTRS128
1982_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1983 __A = _mm_sub_sd(__A, __B);
1984 return __builtin_ia32_selectsd_128(__U, __A, __W);
1985}
1986
1987static __inline__ __m128d __DEFAULT_FN_ATTRS128
1988_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1989 __A = _mm_sub_sd(__A, __B);
1990 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1991}
1992
1993#define _mm_sub_round_sd(A, B, R) \
1994 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1995 (__v2df)(__m128d)(B), \
1996 (__v2df)_mm_setzero_pd(), \
1997 (__mmask8)-1, (int)(R)))
1998
1999#define _mm_mask_sub_round_sd(W, U, A, B, R) \
2000 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2001 (__v2df)(__m128d)(B), \
2002 (__v2df)(__m128d)(W), \
2003 (__mmask8)(U), (int)(R)))
2004
2005#define _mm_maskz_sub_round_sd(U, A, B, R) \
2006 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2007 (__v2df)(__m128d)(B), \
2008 (__v2df)_mm_setzero_pd(), \
2009 (__mmask8)(U), (int)(R)))
2010
2011static __inline__ __m512d __DEFAULT_FN_ATTRS512
2012_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2013 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2014 (__v8df)_mm512_sub_pd(__A, __B),
2015 (__v8df)__W);
2016}
2017
2018static __inline__ __m512d __DEFAULT_FN_ATTRS512
2019_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2020 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2021 (__v8df)_mm512_sub_pd(__A, __B),
2022 (__v8df)_mm512_setzero_pd());
2023}
2024
2025static __inline__ __m512 __DEFAULT_FN_ATTRS512
2026_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2027 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2028 (__v16sf)_mm512_sub_ps(__A, __B),
2029 (__v16sf)__W);
2030}
2031
2032static __inline__ __m512 __DEFAULT_FN_ATTRS512
2033_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2034 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2035 (__v16sf)_mm512_sub_ps(__A, __B),
2036 (__v16sf)_mm512_setzero_ps());
2037}
2038
/* Rounding-mode variants of 512-bit FP subtraction. R is an immediate
   rounding control (e.g. _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
   it must be a compile-time constant, hence macros rather than functions. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2066
/* Masked scalar multiply (lowest element only; upper elements pass
   through from __A). Bit 0 of __U selects the product; a 0 bit merges
   lane 0 of __W (mask) or zero (maskz). */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
/* Rounding-control variants; R must be a compile-time constant. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}

#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2125
/* Masked 512-bit FP multiplication: per lane, a 1 bit in __U selects the
   product; a 0 bit merges __W (mask) or zero (maskz). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2153
/* Rounding-mode variants of 512-bit FP multiplication; R is an immediate
   rounding control and must be a compile-time constant. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2181
/* Masked scalar division (lowest element only; upper elements pass
   through from __A). Bit 0 of __U selects the quotient; a 0 bit merges
   lane 0 of __W (mask) or zero (maskz). */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_div_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_div_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}

/* Rounding-control variants; R must be a compile-time constant. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_div_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_div_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}

#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2241
2242static __inline __m512d
2244 return (__m512d)((__v8df)__a/(__v8df)__b);
2245}
2246
/* Masked 512-bit double division: per lane, a 1 bit in __U selects the
   quotient; a 0 bit merges __W (mask) or zero (maskz). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
2260
/* Divide packed single-precision elements of __a by those of __b;
   implemented with vector-extension division so it can constant-fold. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_div_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a/(__v16sf)__b);
}

/* Masked variants: a 1 bit in __U selects the quotient; a 0 bit merges
   __W (mask) or zero (maskz). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2279
/* Rounding-mode variants of 512-bit FP division; R is an immediate
   rounding control and must be a compile-time constant. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2307
/* VRNDSCALE: round elements to a given number of fraction bits. The
   immediate encodes both the rounding mode (low nibble) and the scale
   (high nibble). The non-_round forms use the current direction. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))

#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2371
/* FMA family, packed double, explicit rounding. All forms lower to the
   same vfmaddpd builtins; fmsub/fnmadd/fnmsub are expressed by negating
   the C and/or A operand. _mask merges into A, _mask3 merges into C,
   _maskz zeroes unselected lanes. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2454
/* FMA family, packed double, current rounding direction. Implemented
   with __builtin_elementwise_fma (constexpr-foldable); sign flips on A
   and/or C express the fmsub/fnmadd/fnmsub variants. Mask forms merge
   into A, mask3 forms merge into C, maskz forms zero unselected lanes. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
                                            (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}

/* fmsub: A*B - C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
                                            -(__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}

/* fnmadd: -(A*B) + C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
                                            (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}

/* fnmsub: -(A*B) - C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
                                            -(__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}
2554
/* FMA family, packed single, explicit rounding — same scheme as the
   packed-double macros above: vfmaddps builtins with operand negation
   expressing the fmsub/fnmadd/fnmsub variants. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2637
/* FMA family, packed single, current rounding direction — same scheme as
   the packed-double functions above. Mask forms merge into A, mask3
   forms merge into C, maskz forms zero unselected lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
                                           (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}

/* fmsub: A*B - C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
                                           -(__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}

/* fnmadd: -(A*B) + C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
                                           (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}

/* fnmsub: -(A*B) - C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
                                           -(__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}
2737
/* fmaddsub: alternating add/subtract of C after the multiply (odd lanes
   add, even lanes subtract); fmsubadd is the opposite pattern, obtained
   by negating C. Explicit-rounding packed-double forms. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2785
2786
2787static __inline__ __m512d __DEFAULT_FN_ATTRS512
2788_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2789{
2790 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2791 (__v8df) __B,
2792 (__v8df) __C,
2793 (__mmask8) -1,
2795}
2796
2797static __inline__ __m512d __DEFAULT_FN_ATTRS512
2798_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2799{
2800 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2801 (__v8df) __B,
2802 (__v8df) __C,
2803 (__mmask8) __U,
2805}
2806
2807static __inline__ __m512d __DEFAULT_FN_ATTRS512
2808_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2809{
2810 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2811 (__v8df) __B,
2812 (__v8df) __C,
2813 (__mmask8) __U,
2815}
2816
2817static __inline__ __m512d __DEFAULT_FN_ATTRS512
2818_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2819{
2820 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2821 (__v8df) __B,
2822 (__v8df) __C,
2823 (__mmask8) __U,
2825}
2826
2827static __inline__ __m512d __DEFAULT_FN_ATTRS512
2828_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2829{
2830 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2831 (__v8df) __B,
2832 -(__v8df) __C,
2833 (__mmask8) -1,
2835}
2836
2837static __inline__ __m512d __DEFAULT_FN_ATTRS512
2838_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2839{
2840 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2841 (__v8df) __B,
2842 -(__v8df) __C,
2843 (__mmask8) __U,
2845}
2846
2847static __inline__ __m512d __DEFAULT_FN_ATTRS512
2848_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2849{
2850 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2851 (__v8df) __B,
2852 -(__v8df) __C,
2853 (__mmask8) __U,
2855}
2856
/* fmaddsub/fmsubadd, packed single, explicit rounding — same scheme as
   the packed-double macros above. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
2904
2905
2906static __inline__ __m512 __DEFAULT_FN_ATTRS512
2907_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2908{
2909 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2910 (__v16sf) __B,
2911 (__v16sf) __C,
2912 (__mmask16) -1,
2914}
2915
2916static __inline__ __m512 __DEFAULT_FN_ATTRS512
2917_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2918{
2919 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2920 (__v16sf) __B,
2921 (__v16sf) __C,
2922 (__mmask16) __U,
2924}
2925
2926static __inline__ __m512 __DEFAULT_FN_ATTRS512
2927_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2928{
2929 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2930 (__v16sf) __B,
2931 (__v16sf) __C,
2932 (__mmask16) __U,
2934}
2935
2936static __inline__ __m512 __DEFAULT_FN_ATTRS512
2937_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2938{
2939 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2940 (__v16sf) __B,
2941 (__v16sf) __C,
2942 (__mmask16) __U,
2944}
2945
2946static __inline__ __m512 __DEFAULT_FN_ATTRS512
2947_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2948{
2949 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2950 (__v16sf) __B,
2951 -(__v16sf) __C,
2952 (__mmask16) -1,
2954}
2955
2956static __inline__ __m512 __DEFAULT_FN_ATTRS512
2957_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2958{
2959 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2960 (__v16sf) __B,
2961 -(__v16sf) __C,
2962 (__mmask16) __U,
2964}
2965
2966static __inline__ __m512 __DEFAULT_FN_ATTRS512
2967_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2968{
2969 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2970 (__v16sf) __B,
2971 -(__v16sf) __C,
2972 (__mmask16) __U,
2974}
2975
/* mask3 forms of rounded fmsub/fmsubadd: unselected lanes are merged
   from the C operand. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

2994
2995static __inline__ __m512d __DEFAULT_FN_ATTRS512
2996_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2997{
2998 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2999 (__v8df) __B,
3000 (__v8df) __C,
3001 (__mmask8) __U,
3003}
3004
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

3011
3012static __inline__ __m512 __DEFAULT_FN_ATTRS512
3013_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3014{
3015 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3016 (__v16sf) __B,
3017 (__v16sf) __C,
3018 (__mmask16) __U,
3020}
3021
/* Negated fused multiply forms, expressed by negating operands of the
   plain fmadd/fmsub builtins. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

3059
3060/* Vector permutations */
3061
3062static __inline __m512i __DEFAULT_FN_ATTRS512
3063_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3064{
3065 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3066 (__v16si) __B);
3067}
3068
3069static __inline__ __m512i __DEFAULT_FN_ATTRS512
3070_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3071 __m512i __B)
3072{
3073 return (__m512i)__builtin_ia32_selectd_512(__U,
3074 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3075 (__v16si)__A);
3076}
3077
3078static __inline__ __m512i __DEFAULT_FN_ATTRS512
3079_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3080 __m512i __B)
3081{
3082 return (__m512i)__builtin_ia32_selectd_512(__U,
3083 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3084 (__v16si)__I);
3085}
3086
3087static __inline__ __m512i __DEFAULT_FN_ATTRS512
3088_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3089 __m512i __B)
3090{
3091 return (__m512i)__builtin_ia32_selectd_512(__U,
3092 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3093 (__v16si)_mm512_setzero_si512());
3094}
3095
3096static __inline __m512i __DEFAULT_FN_ATTRS512
3097_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3098{
3099 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3100 (__v8di) __B);
3101}
3102
3103static __inline__ __m512i __DEFAULT_FN_ATTRS512
3104_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3105 __m512i __B)
3106{
3107 return (__m512i)__builtin_ia32_selectq_512(__U,
3108 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3109 (__v8di)__A);
3110}
3111
3112static __inline__ __m512i __DEFAULT_FN_ATTRS512
3113_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3114 __m512i __B)
3115{
3116 return (__m512i)__builtin_ia32_selectq_512(__U,
3117 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3118 (__v8di)__I);
3119}
3120
3121static __inline__ __m512i __DEFAULT_FN_ATTRS512
3122_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3123 __m512i __B)
3124{
3125 return (__m512i)__builtin_ia32_selectq_512(__U,
3126 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3127 (__v8di)_mm512_setzero_si512());
3128}
3129
/* Concatenate-and-shift-right by I elements (64-bit / 32-bit lanes). */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
/* Vector Extract */

/* Extract a 256-bit (4 x double) or 128-bit (4 x float) sub-vector
   selected by the immediate I/imm. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_undefined_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_undefined_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3188
3189/* Vector Blend */
3190
3191static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3192_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
3193 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3194 (__v8df) __W,
3195 (__v8df) __A);
3196}
3197
3198static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3199_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
3200 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3201 (__v16sf) __W,
3202 (__v16sf) __A);
3203}
3204
3205static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3206_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
3207 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3208 (__v8di) __W,
3209 (__v8di) __A);
3210}
3211
3212static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3213_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
3214 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3215 (__v16si) __W,
3216 (__v16si) __A);
3217}
3218
/* Compare */

/* Packed float compares producing a 16-bit mask; P is a _CMP_* predicate,
   R an explicit rounding/exception-control operand. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate conveniences over _mm512_cmp_ps_mask. */
#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

3275
/* Packed double compares producing an 8-bit mask; parallels the ps
   forms above. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)

3330
/* Conversion */

/* Truncating float -> unsigned 32-bit conversion with explicit rounding
   operand R. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
3347
3348
3349static __inline __m512i __DEFAULT_FN_ATTRS512
3351{
3352 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3353 (__v16si)
3355 (__mmask16) -1,
3357}
3358
3359static __inline__ __m512i __DEFAULT_FN_ATTRS512
3360_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3361{
3362 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3363 (__v16si) __W,
3364 (__mmask16) __U,
3366}
3367
3368static __inline__ __m512i __DEFAULT_FN_ATTRS512
3370{
3371 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3372 (__v16si) _mm512_setzero_si512 (),
3373 (__mmask16) __U,
3375}
3376
/* Signed / unsigned 32-bit integer -> float conversion with explicit
   rounding operand R. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3406
3407static __inline__ __m512
3409 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3410}
3411
3412static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3413_mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3414 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3415 (__v16sf)_mm512_cvtepu32_ps(__A),
3416 (__v16sf)__W);
3417}
3418
3419static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3421 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3422 (__v16sf)_mm512_cvtepu32_ps(__A),
3423 (__v16sf)_mm512_setzero_ps());
3424}
3425
3426static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3428 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3429}
3430
3431static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3432_mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3433 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3434 (__v8df)_mm512_cvtepi32_pd(__A),
3435 (__v8df)__W);
3436}
3437
3438static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3440 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3441 (__v8df)_mm512_cvtepi32_pd(__A),
3442 (__v8df)_mm512_setzero_pd());
3443}
3444
3445static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3447 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3448}
3449
3450static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3451_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3452 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3453}
3454
3455static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3457 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3458}
3459
3460static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3461_mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3462 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3463 (__v16sf)_mm512_cvtepi32_ps(__A),
3464 (__v16sf)__W);
3465}
3466
3467static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3469 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3470 (__v16sf)_mm512_cvtepi32_ps(__A),
3471 (__v16sf)_mm512_setzero_ps());
3472}
3473
3474static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3476 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3477}
3478
3479static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3480_mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3481 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3482 (__v8df)_mm512_cvtepu32_pd(__A),
3483 (__v8df)__W);
3484}
3485
3486static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3488 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3489 (__v8df)_mm512_cvtepu32_pd(__A),
3490 (__v8df)_mm512_setzero_pd());
3491}
3492
3493static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3495 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3496}
3497
3498static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3499_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3500 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3501}
3502
/* Double -> float narrowing conversion with explicit rounding operand R. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
3517
3518static __inline__ __m256 __DEFAULT_FN_ATTRS512
3519_mm512_cvtpd_ps (__m512d __A)
3520{
3521 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3522 (__v8sf) _mm256_undefined_ps (),
3523 (__mmask8) -1,
3525}
3526
3527static __inline__ __m256 __DEFAULT_FN_ATTRS512
3528_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3529{
3530 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3531 (__v8sf) __W,
3532 (__mmask8) __U,
3534}
3535
3536static __inline__ __m256 __DEFAULT_FN_ATTRS512
3538{
3539 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3540 (__v8sf) _mm256_setzero_ps (),
3541 (__mmask8) __U,
3543}
3544
3545static __inline__ __m512 __DEFAULT_FN_ATTRS512
3547{
3548 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3549 (__v8sf) _mm256_setzero_ps (),
3550 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3551}
3552
3553static __inline__ __m512 __DEFAULT_FN_ATTRS512
3554_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3555{
3556 return (__m512) __builtin_shufflevector (
3558 __U, __A),
3559 (__v8sf) _mm256_setzero_ps (),
3560 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3561}
3562
/* Float -> half-precision (stored in __m256i) with immediate rounding
   control I; the non-round names are plain aliases. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(U), \
                                             (__mmask16)(W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)))

#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3581
/* Half-precision -> float widening conversion with explicit rounding
   operand R. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3596
3597
3598static __inline __m512 __DEFAULT_FN_ATTRS512
3600{
3601 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3602 (__v16sf)
3604 (__mmask16) -1,
3606}
3607
3608static __inline__ __m512 __DEFAULT_FN_ATTRS512
3609_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3610{
3611 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3612 (__v16sf) __W,
3613 (__mmask16) __U,
3615}
3616
3617static __inline__ __m512 __DEFAULT_FN_ATTRS512
3619{
3620 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3621 (__v16sf) _mm512_setzero_ps (),
3622 (__mmask16) __U,
3624}
3625
/* Truncating double -> signed 32-bit conversion with explicit rounding
   operand R. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3640
3641static __inline __m256i __DEFAULT_FN_ATTRS512
3643{
3644 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3645 (__v8si)_mm256_setzero_si256(),
3646 (__mmask8) -1,
3648}
3649
3650static __inline__ __m256i __DEFAULT_FN_ATTRS512
3651_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3652{
3653 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3654 (__v8si) __W,
3655 (__mmask8) __U,
3657}
3658
3659static __inline__ __m256i __DEFAULT_FN_ATTRS512
3661{
3662 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3663 (__v8si) _mm256_setzero_si256 (),
3664 (__mmask8) __U,
3666}
3667
/* Truncating float -> signed 32-bit conversion with explicit rounding
   operand R. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3682
3683static __inline __m512i __DEFAULT_FN_ATTRS512
3685{
3686 return (__m512i)
3687 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3688 (__v16si) _mm512_setzero_si512 (),
3690}
3691
3692static __inline__ __m512i __DEFAULT_FN_ATTRS512
3693_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3694{
3695 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3696 (__v16si) __W,
3697 (__mmask16) __U,
3699}
3700
3701static __inline__ __m512i __DEFAULT_FN_ATTRS512
3703{
3704 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3705 (__v16si) _mm512_setzero_si512 (),
3706 (__mmask16) __U,
3708}
3709
/* Rounding float -> signed 32-bit conversion with explicit rounding
   operand R. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
3724
3725static __inline__ __m512i __DEFAULT_FN_ATTRS512
3727{
3728 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3729 (__v16si) _mm512_undefined_epi32 (),
3730 (__mmask16) -1,
3732}
3733
3734static __inline__ __m512i __DEFAULT_FN_ATTRS512
3735_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3736{
3737 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3738 (__v16si) __W,
3739 (__mmask16) __U,
3741}
3742
3743static __inline__ __m512i __DEFAULT_FN_ATTRS512
3745{
3746 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3747 (__v16si)
3749 (__mmask16) __U,
3751}
3752
/* Rounding double -> signed 32-bit conversion with explicit rounding
   operand R. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
3767
3768static __inline__ __m256i __DEFAULT_FN_ATTRS512
3770{
3771 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3772 (__v8si)
3774 (__mmask8) -1,
3776}
3777
3778static __inline__ __m256i __DEFAULT_FN_ATTRS512
3779_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3780{
3781 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3782 (__v8si) __W,
3783 (__mmask8) __U,
3785}
3786
3787static __inline__ __m256i __DEFAULT_FN_ATTRS512
3789{
3790 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3791 (__v8si)
3793 (__mmask8) __U,
3795}
3796
/* Rounding float -> unsigned 32-bit conversion with explicit rounding
   operand R. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3811
3812static __inline__ __m512i __DEFAULT_FN_ATTRS512
3814{
3815 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
3816 (__v16si)\
3818 (__mmask16) -1,\
3820}
3821
3822static __inline__ __m512i __DEFAULT_FN_ATTRS512
3823_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3824{
3825 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3826 (__v16si) __W,
3827 (__mmask16) __U,
3829}
3830
3831static __inline__ __m512i __DEFAULT_FN_ATTRS512
3833{
3834 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3835 (__v16si)
3837 (__mmask16) __U ,
3839}
3840
/* Rounding double -> unsigned 32-bit conversion with explicit rounding
   operand R. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3855
3856static __inline__ __m256i __DEFAULT_FN_ATTRS512
3858{
3859 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3860 (__v8si)
3862 (__mmask8) -1,
3864}
3865
3866static __inline__ __m256i __DEFAULT_FN_ATTRS512
3867_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
3868{
3869 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3870 (__v8si) __W,
3871 (__mmask8) __U,
3873}
3874
3875static __inline__ __m256i __DEFAULT_FN_ATTRS512
3877{
3878 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3879 (__v8si)
3881 (__mmask8) __U,
3883}
3884
3885static __inline__ double __DEFAULT_FN_ATTRS512
3887{
3888 return __a[0];
3889}
3890
3891static __inline__ float __DEFAULT_FN_ATTRS512
3893{
3894 return __a[0];
3895}
3896
3897/* Unpack and Interleave */
3898
3899static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3900_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
3901 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
3902 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
3903}
3904
3905static __inline__ __m512d __DEFAULT_FN_ATTRS512
3906_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
3907{
3908 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3909 (__v8df)_mm512_unpackhi_pd(__A, __B),
3910 (__v8df)__W);
3911}
3912
3913static __inline__ __m512d __DEFAULT_FN_ATTRS512
3914_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
3915{
3916 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3917 (__v8df)_mm512_unpackhi_pd(__A, __B),
3918 (__v8df)_mm512_setzero_pd());
3919}
3920
3921static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3922_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
3923 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
3924 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
3925}
3926
3927static __inline__ __m512d __DEFAULT_FN_ATTRS512
3928_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
3929{
3930 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3931 (__v8df)_mm512_unpacklo_pd(__A, __B),
3932 (__v8df)__W);
3933}
3934
3935static __inline__ __m512d __DEFAULT_FN_ATTRS512
3936_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
3937{
3938 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3939 (__v8df)_mm512_unpacklo_pd(__A, __B),
3940 (__v8df)_mm512_setzero_pd());
3941}
3942
3943static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3944_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
3945 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
3946 2, 18, 3, 19,
3947 2+4, 18+4, 3+4, 19+4,
3948 2+8, 18+8, 3+8, 19+8,
3949 2+12, 18+12, 3+12, 19+12);
3950}
3951
3952static __inline__ __m512 __DEFAULT_FN_ATTRS512
3953_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
3954{
3955 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3956 (__v16sf)_mm512_unpackhi_ps(__A, __B),
3957 (__v16sf)__W);
3958}
3959
3960static __inline__ __m512 __DEFAULT_FN_ATTRS512
3961_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
3962{
3963 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3964 (__v16sf)_mm512_unpackhi_ps(__A, __B),
3965 (__v16sf)_mm512_setzero_ps());
3966}
3967
3968static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3969_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
3970 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
3971 0, 16, 1, 17,
3972 0+4, 16+4, 1+4, 17+4,
3973 0+8, 16+8, 1+8, 17+8,
3974 0+12, 16+12, 1+12, 17+12);
3975}
3976
3977static __inline__ __m512 __DEFAULT_FN_ATTRS512
3978_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
3979{
3980 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3981 (__v16sf)_mm512_unpacklo_ps(__A, __B),
3982 (__v16sf)__W);
3983}
3984
3985static __inline__ __m512 __DEFAULT_FN_ATTRS512
3986_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
3987{
3988 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3989 (__v16sf)_mm512_unpacklo_ps(__A, __B),
3990 (__v16sf)_mm512_setzero_ps());
3991}
3992
3993static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3994_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
3995 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3996 2, 18, 3, 19,
3997 2+4, 18+4, 3+4, 19+4,
3998 2+8, 18+8, 3+8, 19+8,
3999 2+12, 18+12, 3+12, 19+12);
4000}
4001
4002static __inline__ __m512i __DEFAULT_FN_ATTRS512
4003_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4004{
4005 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4006 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4007 (__v16si)__W);
4008}
4009
4010static __inline__ __m512i __DEFAULT_FN_ATTRS512
4011_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4012{
4013 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4014 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4015 (__v16si)_mm512_setzero_si512());
4016}
4017
4018static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4019_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
4020 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4021 0, 16, 1, 17,
4022 0+4, 16+4, 1+4, 17+4,
4023 0+8, 16+8, 1+8, 17+8,
4024 0+12, 16+12, 1+12, 17+12);
4025}
4026
4027static __inline__ __m512i __DEFAULT_FN_ATTRS512
4028_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4029{
4030 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4031 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4032 (__v16si)__W);
4033}
4034
4035static __inline__ __m512i __DEFAULT_FN_ATTRS512
4036_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4037{
4038 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4039 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4040 (__v16si)_mm512_setzero_si512());
4041}
4042
4043static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4044_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
4045 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4046 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4047}
4048
4049static __inline__ __m512i __DEFAULT_FN_ATTRS512
4050_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4051{
4052 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4053 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4054 (__v8di)__W);
4055}
4056
4057static __inline__ __m512i __DEFAULT_FN_ATTRS512
4058_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4059{
4060 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4061 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4062 (__v8di)_mm512_setzero_si512());
4063}
4064
4065static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4066_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
4067 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4068 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4069}
4070
4071static __inline__ __m512i __DEFAULT_FN_ATTRS512
4072_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4073{
4074 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4075 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4076 (__v8di)__W);
4077}
4078
4079static __inline__ __m512i __DEFAULT_FN_ATTRS512
4080_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4081{
4082 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4083 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4084 (__v8di)_mm512_setzero_si512());
4085}
4086
4087
4088/* SIMD load ops */
4089
4090static __inline __m512i __DEFAULT_FN_ATTRS512
4092{
4093 struct __loadu_si512 {
4094 __m512i_u __v;
4095 } __attribute__((__packed__, __may_alias__));
4096 return ((const struct __loadu_si512*)__P)->__v;
4097}
4098
4099static __inline __m512i __DEFAULT_FN_ATTRS512
4101{
4102 struct __loadu_epi32 {
4103 __m512i_u __v;
4104 } __attribute__((__packed__, __may_alias__));
4105 return ((const struct __loadu_epi32*)__P)->__v;
4106}
4107
4108static __inline __m512i __DEFAULT_FN_ATTRS512
4109_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4110{
4111 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4112 (__v16si) __W,
4113 (__mmask16) __U);
4114}
4115
4116
4117static __inline __m512i __DEFAULT_FN_ATTRS512
4119{
4120 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4121 (__v16si)
4123 (__mmask16) __U);
4124}
4125
4126static __inline __m512i __DEFAULT_FN_ATTRS512
4128{
4129 struct __loadu_epi64 {
4130 __m512i_u __v;
4131 } __attribute__((__packed__, __may_alias__));
4132 return ((const struct __loadu_epi64*)__P)->__v;
4133}
4134
4135static __inline __m512i __DEFAULT_FN_ATTRS512
4136_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4137{
4138 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4139 (__v8di) __W,
4140 (__mmask8) __U);
4141}
4142
4143static __inline __m512i __DEFAULT_FN_ATTRS512
4145{
4146 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4147 (__v8di)
4149 (__mmask8) __U);
4150}
4151
4152static __inline __m512 __DEFAULT_FN_ATTRS512
4153_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4154{
4155 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4156 (__v16sf) __W,
4157 (__mmask16) __U);
4158}
4159
4160static __inline __m512 __DEFAULT_FN_ATTRS512
4162{
4163 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4164 (__v16sf)
4166 (__mmask16) __U);
4167}
4168
4169static __inline __m512d __DEFAULT_FN_ATTRS512
4170_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4171{
4172 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4173 (__v8df) __W,
4174 (__mmask8) __U);
4175}
4176
4177static __inline __m512d __DEFAULT_FN_ATTRS512
4179{
4180 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4181 (__v8df)
4183 (__mmask8) __U);
4184}
4185
4186static __inline __m512d __DEFAULT_FN_ATTRS512
4188{
4189 struct __loadu_pd {
4190 __m512d_u __v;
4191 } __attribute__((__packed__, __may_alias__));
4192 return ((const struct __loadu_pd*)__p)->__v;
4193}
4194
4195static __inline __m512 __DEFAULT_FN_ATTRS512
4197{
4198 struct __loadu_ps {
4199 __m512_u __v;
4200 } __attribute__((__packed__, __may_alias__));
4201 return ((const struct __loadu_ps*)__p)->__v;
4202}
4203
4204static __inline __m512 __DEFAULT_FN_ATTRS512
4206{
4207 return *(const __m512*)__p;
4208}
4209
4210static __inline __m512 __DEFAULT_FN_ATTRS512
4211_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4212{
4213 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4214 (__v16sf) __W,
4215 (__mmask16) __U);
4216}
4217
4218static __inline __m512 __DEFAULT_FN_ATTRS512
4220{
4221 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4222 (__v16sf)
4224 (__mmask16) __U);
4225}
4226
4227static __inline __m512d __DEFAULT_FN_ATTRS512
4229{
4230 return *(const __m512d*)__p;
4231}
4232
4233static __inline __m512d __DEFAULT_FN_ATTRS512
4234_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4235{
4236 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4237 (__v8df) __W,
4238 (__mmask8) __U);
4239}
4240
4241static __inline __m512d __DEFAULT_FN_ATTRS512
4243{
4244 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4245 (__v8df)
4247 (__mmask8) __U);
4248}
4249
4250static __inline __m512i __DEFAULT_FN_ATTRS512
4252{
4253 return *(const __m512i *) __P;
4254}
4255
4256static __inline __m512i __DEFAULT_FN_ATTRS512
4258{
4259 return *(const __m512i *) __P;
4260}
4261
4262static __inline __m512i __DEFAULT_FN_ATTRS512
4264{
4265 return *(const __m512i *) __P;
4266}
4267
4268/* SIMD store ops */
4269
/* Unaligned store of 8 64-bit integer elements; the packed/may_alias
   wrapper struct permits any alignment and effective type at *__P. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
  struct __storeu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi64*)__P)->__v = __A;
}

/* Masked unaligned store: only elements whose bit in __U is set are
   written to memory. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                  (__mmask8) __U);
}

/* Unaligned store of a whole 512-bit integer vector. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  struct __storeu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si512*)__P)->__v = __A;
}

/* Unaligned store of 16 32-bit integer elements. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
  struct __storeu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi32*)__P)->__v = __A;
}
4304static __inline void __DEFAULT_FN_ATTRS512
4306{
4307 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4308 (__mmask16) __U);
4309}
4310
/* Masked unaligned store of 8 doubles: only lanes whose bit in __U is set
   are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Unaligned store of 8 doubles via a packed/may_alias wrapper struct. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_pd(void *__P, __m512d __A)
{
  struct __storeu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pd*)__P)->__v = __A;
}

/* Masked unaligned store of 16 floats. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                  (__mmask16) __U);
}

/* Unaligned store of 16 floats. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_ps(void *__P, __m512 __A)
{
  struct __storeu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ps*)__P)->__v = __A;
}

/* Masked aligned store of 8 doubles (__P must be 64-byte aligned). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Aligned store of 8 doubles (__P must be 64-byte aligned). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

/* Masked aligned store of 16 floats (__P must be 64-byte aligned). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                  (__mmask16) __U);
}

/* Aligned store of 16 floats (__P must be 64-byte aligned). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}

/* Aligned 512-bit integer stores (__P must be 64-byte aligned). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4384
4385/* Mask ops */
4386
4389 return __builtin_ia32_knothi(__M);
4390}
4391
4392/* Integer compare */
4393
/* Signed 32-bit comparisons, expressed via the generic _mm512_cmp_epi32_mask
   with a fixed predicate.  The mask_* forms AND the result with mask k. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 32-bit comparisons. */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
4443
/* Signed 64-bit comparisons. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 64-bit comparisons. */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4493
4494static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4496 /* This function always performs a signed extension, but __v16qi is a char
4497 which may be signed or unsigned, so use __v16qs. */
4498 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4499}
4500
4501static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4502_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4503 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4504 (__v16si)_mm512_cvtepi8_epi32(__A),
4505 (__v16si)__W);
4506}
4507
4508static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4510 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4511 (__v16si)_mm512_cvtepi8_epi32(__A),
4512 (__v16si)_mm512_setzero_si512());
4513}
4514
4515static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4517 /* This function always performs a signed extension, but __v16qi is a char
4518 which may be signed or unsigned, so use __v16qs. */
4519 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4520}
4521
4522static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4523_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4524 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4525 (__v8di)_mm512_cvtepi8_epi64(__A),
4526 (__v8di)__W);
4527}
4528
4529static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4531 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4532 (__v8di)_mm512_cvtepi8_epi64(__A),
4533 (__v8di)_mm512_setzero_si512 ());
4534}
4535
4536static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4538 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4539}
4540
4541static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4542_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4543 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4544 (__v8di)_mm512_cvtepi32_epi64(__X),
4545 (__v8di)__W);
4546}
4547
4548static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4550 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4551 (__v8di)_mm512_cvtepi32_epi64(__X),
4552 (__v8di)_mm512_setzero_si512());
4553}
4554
4555static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4557 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4558}
4559
4560static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4561_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4562 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4563 (__v16si)_mm512_cvtepi16_epi32(__A),
4564 (__v16si)__W);
4565}
4566
4567static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4569 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4570 (__v16si)_mm512_cvtepi16_epi32(__A),
4571 (__v16si)_mm512_setzero_si512 ());
4572}
4573
4574static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4576 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4577}
4578
4579static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4580_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4581 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4582 (__v8di)_mm512_cvtepi16_epi64(__A),
4583 (__v8di)__W);
4584}
4585
4586static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4588 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4589 (__v8di)_mm512_cvtepi16_epi64(__A),
4590 (__v8di)_mm512_setzero_si512());
4591}
4592
4593static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4595 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4596}
4597
4598static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4599_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4600 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4601 (__v16si)_mm512_cvtepu8_epi32(__A),
4602 (__v16si)__W);
4603}
4604
4605static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4607 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4608 (__v16si)_mm512_cvtepu8_epi32(__A),
4609 (__v16si)_mm512_setzero_si512());
4610}
4611
4612static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4614 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4615}
4616
4617static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4618_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4619 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4620 (__v8di)_mm512_cvtepu8_epi64(__A),
4621 (__v8di)__W);
4622}
4623
4624static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4626 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4627 (__v8di)_mm512_cvtepu8_epi64(__A),
4628 (__v8di)_mm512_setzero_si512());
4629}
4630
4631static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4633 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4634}
4635
4636static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4637_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4638 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4639 (__v8di)_mm512_cvtepu32_epi64(__X),
4640 (__v8di)__W);
4641}
4642
4643static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4645 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4646 (__v8di)_mm512_cvtepu32_epi64(__X),
4647 (__v8di)_mm512_setzero_si512());
4648}
4649
4650static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4652 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4653}
4654
4655static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4656_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4657 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4658 (__v16si)_mm512_cvtepu16_epi32(__A),
4659 (__v16si)__W);
4660}
4661
4662static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4664 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4665 (__v16si)_mm512_cvtepu16_epi32(__A),
4666 (__v16si)_mm512_setzero_si512());
4667}
4668
4669static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4671 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4672}
4673
4674static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4675_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4676 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4677 (__v8di)_mm512_cvtepu16_epi64(__A),
4678 (__v8di)__W);
4679}
4680
4681static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4683 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4684 (__v8di)_mm512_cvtepu16_epi64(__A),
4685 (__v8di)_mm512_setzero_si512());
4686}
4687
/* Rotate each 32-bit element of __A right by the per-element count in __B;
   a funnel shift with both inputs equal to __A is a rotate. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
}

/* Merge-masked variant: lanes with a clear bit in __U take __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rorv_epi32(__A, __B),
                                           (__v16si)__W);
}

/* Zero-masked variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rorv_epi32(__A, __B),
                                           (__v16si)_mm512_setzero_si512());
}

/* Rotate each 64-bit element of __A right by the per-element count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
}

/* Merge-masked variant: lanes with a clear bit in __U take __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rorv_epi64(__A, __B),
                                            (__v8di)__W);
}

/* Zero-masked variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rorv_epi64(__A, __B),
                                            (__v8di)_mm512_setzero_si512());
}
4731
4732
4733
/* Generic integer comparisons: p selects the predicate (_MM_CMPINT_*);
   the mask_* forms AND the result with mask m. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))

/* Rotate-left by an immediate count b; macros because b must be a
   compile-time constant for the builtin. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
4799
/* Rotate each 32-bit element of __A left by the per-element count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
}

/* Merge-masked variant: lanes with a clear bit in __U take __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rolv_epi32(__A, __B),
                                           (__v16si)__W);
}

/* Zero-masked variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rolv_epi32(__A, __B),
                                           (__v16si)_mm512_setzero_si512());
}

/* Rotate each 64-bit element of __A left by the per-element count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
}

/* Merge-masked variant: lanes with a clear bit in __U take __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rolv_epi64(__A, __B),
                                            (__v8di)__W);
}

/* Zero-masked variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rolv_epi64(__A, __B),
                                            (__v8di)_mm512_setzero_si512());
}
4843
/* Rotate-right by an immediate count B; macros because B must be a
   compile-time constant for the builtin. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
4869
/* Shift each 32-bit element of __A left by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_slli_epi32(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
}

/* Merge-masked variant: lanes with a clear bit in __U take __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Shift each 64-bit element of __A left by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_slli_epi64(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
}

/* Merge-masked variant: lanes with a clear bit in __U take __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_slli_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked variant: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_slli_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4909
4910static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4911_mm512_srli_epi32(__m512i __A, unsigned int __B) {
4912 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
4913}
4914
4915static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4916_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
4917 unsigned int __B) {
4918 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4919 (__v16si)_mm512_srli_epi32(__A, __B),
4920 (__v16si)__W);
4921}
4922
4923static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4924_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
4925 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4926 (__v16si)_mm512_srli_epi32(__A, __B),
4927 (__v16si)_mm512_setzero_si512());
4928}
4929
4930static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4931_mm512_srli_epi64(__m512i __A, unsigned int __B) {
4932 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
4933}
4934
4935static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4936_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
4937 unsigned int __B) {
4938 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4939 (__v8di)_mm512_srli_epi64(__A, __B),
4940 (__v8di)__W);
4941}
4942
4943static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4944_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4945 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4946 (__v8di)_mm512_srli_epi64(__A, __B),
4947 (__v8di)_mm512_setzero_si512());
4948}
4949
4950static __inline__ __m512i __DEFAULT_FN_ATTRS512
4951_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4952{
4953 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4954 (__v16si) __W,
4955 (__mmask16) __U);
4956}
4957
4958static __inline__ __m512i __DEFAULT_FN_ATTRS512
4960{
4961 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4962 (__v16si)
4964 (__mmask16) __U);
4965}
4966
4967static __inline__ void __DEFAULT_FN_ATTRS512
4968_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
4969{
4970 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
4971 (__mmask16) __U);
4972}
4973
4974static __inline__ __m512i __DEFAULT_FN_ATTRS512
4975_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
4976{
4977 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4978 (__v16si) __A,
4979 (__v16si) __W);
4980}
4981
4982static __inline__ __m512i __DEFAULT_FN_ATTRS512
4984{
4985 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4986 (__v16si) __A,
4987 (__v16si) _mm512_setzero_si512 ());
4988}
4989
4990static __inline__ __m512i __DEFAULT_FN_ATTRS512
4991_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
4992{
4993 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4994 (__v8di) __A,
4995 (__v8di) __W);
4996}
4997
4998static __inline__ __m512i __DEFAULT_FN_ATTRS512
5000{
5001 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5002 (__v8di) __A,
5003 (__v8di) _mm512_setzero_si512 ());
5004}
5005
5006static __inline__ __m512i __DEFAULT_FN_ATTRS512
5007_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5008{
5009 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5010 (__v8di) __W,
5011 (__mmask8) __U);
5012}
5013
5014static __inline__ __m512i __DEFAULT_FN_ATTRS512
5016{
5017 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5018 (__v8di)
5020 (__mmask8) __U);
5021}
5022
5023static __inline__ void __DEFAULT_FN_ATTRS512
5024_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5025{
5026 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5027 (__mmask8) __U);
5028}
5029
5030static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5032{
5033 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5034 0, 0, 2, 2, 4, 4, 6, 6);
5035}
5036
5037static __inline__ __m512d __DEFAULT_FN_ATTRS512
5038_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5039{
5040 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5041 (__v8df)_mm512_movedup_pd(__A),
5042 (__v8df)__W);
5043}
5044
5045static __inline__ __m512d __DEFAULT_FN_ATTRS512
5047{
5048 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5049 (__v8df)_mm512_movedup_pd(__A),
5050 (__v8df)_mm512_setzero_pd());
5051}
5052
/* Fix up special values (NaN, Inf, zero, ...) in A/B per the table in C and
 * the immediate imm; R selects the rounding/exception behavior. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

/* Scalar (low-element) fixup; upper elements are passed through from A. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

/* Extract the exponent of the low double of B (as a double), with rounding
 * control R; upper element is taken from A. */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))
5216
5217
5218static __inline__ __m128d __DEFAULT_FN_ATTRS128
5219_mm_getexp_sd (__m128d __A, __m128d __B)
5220{
5221 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5222 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5223}
5224
5225static __inline__ __m128d __DEFAULT_FN_ATTRS128
5226_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5227{
5228 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5229 (__v2df) __B,
5230 (__v2df) __W,
5231 (__mmask8) __U,
5233}
5234
5235#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5236 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5237 (__v2df)(__m128d)(B), \
5238 (__v2df)(__m128d)(W), \
5239 (__mmask8)(U), (int)(R)))
5240
5241static __inline__ __m128d __DEFAULT_FN_ATTRS128
5242_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5243{
5244 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5245 (__v2df) __B,
5246 (__v2df) _mm_setzero_pd (),
5247 (__mmask8) __U,
5249}
5250
5251#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5252 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5253 (__v2df)(__m128d)(B), \
5254 (__v2df)_mm_setzero_pd(), \
5255 (__mmask8)(U), (int)(R)))
5256
5257#define _mm_getexp_round_ss(A, B, R) \
5258 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5259 (__v4sf)(__m128)(B), \
5260 (__v4sf)_mm_setzero_ps(), \
5261 (__mmask8)-1, (int)(R)))
5262
5263static __inline__ __m128 __DEFAULT_FN_ATTRS128
5264_mm_getexp_ss (__m128 __A, __m128 __B)
5265{
5266 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5267 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5268}
5269
5270static __inline__ __m128 __DEFAULT_FN_ATTRS128
5271_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5272{
5273 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5274 (__v4sf) __B,
5275 (__v4sf) __W,
5276 (__mmask8) __U,
5278}
5279
5280#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5281 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5282 (__v4sf)(__m128)(B), \
5283 (__v4sf)(__m128)(W), \
5284 (__mmask8)(U), (int)(R)))
5285
5286static __inline__ __m128 __DEFAULT_FN_ATTRS128
5287_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5288{
5289 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5290 (__v4sf) __B,
5291 (__v4sf) _mm_setzero_ps (),
5292 (__mmask8) __U,
5294}
5295
5296#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5297 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5298 (__v4sf)(__m128)(B), \
5299 (__v4sf)_mm_setzero_ps(), \
5300 (__mmask8)(U), (int)(R)))
5301
/* Extract the normalized mantissa of the low double of B. C selects the
 * normalization interval, D the sign handling; they are packed as (D<<2)|C
 * into the builtin's immediate. Upper element is taken from A. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))

/* Single-precision scalar variants of getmant; same (D<<2)|C encoding. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5391
5392static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5394{
5395 return __A;
5396}
5397
/* Scalar compare of the low elements with predicate P and SAE control R;
 * returns the comparison result as an int. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5410
5411static __inline__ __m512i __DEFAULT_FN_ATTRS512
5412_mm512_sll_epi32(__m512i __A, __m128i __B)
5413{
5414 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5415}
5416
5417static __inline__ __m512i __DEFAULT_FN_ATTRS512
5418_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5419{
5420 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5421 (__v16si)_mm512_sll_epi32(__A, __B),
5422 (__v16si)__W);
5423}
5424
5425static __inline__ __m512i __DEFAULT_FN_ATTRS512
5426_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5427{
5428 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5429 (__v16si)_mm512_sll_epi32(__A, __B),
5430 (__v16si)_mm512_setzero_si512());
5431}
5432
5433static __inline__ __m512i __DEFAULT_FN_ATTRS512
5434_mm512_sll_epi64(__m512i __A, __m128i __B)
5435{
5436 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5437}
5438
5439static __inline__ __m512i __DEFAULT_FN_ATTRS512
5440_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5441{
5442 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5443 (__v8di)_mm512_sll_epi64(__A, __B),
5444 (__v8di)__W);
5445}
5446
5447static __inline__ __m512i __DEFAULT_FN_ATTRS512
5448_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5449{
5450 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5451 (__v8di)_mm512_sll_epi64(__A, __B),
5452 (__v8di)_mm512_setzero_si512());
5453}
5454
5455static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5456_mm512_sllv_epi32(__m512i __X, __m512i __Y) {
5457 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5458}
5459
5460static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5461_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5462 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5463 (__v16si)_mm512_sllv_epi32(__X, __Y),
5464 (__v16si)__W);
5465}
5466
5467static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5468_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5469 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5470 (__v16si)_mm512_sllv_epi32(__X, __Y),
5471 (__v16si)_mm512_setzero_si512());
5472}
5473
5474static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5475_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5476{
5477 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5478}
5479
5480static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5481_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5482{
5483 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5484 (__v8di)_mm512_sllv_epi64(__X, __Y),
5485 (__v8di)__W);
5486}
5487
5488static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5489_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5490{
5491 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5492 (__v8di)_mm512_sllv_epi64(__X, __Y),
5493 (__v8di)_mm512_setzero_si512());
5494}
5495
5496static __inline__ __m512i __DEFAULT_FN_ATTRS512
5497_mm512_sra_epi32(__m512i __A, __m128i __B)
5498{
5499 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5500}
5501
5502static __inline__ __m512i __DEFAULT_FN_ATTRS512
5503_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5504{
5505 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5506 (__v16si)_mm512_sra_epi32(__A, __B),
5507 (__v16si)__W);
5508}
5509
5510static __inline__ __m512i __DEFAULT_FN_ATTRS512
5511_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5512{
5513 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5514 (__v16si)_mm512_sra_epi32(__A, __B),
5515 (__v16si)_mm512_setzero_si512());
5516}
5517
5518static __inline__ __m512i __DEFAULT_FN_ATTRS512
5519_mm512_sra_epi64(__m512i __A, __m128i __B)
5520{
5521 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5522}
5523
5524static __inline__ __m512i __DEFAULT_FN_ATTRS512
5525_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5526{
5527 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5528 (__v8di)_mm512_sra_epi64(__A, __B),
5529 (__v8di)__W);
5530}
5531
5532static __inline__ __m512i __DEFAULT_FN_ATTRS512
5533_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5534{
5535 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5536 (__v8di)_mm512_sra_epi64(__A, __B),
5537 (__v8di)_mm512_setzero_si512());
5538}
5539
5540static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5541_mm512_srav_epi32(__m512i __X, __m512i __Y) {
5542 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5543}
5544
5545static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5546_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5547 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5548 (__v16si)_mm512_srav_epi32(__X, __Y),
5549 (__v16si)__W);
5550}
5551
5552static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5553_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5554 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5555 (__v16si)_mm512_srav_epi32(__X, __Y),
5556 (__v16si)_mm512_setzero_si512());
5557}
5558
5559static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5560_mm512_srav_epi64(__m512i __X, __m512i __Y)
5561{
5562 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5563}
5564
5565static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5566_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5567{
5568 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5569 (__v8di)_mm512_srav_epi64(__X, __Y),
5570 (__v8di)__W);
5571}
5572
5573static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5574_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5575{
5576 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5577 (__v8di)_mm512_srav_epi64(__X, __Y),
5578 (__v8di)_mm512_setzero_si512());
5579}
5580
5581static __inline__ __m512i __DEFAULT_FN_ATTRS512
5582_mm512_srl_epi32(__m512i __A, __m128i __B)
5583{
5584 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5585}
5586
5587static __inline__ __m512i __DEFAULT_FN_ATTRS512
5588_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5589{
5590 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5591 (__v16si)_mm512_srl_epi32(__A, __B),
5592 (__v16si)__W);
5593}
5594
5595static __inline__ __m512i __DEFAULT_FN_ATTRS512
5596_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5597{
5598 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5599 (__v16si)_mm512_srl_epi32(__A, __B),
5600 (__v16si)_mm512_setzero_si512());
5601}
5602
5603static __inline__ __m512i __DEFAULT_FN_ATTRS512
5604_mm512_srl_epi64(__m512i __A, __m128i __B)
5605{
5606 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5607}
5608
5609static __inline__ __m512i __DEFAULT_FN_ATTRS512
5610_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5611{
5612 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5613 (__v8di)_mm512_srl_epi64(__A, __B),
5614 (__v8di)__W);
5615}
5616
5617static __inline__ __m512i __DEFAULT_FN_ATTRS512
5618_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5619{
5620 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5621 (__v8di)_mm512_srl_epi64(__A, __B),
5622 (__v8di)_mm512_setzero_si512());
5623}
5624
5625static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5626_mm512_srlv_epi32(__m512i __X, __m512i __Y) {
5627 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5628}
5629
5630static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5631_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5632 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5633 (__v16si)_mm512_srlv_epi32(__X, __Y),
5634 (__v16si)__W);
5635}
5636
5637static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5638_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5639 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5640 (__v16si)_mm512_srlv_epi32(__X, __Y),
5641 (__v16si)_mm512_setzero_si512());
5642}
5643
5644static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5645_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5646{
5647 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5648}
5649
5650static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5651_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5652{
5653 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5654 (__v8di)_mm512_srlv_epi64(__X, __Y),
5655 (__v8di)__W);
5656}
5657
5658static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5659_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5660{
5661 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5662 (__v8di)_mm512_srlv_epi64(__X, __Y),
5663 (__v8di)_mm512_setzero_si512());
5664}
5665
/// \enum _MM_TERNLOG_ENUM
/// A helper to represent the ternary logic operations among vector \a A,
/// \a B and \a C. The representation is passed to \a imm.
typedef enum {
  _MM_TERNLOG_A = 0xF0,
  _MM_TERNLOG_B = 0xCC,
  _MM_TERNLOG_C = 0xAA
} _MM_TERNLOG_ENUM;

#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
5704
5705#ifdef __x86_64__
5706#define _mm_cvt_roundsd_i64(A, R) \
5707 ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5708#endif
5709
5710#define _mm_cvt_roundsd_si32(A, R) \
5711 ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5712
5713#define _mm_cvt_roundsd_i32(A, R) \
5714 ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5715
5716#define _mm_cvt_roundsd_u32(A, R) \
5717 ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5718
5719static __inline__ unsigned __DEFAULT_FN_ATTRS128
5720_mm_cvtsd_u32 (__m128d __A)
5721{
5722 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5724}
5725
5726#ifdef __x86_64__
5727#define _mm_cvt_roundsd_u64(A, R) \
5728 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5729 (int)(R)))
5730
5731static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5732_mm_cvtsd_u64 (__m128d __A)
5733{
5734 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5735 __A,
5737}
5738#endif
5739
5740#define _mm_cvt_roundss_si32(A, R) \
5741 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5742
5743#define _mm_cvt_roundss_i32(A, R) \
5744 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5745
5746#ifdef __x86_64__
5747#define _mm_cvt_roundss_si64(A, R) \
5748 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5749
5750#define _mm_cvt_roundss_i64(A, R) \
5751 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5752#endif
5753
5754#define _mm_cvt_roundss_u32(A, R) \
5755 ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5756
5757static __inline__ unsigned __DEFAULT_FN_ATTRS128
5758_mm_cvtss_u32 (__m128 __A)
5759{
5760 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5762}
5763
5764#ifdef __x86_64__
5765#define _mm_cvt_roundss_u64(A, R) \
5766 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
5767 (int)(R)))
5768
5769static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5770_mm_cvtss_u64 (__m128 __A)
5771{
5772 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
5773 __A,
5775}
5776#endif
5777
5778#define _mm_cvtt_roundsd_i32(A, R) \
5779 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5780
5781#define _mm_cvtt_roundsd_si32(A, R) \
5782 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5783
5784static __inline__ int __DEFAULT_FN_ATTRS128
5785_mm_cvttsd_i32 (__m128d __A)
5786{
5787 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
5789}
5790
5791#ifdef __x86_64__
5792#define _mm_cvtt_roundsd_si64(A, R) \
5793 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5794
5795#define _mm_cvtt_roundsd_i64(A, R) \
5796 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5797
5798static __inline__ long long __DEFAULT_FN_ATTRS128
5799_mm_cvttsd_i64 (__m128d __A)
5800{
5801 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
5803}
5804#endif
5805
5806#define _mm_cvtt_roundsd_u32(A, R) \
5807 ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5808
5809static __inline__ unsigned __DEFAULT_FN_ATTRS128
5810_mm_cvttsd_u32 (__m128d __A)
5811{
5812 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
5814}
5815
5816#ifdef __x86_64__
5817#define _mm_cvtt_roundsd_u64(A, R) \
5818 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
5819 (int)(R)))
5820
5821static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5822_mm_cvttsd_u64 (__m128d __A)
5823{
5824 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
5825 __A,
5827}
5828#endif
5829
5830#define _mm_cvtt_roundss_i32(A, R) \
5831 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5832
5833#define _mm_cvtt_roundss_si32(A, R) \
5834 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5835
5836static __inline__ int __DEFAULT_FN_ATTRS128
5837_mm_cvttss_i32 (__m128 __A)
5838{
5839 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
5841}
5842
5843#ifdef __x86_64__
5844#define _mm_cvtt_roundss_i64(A, R) \
5845 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5846
5847#define _mm_cvtt_roundss_si64(A, R) \
5848 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5849
5850static __inline__ long long __DEFAULT_FN_ATTRS128
5851_mm_cvttss_i64 (__m128 __A)
5852{
5853 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
5855}
5856#endif
5857
5858#define _mm_cvtt_roundss_u32(A, R) \
5859 ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
5860
5861static __inline__ unsigned __DEFAULT_FN_ATTRS128
5862_mm_cvttss_u32 (__m128 __A)
5863{
5864 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
5866}
5867
5868#ifdef __x86_64__
5869#define _mm_cvtt_roundss_u64(A, R) \
5870 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
5871 (int)(R)))
5872
5873static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5874_mm_cvttss_u64 (__m128 __A)
5875{
5876 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
5877 __A,
5879}
5880#endif
5881
/* In-lane permutes with an immediate control (VPERMILPD/VPERMILPS).
   Mask forms blend with W under writemask U; maskz forms zero masked-off
   elements.  Implemented as macros because C is an immediate.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
5907
5908static __inline__ __m512d __DEFAULT_FN_ATTRS512
5909_mm512_permutevar_pd(__m512d __A, __m512i __C)
5910{
5911 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
5912}
5913
5914static __inline__ __m512d __DEFAULT_FN_ATTRS512
5915_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5916{
5917 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5918 (__v8df)_mm512_permutevar_pd(__A, __C),
5919 (__v8df)__W);
5920}
5921
5922static __inline__ __m512d __DEFAULT_FN_ATTRS512
5923_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
5924{
5925 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5926 (__v8df)_mm512_permutevar_pd(__A, __C),
5927 (__v8df)_mm512_setzero_pd());
5928}
5929
5930static __inline__ __m512 __DEFAULT_FN_ATTRS512
5931_mm512_permutevar_ps(__m512 __A, __m512i __C)
5932{
5933 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
5934}
5935
5936static __inline__ __m512 __DEFAULT_FN_ATTRS512
5937_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5938{
5939 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
5940 (__v16sf)_mm512_permutevar_ps(__A, __C),
5941 (__v16sf)__W);
5942}
5943
5944static __inline__ __m512 __DEFAULT_FN_ATTRS512
5945_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
5946{
5947 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
5948 (__v16sf)_mm512_permutevar_ps(__A, __C),
5949 (__v16sf)_mm512_setzero_ps());
5950}
5951
5952static __inline __m512d __DEFAULT_FN_ATTRS512
5953_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
5954{
5955 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
5956 (__v8df)__B);
5957}
5958
5959static __inline__ __m512d __DEFAULT_FN_ATTRS512
5960_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
5961{
5962 return (__m512d)__builtin_ia32_selectpd_512(__U,
5963 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5964 (__v8df)__A);
5965}
5966
5967static __inline__ __m512d __DEFAULT_FN_ATTRS512
5968_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
5969 __m512d __B)
5970{
5971 return (__m512d)__builtin_ia32_selectpd_512(__U,
5972 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5973 (__v8df)(__m512d)__I);
5974}
5975
5976static __inline__ __m512d __DEFAULT_FN_ATTRS512
5977_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
5978 __m512d __B)
5979{
5980 return (__m512d)__builtin_ia32_selectpd_512(__U,
5981 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5982 (__v8df)_mm512_setzero_pd());
5983}
5984
5985static __inline __m512 __DEFAULT_FN_ATTRS512
5986_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
5987{
5988 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
5989 (__v16sf) __B);
5990}
5991
5992static __inline__ __m512 __DEFAULT_FN_ATTRS512
5993_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
5994{
5995 return (__m512)__builtin_ia32_selectps_512(__U,
5996 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
5997 (__v16sf)__A);
5998}
5999
6000static __inline__ __m512 __DEFAULT_FN_ATTRS512
6001_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
6002{
6003 return (__m512)__builtin_ia32_selectps_512(__U,
6004 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6005 (__v16sf)(__m512)__I);
6006}
6007
6008static __inline__ __m512 __DEFAULT_FN_ATTRS512
6009_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
6010{
6011 return (__m512)__builtin_ia32_selectps_512(__U,
6012 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6013 (__v16sf)_mm512_setzero_ps());
6014}
6015
6016
/* Truncating packed double -> unsigned int conversion with explicit
   SAE/rounding immediate R.  */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
6031
6032static __inline__ __m256i __DEFAULT_FN_ATTRS512
6034{
6035 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6036 (__v8si)
6038 (__mmask8) -1,
6040}
6041
6042static __inline__ __m256i __DEFAULT_FN_ATTRS512
6043_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6044{
6045 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6046 (__v8si) __W,
6047 (__mmask8) __U,
6049}
6050
6051static __inline__ __m256i __DEFAULT_FN_ATTRS512
6053{
6054 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6055 (__v8si)
6057 (__mmask8) __U,
6059}
6060
/* Scalar round-to-specified-precision (VRNDSCALESD/SS).  imm/I encodes the
   rounding mode and precision; "_round" forms additionally take SAE/rounding
   control R, others use _MM_FROUND_CUR_DIRECTION.  */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6144
6145#define _mm512_scalef_round_pd(A, B, R) \
6146 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6147 (__v8df)(__m512d)(B), \
6148 (__v8df)_mm512_undefined_pd(), \
6149 (__mmask8)-1, (int)(R)))
6150
6151#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6152 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6153 (__v8df)(__m512d)(B), \
6154 (__v8df)(__m512d)(W), \
6155 (__mmask8)(U), (int)(R)))
6156
6157#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6158 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6159 (__v8df)(__m512d)(B), \
6160 (__v8df)_mm512_setzero_pd(), \
6161 (__mmask8)(U), (int)(R)))
6162
6163static __inline__ __m512d __DEFAULT_FN_ATTRS512
6164_mm512_scalef_pd (__m512d __A, __m512d __B)
6165{
6166 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6167 (__v8df) __B,
6168 (__v8df)
6170 (__mmask8) -1,
6172}
6173
6174static __inline__ __m512d __DEFAULT_FN_ATTRS512
6175_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6176{
6177 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6178 (__v8df) __B,
6179 (__v8df) __W,
6180 (__mmask8) __U,
6182}
6183
6184static __inline__ __m512d __DEFAULT_FN_ATTRS512
6185_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6186{
6187 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6188 (__v8df) __B,
6189 (__v8df)
6191 (__mmask8) __U,
6193}
6194
6195#define _mm512_scalef_round_ps(A, B, R) \
6196 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6197 (__v16sf)(__m512)(B), \
6198 (__v16sf)_mm512_undefined_ps(), \
6199 (__mmask16)-1, (int)(R)))
6200
6201#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6202 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6203 (__v16sf)(__m512)(B), \
6204 (__v16sf)(__m512)(W), \
6205 (__mmask16)(U), (int)(R)))
6206
6207#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6208 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6209 (__v16sf)(__m512)(B), \
6210 (__v16sf)_mm512_setzero_ps(), \
6211 (__mmask16)(U), (int)(R)))
6212
6213static __inline__ __m512 __DEFAULT_FN_ATTRS512
6214_mm512_scalef_ps (__m512 __A, __m512 __B)
6215{
6216 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6217 (__v16sf) __B,
6218 (__v16sf)
6220 (__mmask16) -1,
6222}
6223
6224static __inline__ __m512 __DEFAULT_FN_ATTRS512
6225_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6226{
6227 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6228 (__v16sf) __B,
6229 (__v16sf) __W,
6230 (__mmask16) __U,
6232}
6233
6234static __inline__ __m512 __DEFAULT_FN_ATTRS512
6235_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6236{
6237 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6238 (__v16sf) __B,
6239 (__v16sf)
6241 (__mmask16) __U,
6243}
6244
6245#define _mm_scalef_round_sd(A, B, R) \
6246 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6247 (__v2df)(__m128d)(B), \
6248 (__v2df)_mm_setzero_pd(), \
6249 (__mmask8)-1, (int)(R)))
6250
6251static __inline__ __m128d __DEFAULT_FN_ATTRS128
6252_mm_scalef_sd (__m128d __A, __m128d __B)
6253{
6254 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6255 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6256 (__mmask8) -1,
6258}
6259
6260static __inline__ __m128d __DEFAULT_FN_ATTRS128
6261_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6262{
6263 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6264 (__v2df) __B,
6265 (__v2df) __W,
6266 (__mmask8) __U,
6268}
6269
6270#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6271 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6272 (__v2df)(__m128d)(B), \
6273 (__v2df)(__m128d)(W), \
6274 (__mmask8)(U), (int)(R)))
6275
6276static __inline__ __m128d __DEFAULT_FN_ATTRS128
6277_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6278{
6279 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6280 (__v2df) __B,
6281 (__v2df) _mm_setzero_pd (),
6282 (__mmask8) __U,
6284}
6285
6286#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6287 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6288 (__v2df)(__m128d)(B), \
6289 (__v2df)_mm_setzero_pd(), \
6290 (__mmask8)(U), (int)(R)))
6291
6292#define _mm_scalef_round_ss(A, B, R) \
6293 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6294 (__v4sf)(__m128)(B), \
6295 (__v4sf)_mm_setzero_ps(), \
6296 (__mmask8)-1, (int)(R)))
6297
6298static __inline__ __m128 __DEFAULT_FN_ATTRS128
6299_mm_scalef_ss (__m128 __A, __m128 __B)
6300{
6301 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6302 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6303 (__mmask8) -1,
6305}
6306
6307static __inline__ __m128 __DEFAULT_FN_ATTRS128
6308_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6309{
6310 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6311 (__v4sf) __B,
6312 (__v4sf) __W,
6313 (__mmask8) __U,
6315}
6316
6317#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6318 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6319 (__v4sf)(__m128)(B), \
6320 (__v4sf)(__m128)(W), \
6321 (__mmask8)(U), (int)(R)))
6322
6323static __inline__ __m128 __DEFAULT_FN_ATTRS128
6324_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6325{
6326 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6327 (__v4sf) __B,
6328 (__v4sf) _mm_setzero_ps (),
6329 (__mmask8) __U,
6331}
6332
6333#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6334 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6335 (__v4sf)(__m128)(B), \
6336 (__v4sf)_mm_setzero_ps(), \
6337 (__mmask8)(U), \
6338 (int)(R)))
6339
6340static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6341_mm512_srai_epi32(__m512i __A, unsigned int __B) {
6342 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
6343}
6344
6345static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6346_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
6347 unsigned int __B) {
6348 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6349 (__v16si)_mm512_srai_epi32(__A, __B),
6350 (__v16si)__W);
6351}
6352
6353static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6354_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
6355 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6356 (__v16si)_mm512_srai_epi32(__A, __B),
6357 (__v16si)_mm512_setzero_si512());
6358}
6359
6360static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6361_mm512_srai_epi64(__m512i __A, unsigned int __B) {
6362 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
6363}
6364
6365static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6366_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A,
6367 unsigned int __B) {
6368 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6369 (__v8di)_mm512_srai_epi64(__A, __B),
6370 (__v8di)__W);
6371}
6372
6373static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6374_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
6375 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6376 (__v8di)_mm512_srai_epi64(__A, __B),
6377 (__v8di)_mm512_setzero_si512());
6378}
6379
/* 128-bit-lane and element shuffles with an immediate control.  */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                   (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                   (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                    (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                    (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                   (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                   (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                   (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                   (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                   (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                   (__v8di)_mm512_setzero_si512()))

#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                    (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                    (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                   (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                   (__v16sf)_mm512_setzero_ps()))
6463
6464#define _mm_sqrt_round_sd(A, B, R) \
6465 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6466 (__v2df)(__m128d)(B), \
6467 (__v2df)_mm_setzero_pd(), \
6468 (__mmask8)-1, (int)(R)))
6469
6470static __inline__ __m128d __DEFAULT_FN_ATTRS128
6471_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6472{
6473 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6474 (__v2df) __B,
6475 (__v2df) __W,
6476 (__mmask8) __U,
6478}
6479
6480#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6481 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6482 (__v2df)(__m128d)(B), \
6483 (__v2df)(__m128d)(W), \
6484 (__mmask8)(U), (int)(R)))
6485
6486static __inline__ __m128d __DEFAULT_FN_ATTRS128
6487_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6488{
6489 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6490 (__v2df) __B,
6491 (__v2df) _mm_setzero_pd (),
6492 (__mmask8) __U,
6494}
6495
6496#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6497 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6498 (__v2df)(__m128d)(B), \
6499 (__v2df)_mm_setzero_pd(), \
6500 (__mmask8)(U), (int)(R)))
6501
6502#define _mm_sqrt_round_ss(A, B, R) \
6503 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6504 (__v4sf)(__m128)(B), \
6505 (__v4sf)_mm_setzero_ps(), \
6506 (__mmask8)-1, (int)(R)))
6507
6508static __inline__ __m128 __DEFAULT_FN_ATTRS128
6509_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6510{
6511 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6512 (__v4sf) __B,
6513 (__v4sf) __W,
6514 (__mmask8) __U,
6516}
6517
6518#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6519 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6520 (__v4sf)(__m128)(B), \
6521 (__v4sf)(__m128)(W), (__mmask8)(U), \
6522 (int)(R)))
6523
6524static __inline__ __m128 __DEFAULT_FN_ATTRS128
6525_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6526{
6527 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6528 (__v4sf) __B,
6529 (__v4sf) _mm_setzero_ps (),
6530 (__mmask8) __U,
6532}
6533
6534#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6535 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6536 (__v4sf)(__m128)(B), \
6537 (__v4sf)_mm_setzero_ps(), \
6538 (__mmask8)(U), (int)(R)))
6539
6540static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6542 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6543 0, 1, 2, 3, 0, 1, 2, 3,
6544 0, 1, 2, 3, 0, 1, 2, 3);
6545}
6546
6547static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6548_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
6549 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6550 (__v16sf)_mm512_broadcast_f32x4(__A),
6551 (__v16sf)__O);
6552}
6553
6554static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6556 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6557 (__v16sf)_mm512_broadcast_f32x4(__A),
6558 (__v16sf)_mm512_setzero_ps());
6559}
6560
6561static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6563 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6564 0, 1, 2, 3, 0, 1, 2, 3);
6565}
6566
6567static __inline__ __m512d __DEFAULT_FN_ATTRS512
6568_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6569{
6570 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6571 (__v8df)_mm512_broadcast_f64x4(__A),
6572 (__v8df)__O);
6573}
6574
6575static __inline__ __m512d __DEFAULT_FN_ATTRS512
6577{
6578 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6579 (__v8df)_mm512_broadcast_f64x4(__A),
6580 (__v8df)_mm512_setzero_pd());
6581}
6582
6583static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6585 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6586 0, 1, 2, 3, 0, 1, 2, 3,
6587 0, 1, 2, 3, 0, 1, 2, 3);
6588}
6589
6590static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6591_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
6592 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6593 (__v16si)_mm512_broadcast_i32x4(__A),
6594 (__v16si)__O);
6595}
6596
6597static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6599 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6600 (__v16si)_mm512_broadcast_i32x4(__A),
6601 (__v16si)_mm512_setzero_si512());
6602}
6603
6604static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6606 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6607 0, 1, 2, 3, 0, 1, 2, 3);
6608}
6609
6610static __inline__ __m512i __DEFAULT_FN_ATTRS512
6611_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6612{
6613 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6614 (__v8di)_mm512_broadcast_i64x4(__A),
6615 (__v8di)__O);
6616}
6617
6618static __inline__ __m512i __DEFAULT_FN_ATTRS512
6620{
6621 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6622 (__v8di)_mm512_broadcast_i64x4(__A),
6623 (__v8di)_mm512_setzero_si512());
6624}
6625
6626static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6627_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) {
6628 return (__m512d)__builtin_ia32_selectpd_512(__M,
6629 (__v8df) _mm512_broadcastsd_pd(__A),
6630 (__v8df) __O);
6631}
6632
6633static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6635 return (__m512d)__builtin_ia32_selectpd_512(__M,
6636 (__v8df) _mm512_broadcastsd_pd(__A),
6637 (__v8df) _mm512_setzero_pd());
6638}
6639
6640static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6641_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) {
6642 return (__m512)__builtin_ia32_selectps_512(__M,
6643 (__v16sf) _mm512_broadcastss_ps(__A),
6644 (__v16sf) __O);
6645}
6646
6647static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6649 return (__m512)__builtin_ia32_selectps_512(__M,
6650 (__v16sf) _mm512_broadcastss_ps(__A),
6651 (__v16sf) _mm512_setzero_ps());
6652}
6653
6654static __inline__ __m128i __DEFAULT_FN_ATTRS512
6656{
6657 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6658 (__v16qi) _mm_undefined_si128 (),
6659 (__mmask16) -1);
6660}
6661
6662static __inline__ __m128i __DEFAULT_FN_ATTRS512
6663_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6664{
6665 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6666 (__v16qi) __O, __M);
6667}
6668
6669static __inline__ __m128i __DEFAULT_FN_ATTRS512
6671{
6672 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6673 (__v16qi) _mm_setzero_si128 (),
6674 __M);
6675}
6676
6677static __inline__ void __DEFAULT_FN_ATTRS512
6679{
6680 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6681}
6682
6683static __inline__ __m256i __DEFAULT_FN_ATTRS512
6685{
6686 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6687 (__v16hi) _mm256_undefined_si256 (),
6688 (__mmask16) -1);
6689}
6690
6691static __inline__ __m256i __DEFAULT_FN_ATTRS512
6692_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6693{
6694 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6695 (__v16hi) __O, __M);
6696}
6697
6698static __inline__ __m256i __DEFAULT_FN_ATTRS512
6700{
6701 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6702 (__v16hi) _mm256_setzero_si256 (),
6703 __M);
6704}
6705
6706static __inline__ void __DEFAULT_FN_ATTRS512
6708{
6709 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6710}
6711
6712static __inline__ __m128i __DEFAULT_FN_ATTRS512
6714{
6715 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6716 (__v16qi) _mm_undefined_si128 (),
6717 (__mmask8) -1);
6718}
6719
6720static __inline__ __m128i __DEFAULT_FN_ATTRS512
6721_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6722{
6723 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6724 (__v16qi) __O, __M);
6725}
6726
6727static __inline__ __m128i __DEFAULT_FN_ATTRS512
6729{
6730 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6731 (__v16qi) _mm_setzero_si128 (),
6732 __M);
6733}
6734
6735static __inline__ void __DEFAULT_FN_ATTRS512
6737{
6738 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6739}
6740
6741static __inline__ __m256i __DEFAULT_FN_ATTRS512
6743{
6744 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6745 (__v8si) _mm256_undefined_si256 (),
6746 (__mmask8) -1);
6747}
6748
6749static __inline__ __m256i __DEFAULT_FN_ATTRS512
6750_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6751{
6752 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6753 (__v8si) __O, __M);
6754}
6755
6756static __inline__ __m256i __DEFAULT_FN_ATTRS512
6758{
6759 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6760 (__v8si) _mm256_setzero_si256 (),
6761 __M);
6762}
6763
6764static __inline__ void __DEFAULT_FN_ATTRS512
6766{
6767 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6768}
6769
6770static __inline__ __m128i __DEFAULT_FN_ATTRS512
6772{
6773 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6774 (__v8hi) _mm_undefined_si128 (),
6775 (__mmask8) -1);
6776}
6777
6778static __inline__ __m128i __DEFAULT_FN_ATTRS512
6779_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6780{
6781 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6782 (__v8hi) __O, __M);
6783}
6784
6785static __inline__ __m128i __DEFAULT_FN_ATTRS512
6787{
6788 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6789 (__v8hi) _mm_setzero_si128 (),
6790 __M);
6791}
6792
6793static __inline__ void __DEFAULT_FN_ATTRS512
6795{
6796 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6797}
6798
6799static __inline__ __m128i __DEFAULT_FN_ATTRS512
6801{
6802 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6803 (__v16qi) _mm_undefined_si128 (),
6804 (__mmask16) -1);
6805}
6806
6807static __inline__ __m128i __DEFAULT_FN_ATTRS512
6808_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6809{
6810 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6811 (__v16qi) __O,
6812 __M);
6813}
6814
6815static __inline__ __m128i __DEFAULT_FN_ATTRS512
6817{
6818 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6819 (__v16qi) _mm_setzero_si128 (),
6820 __M);
6821}
6822
6823static __inline__ void __DEFAULT_FN_ATTRS512
6825{
6826 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6827}
6828
6829static __inline__ __m256i __DEFAULT_FN_ATTRS512
6831{
6832 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6833 (__v16hi) _mm256_undefined_si256 (),
6834 (__mmask16) -1);
6835}
6836
6837static __inline__ __m256i __DEFAULT_FN_ATTRS512
6838_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6839{
6840 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6841 (__v16hi) __O,
6842 __M);
6843}
6844
6845static __inline__ __m256i __DEFAULT_FN_ATTRS512
6847{
6848 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6849 (__v16hi) _mm256_setzero_si256 (),
6850 __M);
6851}
6852
6853static __inline__ void __DEFAULT_FN_ATTRS512
6855{
6856 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6857}
6858
6859static __inline__ __m128i __DEFAULT_FN_ATTRS512
6861{
6862 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6863 (__v16qi) _mm_undefined_si128 (),
6864 (__mmask8) -1);
6865}
6866
6867static __inline__ __m128i __DEFAULT_FN_ATTRS512
6868_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6869{
6870 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6871 (__v16qi) __O,
6872 __M);
6873}
6874
6875static __inline__ __m128i __DEFAULT_FN_ATTRS512
6877{
6878 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6879 (__v16qi) _mm_setzero_si128 (),
6880 __M);
6881}
6882
6883static __inline__ void __DEFAULT_FN_ATTRS512
6885{
6886 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6887}
6888
6889static __inline__ __m256i __DEFAULT_FN_ATTRS512
6891{
6892 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6893 (__v8si) _mm256_undefined_si256 (),
6894 (__mmask8) -1);
6895}
6896
6897static __inline__ __m256i __DEFAULT_FN_ATTRS512
6898_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6899{
6900 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6901 (__v8si) __O, __M);
6902}
6903
6904static __inline__ __m256i __DEFAULT_FN_ATTRS512
6906{
6907 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6908 (__v8si) _mm256_setzero_si256 (),
6909 __M);
6910}
6911
6912static __inline__ void __DEFAULT_FN_ATTRS512
6914{
6915 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
6916}
6917
6918static __inline__ __m128i __DEFAULT_FN_ATTRS512
6920{
6921 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6922 (__v8hi) _mm_undefined_si128 (),
6923 (__mmask8) -1);
6924}
6925
6926static __inline__ __m128i __DEFAULT_FN_ATTRS512
6927_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6928{
6929 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6930 (__v8hi) __O, __M);
6931}
6932
6933static __inline__ __m128i __DEFAULT_FN_ATTRS512
6935{
6936 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6937 (__v8hi) _mm_setzero_si128 (),
6938 __M);
6939}
6940
6941static __inline__ void __DEFAULT_FN_ATTRS512
6943{
6944 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
6945}
6946
6947static __inline__ __m128i __DEFAULT_FN_ATTRS512
6949{
6950 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6951 (__v16qi) _mm_undefined_si128 (),
6952 (__mmask16) -1);
6953}
6954
6955static __inline__ __m128i __DEFAULT_FN_ATTRS512
6956_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6957{
6958 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6959 (__v16qi) __O, __M);
6960}
6961
6962static __inline__ __m128i __DEFAULT_FN_ATTRS512
6964{
6965 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6966 (__v16qi) _mm_setzero_si128 (),
6967 __M);
6968}
6969
6970static __inline__ void __DEFAULT_FN_ATTRS512
6972{
6973 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6974}
6975
6976static __inline__ __m256i __DEFAULT_FN_ATTRS512
6978{
6979 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6980 (__v16hi) _mm256_undefined_si256 (),
6981 (__mmask16) -1);
6982}
6983
6984static __inline__ __m256i __DEFAULT_FN_ATTRS512
6985_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6986{
6987 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6988 (__v16hi) __O, __M);
6989}
6990
6991static __inline__ __m256i __DEFAULT_FN_ATTRS512
6993{
6994 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6995 (__v16hi) _mm256_setzero_si256 (),
6996 __M);
6997}
6998
6999static __inline__ void __DEFAULT_FN_ATTRS512
7001{
7002 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7003}
7004
7005static __inline__ __m128i __DEFAULT_FN_ATTRS512
7007{
7008 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7009 (__v16qi) _mm_undefined_si128 (),
7010 (__mmask8) -1);
7011}
7012
7013static __inline__ __m128i __DEFAULT_FN_ATTRS512
7014_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7015{
7016 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7017 (__v16qi) __O, __M);
7018}
7019
7020static __inline__ __m128i __DEFAULT_FN_ATTRS512
7022{
7023 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7024 (__v16qi) _mm_setzero_si128 (),
7025 __M);
7026}
7027
7028static __inline__ void __DEFAULT_FN_ATTRS512
7030{
7031 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7032}
7033
7034static __inline__ __m256i __DEFAULT_FN_ATTRS512
7036{
7037 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7038 (__v8si) _mm256_undefined_si256 (),
7039 (__mmask8) -1);
7040}
7041
7042static __inline__ __m256i __DEFAULT_FN_ATTRS512
7043_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7044{
7045 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7046 (__v8si) __O, __M);
7047}
7048
7049static __inline__ __m256i __DEFAULT_FN_ATTRS512
7051{
7052 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7053 (__v8si) _mm256_setzero_si256 (),
7054 __M);
7055}
7056
7057static __inline__ void __DEFAULT_FN_ATTRS512
7059{
7060 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7061}
7062
7063static __inline__ __m128i __DEFAULT_FN_ATTRS512
7065{
7066 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7067 (__v8hi) _mm_undefined_si128 (),
7068 (__mmask8) -1);
7069}
7070
7071static __inline__ __m128i __DEFAULT_FN_ATTRS512
7072_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7073{
7074 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7075 (__v8hi) __O, __M);
7076}
7077
7078static __inline__ __m128i __DEFAULT_FN_ATTRS512
7080{
7081 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7082 (__v8hi) _mm_setzero_si128 (),
7083 __M);
7084}
7085
7086static __inline__ void __DEFAULT_FN_ATTRS512
7088{
7089 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7090}
7091
/* Extract the 128-bit integer lane selected by imm (0..3) from a 512-bit
   vector, with unmasked / merge-masked / zero-masked variants. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_undefined_si128(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

/* Extract the 256-bit integer half selected by imm (0..1) from a 512-bit
   vector, with unmasked / merge-masked / zero-masked variants. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_undefined_si256(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))

/* Insert a 256-bit double vector into the half of A selected by imm;
   masked variants select between the inserted result and W / zero. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)_mm512_setzero_pd()))

/* Insert a 256-bit integer vector into the half of A selected by imm. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

/* Insert a 128-bit float vector into the lane of A selected by imm (0..3). */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()))

/* Insert a 128-bit integer vector into the lane of A selected by imm (0..3). */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
7177
/* Extract the mantissa of each double element; B selects the normalization
   interval and C the sign control (combined as (C<<2)|B); R is the rounding
   mode for the _round_ variants. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

/* Non-_round_ variants use the current rounding direction. */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

/* Single-precision getmant, same interval/sign-control encoding. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

/* Extract the exponent of each double element, with explicit rounding R. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7270
7271static __inline__ __m512d __DEFAULT_FN_ATTRS512
7272_mm512_getexp_pd (__m512d __A)
7273{
7274 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7275 (__v8df) _mm512_undefined_pd (),
7276 (__mmask8) -1,
7278}
7279
7280static __inline__ __m512d __DEFAULT_FN_ATTRS512
7281_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7282{
7283 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7284 (__v8df) __W,
7285 (__mmask8) __U,
7287}
7288
7289static __inline__ __m512d __DEFAULT_FN_ATTRS512
7291{
7292 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7293 (__v8df) _mm512_setzero_pd (),
7294 (__mmask8) __U,
7296}
7297
/* Extract the exponent of each float element, with explicit rounding R. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7312
7313static __inline__ __m512 __DEFAULT_FN_ATTRS512
7315{
7316 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7317 (__v16sf) _mm512_undefined_ps (),
7318 (__mmask16) -1,
7320}
7321
7322static __inline__ __m512 __DEFAULT_FN_ATTRS512
7323_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7324{
7325 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7326 (__v16sf) __W,
7327 (__mmask16) __U,
7329}
7330
7331static __inline__ __m512 __DEFAULT_FN_ATTRS512
7333{
7334 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7335 (__v16sf) _mm512_setzero_ps (),
7336 (__mmask16) __U,
7338}
7339
/* Gather: load elements from addr + index*scale; masked variants gather only
   the elements whose mask bit is set and keep v1_old elsewhere.
   scale must be 1, 2, 4, or 8. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* Scatter: store elements of v1 to addr + index*scale; masked variants store
   only the elements whose mask bit is set. scale must be 1, 2, 4, or 8. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7515
7516static __inline__ __m128 __DEFAULT_FN_ATTRS128
7517_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7518{
7519 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7520 (__v4sf)__A,
7521 (__v4sf)__B,
7522 (__mmask8)__U,
7524}
7525
7526#define _mm_fmadd_round_ss(A, B, C, R) \
7527 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7528 (__v4sf)(__m128)(B), \
7529 (__v4sf)(__m128)(C), (__mmask8)-1, \
7530 (int)(R)))
7531
7532#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7533 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7534 (__v4sf)(__m128)(A), \
7535 (__v4sf)(__m128)(B), (__mmask8)(U), \
7536 (int)(R)))
7537
7538static __inline__ __m128 __DEFAULT_FN_ATTRS128
7539_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7540{
7541 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7542 (__v4sf)__B,
7543 (__v4sf)__C,
7544 (__mmask8)__U,
7546}
7547
7548#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7549 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7550 (__v4sf)(__m128)(B), \
7551 (__v4sf)(__m128)(C), (__mmask8)(U), \
7552 (int)(R)))
7553
7554static __inline__ __m128 __DEFAULT_FN_ATTRS128
7555_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7556{
7557 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7558 (__v4sf)__X,
7559 (__v4sf)__Y,
7560 (__mmask8)__U,
7562}
7563
7564#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7565 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7566 (__v4sf)(__m128)(X), \
7567 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7568 (int)(R)))
7569
7570static __inline__ __m128 __DEFAULT_FN_ATTRS128
7571_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7572{
7573 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7574 (__v4sf)__A,
7575 -(__v4sf)__B,
7576 (__mmask8)__U,
7578}
7579
7580#define _mm_fmsub_round_ss(A, B, C, R) \
7581 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7582 (__v4sf)(__m128)(B), \
7583 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7584 (int)(R)))
7585
7586#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7587 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7588 (__v4sf)(__m128)(A), \
7589 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7590 (int)(R)))
7591
7592static __inline__ __m128 __DEFAULT_FN_ATTRS128
7593_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7594{
7595 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7596 (__v4sf)__B,
7597 -(__v4sf)__C,
7598 (__mmask8)__U,
7600}
7601
7602#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7603 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7604 (__v4sf)(__m128)(B), \
7605 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7606 (int)(R)))
7607
7608static __inline__ __m128 __DEFAULT_FN_ATTRS128
7609_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7610{
7611 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7612 (__v4sf)__X,
7613 (__v4sf)__Y,
7614 (__mmask8)__U,
7616}
7617
7618#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7619 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7620 (__v4sf)(__m128)(X), \
7621 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7622 (int)(R)))
7623
7624static __inline__ __m128 __DEFAULT_FN_ATTRS128
7625_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7626{
7627 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7628 -(__v4sf)__A,
7629 (__v4sf)__B,
7630 (__mmask8)__U,
7632}
7633
7634#define _mm_fnmadd_round_ss(A, B, C, R) \
7635 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7636 -(__v4sf)(__m128)(B), \
7637 (__v4sf)(__m128)(C), (__mmask8)-1, \
7638 (int)(R)))
7639
7640#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7641 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7642 -(__v4sf)(__m128)(A), \
7643 (__v4sf)(__m128)(B), (__mmask8)(U), \
7644 (int)(R)))
7645
7646static __inline__ __m128 __DEFAULT_FN_ATTRS128
7647_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7648{
7649 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7650 -(__v4sf)__B,
7651 (__v4sf)__C,
7652 (__mmask8)__U,
7654}
7655
7656#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7657 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7658 -(__v4sf)(__m128)(B), \
7659 (__v4sf)(__m128)(C), (__mmask8)(U), \
7660 (int)(R)))
7661
7662static __inline__ __m128 __DEFAULT_FN_ATTRS128
7663_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7664{
7665 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7666 -(__v4sf)__X,
7667 (__v4sf)__Y,
7668 (__mmask8)__U,
7670}
7671
7672#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7673 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7674 -(__v4sf)(__m128)(X), \
7675 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7676 (int)(R)))
7677
7678static __inline__ __m128 __DEFAULT_FN_ATTRS128
7679_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7680{
7681 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7682 -(__v4sf)__A,
7683 -(__v4sf)__B,
7684 (__mmask8)__U,
7686}
7687
7688#define _mm_fnmsub_round_ss(A, B, C, R) \
7689 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7690 -(__v4sf)(__m128)(B), \
7691 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7692 (int)(R)))
7693
7694#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7695 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7696 -(__v4sf)(__m128)(A), \
7697 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7698 (int)(R)))
7699
7700static __inline__ __m128 __DEFAULT_FN_ATTRS128
7701_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7702{
7703 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7704 -(__v4sf)__B,
7705 -(__v4sf)__C,
7706 (__mmask8)__U,
7708}
7709
7710#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7711 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7712 -(__v4sf)(__m128)(B), \
7713 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7714 (int)(R)))
7715
7716static __inline__ __m128 __DEFAULT_FN_ATTRS128
7717_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7718{
7719 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7720 -(__v4sf)__X,
7721 (__v4sf)__Y,
7722 (__mmask8)__U,
7724}
7725
7726#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7727 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7728 -(__v4sf)(__m128)(X), \
7729 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7730 (int)(R)))
7731
7732static __inline__ __m128d __DEFAULT_FN_ATTRS128
7733_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7734{
7735 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7736 (__v2df)__A,
7737 (__v2df)__B,
7738 (__mmask8)__U,
7740}
7741
7742#define _mm_fmadd_round_sd(A, B, C, R) \
7743 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7744 (__v2df)(__m128d)(B), \
7745 (__v2df)(__m128d)(C), (__mmask8)-1, \
7746 (int)(R)))
7747
7748#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
7749 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7750 (__v2df)(__m128d)(A), \
7751 (__v2df)(__m128d)(B), (__mmask8)(U), \
7752 (int)(R)))
7753
7754static __inline__ __m128d __DEFAULT_FN_ATTRS128
7755_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7756{
7757 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7758 (__v2df)__B,
7759 (__v2df)__C,
7760 (__mmask8)__U,
7762}
7763
7764#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
7765 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7766 (__v2df)(__m128d)(B), \
7767 (__v2df)(__m128d)(C), (__mmask8)(U), \
7768 (int)(R)))
7769
7770static __inline__ __m128d __DEFAULT_FN_ATTRS128
7771_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7772{
7773 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7774 (__v2df)__X,
7775 (__v2df)__Y,
7776 (__mmask8)__U,
7778}
7779
7780#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
7781 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7782 (__v2df)(__m128d)(X), \
7783 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7784 (int)(R)))
7785
7786static __inline__ __m128d __DEFAULT_FN_ATTRS128
7787_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7788{
7789 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7790 (__v2df)__A,
7791 -(__v2df)__B,
7792 (__mmask8)__U,
7794}
7795
7796#define _mm_fmsub_round_sd(A, B, C, R) \
7797 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7798 (__v2df)(__m128d)(B), \
7799 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7800 (int)(R)))
7801
7802#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
7803 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7804 (__v2df)(__m128d)(A), \
7805 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7806 (int)(R)))
7807
7808static __inline__ __m128d __DEFAULT_FN_ATTRS128
7809_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7810{
7811 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7812 (__v2df)__B,
7813 -(__v2df)__C,
7814 (__mmask8)__U,
7816}
7817
7818#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
7819 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7820 (__v2df)(__m128d)(B), \
7821 -(__v2df)(__m128d)(C), \
7822 (__mmask8)(U), (int)(R)))
7823
7824static __inline__ __m128d __DEFAULT_FN_ATTRS128
7825_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7826{
7827 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7828 (__v2df)__X,
7829 (__v2df)__Y,
7830 (__mmask8)__U,
7832}
7833
7834#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
7835 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7836 (__v2df)(__m128d)(X), \
7837 (__v2df)(__m128d)(Y), \
7838 (__mmask8)(U), (int)(R)))
7839
7840static __inline__ __m128d __DEFAULT_FN_ATTRS128
7841_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7842{
7843 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7844 -(__v2df)__A,
7845 (__v2df)__B,
7846 (__mmask8)__U,
7848}
7849
7850#define _mm_fnmadd_round_sd(A, B, C, R) \
7851 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7852 -(__v2df)(__m128d)(B), \
7853 (__v2df)(__m128d)(C), (__mmask8)-1, \
7854 (int)(R)))
7855
7856#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
7857 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7858 -(__v2df)(__m128d)(A), \
7859 (__v2df)(__m128d)(B), (__mmask8)(U), \
7860 (int)(R)))
7861
7862static __inline__ __m128d __DEFAULT_FN_ATTRS128
7863_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7864{
7865 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7866 -(__v2df)__B,
7867 (__v2df)__C,
7868 (__mmask8)__U,
7870}
7871
7872#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
7873 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7874 -(__v2df)(__m128d)(B), \
7875 (__v2df)(__m128d)(C), (__mmask8)(U), \
7876 (int)(R)))
7877
7878static __inline__ __m128d __DEFAULT_FN_ATTRS128
7879_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7880{
7881 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7882 -(__v2df)__X,
7883 (__v2df)__Y,
7884 (__mmask8)__U,
7886}
7887
7888#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
7889 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7890 -(__v2df)(__m128d)(X), \
7891 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7892 (int)(R)))
7893
7894static __inline__ __m128d __DEFAULT_FN_ATTRS128
7895_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7896{
7897 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7898 -(__v2df)__A,
7899 -(__v2df)__B,
7900 (__mmask8)__U,
7902}
7903
7904#define _mm_fnmsub_round_sd(A, B, C, R) \
7905 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7906 -(__v2df)(__m128d)(B), \
7907 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7908 (int)(R)))
7909
7910#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
7911 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7912 -(__v2df)(__m128d)(A), \
7913 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7914 (int)(R)))
7915
7916static __inline__ __m128d __DEFAULT_FN_ATTRS128
7917_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7918{
7919 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7920 -(__v2df)__B,
7921 -(__v2df)__C,
7922 (__mmask8)__U,
7924}
7925
7926#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
7927 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7928 -(__v2df)(__m128d)(B), \
7929 -(__v2df)(__m128d)(C), \
7930 (__mmask8)(U), \
7931 (int)(R)))
7932
7933static __inline__ __m128d __DEFAULT_FN_ATTRS128
7934_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7935{
7936 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7937 -(__v2df)__X,
7938 (__v2df)__Y,
7939 (__mmask8)__U,
7941}
7942
7943#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
7944 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7945 -(__v2df)(__m128d)(X), \
7946 (__v2df)(__m128d)(Y), \
7947 (__mmask8)(U), (int)(R)))
7948
/* Permute 64-bit lanes within each 256-bit half by the immediate C,
 * with merge- and zero-masked variants built on select. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
7974
7975static __inline__ __m512d __DEFAULT_FN_ATTRS512
7976_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
7977{
7978 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
7979}
7980
7981static __inline__ __m512d __DEFAULT_FN_ATTRS512
7982_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
7983{
7984 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7985 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7986 (__v8df)__W);
7987}
7988
7989static __inline__ __m512d __DEFAULT_FN_ATTRS512
7990_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
7991{
7992 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7993 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7994 (__v8df)_mm512_setzero_pd());
7995}
7996
7997static __inline__ __m512i __DEFAULT_FN_ATTRS512
7998_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
7999{
8000 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8001}
8002
8003static __inline__ __m512i __DEFAULT_FN_ATTRS512
8005{
8006 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8007 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8008 (__v8di)_mm512_setzero_si512());
8009}
8010
8011static __inline__ __m512i __DEFAULT_FN_ATTRS512
8012_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8013 __m512i __Y)
8014{
8015 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8016 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8017 (__v8di)__W);
8018}
8019
8020static __inline__ __m512 __DEFAULT_FN_ATTRS512
8021_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8022{
8023 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8024}
8025
8026static __inline__ __m512 __DEFAULT_FN_ATTRS512
8027_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8028{
8029 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8030 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8031 (__v16sf)__W);
8032}
8033
8034static __inline__ __m512 __DEFAULT_FN_ATTRS512
8036{
8037 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8038 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8039 (__v16sf)_mm512_setzero_ps());
8040}
8041
8042static __inline__ __m512i __DEFAULT_FN_ATTRS512
8043_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8044{
8045 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8046}
8047
8048#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8049
8050static __inline__ __m512i __DEFAULT_FN_ATTRS512
8052{
8053 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8054 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8055 (__v16si)_mm512_setzero_si512());
8056}
8057
8058static __inline__ __m512i __DEFAULT_FN_ATTRS512
8059_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8060 __m512i __Y)
8061{
8062 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8063 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8064 (__v16si)__W);
8065}
8066
8067#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8068
8069static __inline__ __mmask16
8071 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8072}
8073
8076 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8077}
8078
8081 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8082}
8083
8084static __inline__ int __DEFAULT_FN_ATTRS
8086{
8087 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8088}
8089
8090static __inline__ int __DEFAULT_FN_ATTRS
8092{
8093 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8094}
8095
8096static __inline__ unsigned char __DEFAULT_FN_ATTRS
8098{
8099 return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8100}
8101
8102static __inline__ unsigned char __DEFAULT_FN_ATTRS
8104{
8105 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8106}
8107
8108static __inline__ unsigned char __DEFAULT_FN_ATTRS
8109_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8110 *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8111 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8112}
8113
8114static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8116{
8117 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8118}
8119
8122 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8123}
8124
8127 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8128}
8129
8130#define _kand_mask16 _mm512_kand
8131#define _kandn_mask16 _mm512_kandn
8132#define _knot_mask16 _mm512_knot
8133#define _kor_mask16 _mm512_kor
8134#define _kxnor_mask16 _mm512_kxnor
8135#define _kxor_mask16 _mm512_kxor
8136
8137#define _kshiftli_mask16(A, I) \
8138 ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))
8139
8140#define _kshiftri_mask16(A, I) \
8141 ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8142
8143static __inline__ unsigned int __DEFAULT_FN_ATTRS
8145 return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8146}
8147
8148static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8149_cvtu32_mask16(unsigned int __A) {
8150 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8151}
8152
8153static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8155 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8156}
8157
8158static __inline__ void __DEFAULT_FN_ATTRS
8160 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8161}
8162
8163static __inline__ void __DEFAULT_FN_ATTRS512
8164_mm512_stream_si512 (void * __P, __m512i __A)
8165{
8166 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8167 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8168}
8169
8170static __inline__ __m512i __DEFAULT_FN_ATTRS512
8172{
8173 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8174 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8175}
8176
8177static __inline__ void __DEFAULT_FN_ATTRS512
8178_mm512_stream_pd (void *__P, __m512d __A)
8179{
8180 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8181 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8182}
8183
8184static __inline__ void __DEFAULT_FN_ATTRS512
8185_mm512_stream_ps (void *__P, __m512 __A)
8186{
8187 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8188 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8189}
8190
8191static __inline__ __m512d __DEFAULT_FN_ATTRS512
8192_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8193{
8194 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8195 (__v8df) __W,
8196 (__mmask8) __U);
8197}
8198
8199static __inline__ __m512d __DEFAULT_FN_ATTRS512
8201{
8202 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8203 (__v8df)
8205 (__mmask8) __U);
8206}
8207
8208static __inline__ __m512i __DEFAULT_FN_ATTRS512
8209_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8210{
8211 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8212 (__v8di) __W,
8213 (__mmask8) __U);
8214}
8215
8216static __inline__ __m512i __DEFAULT_FN_ATTRS512
8218{
8219 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8220 (__v8di)
8222 (__mmask8) __U);
8223}
8224
8225static __inline__ __m512 __DEFAULT_FN_ATTRS512
8226_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8227{
8228 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8229 (__v16sf) __W,
8230 (__mmask16) __U);
8231}
8232
8233static __inline__ __m512 __DEFAULT_FN_ATTRS512
8235{
8236 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8237 (__v16sf)
8239 (__mmask16) __U);
8240}
8241
8242static __inline__ __m512i __DEFAULT_FN_ATTRS512
8243_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8244{
8245 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8246 (__v16si) __W,
8247 (__mmask16) __U);
8248}
8249
8250static __inline__ __m512i __DEFAULT_FN_ATTRS512
8252{
8253 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8254 (__v16si)
8256 (__mmask16) __U);
8257}
8258
/* Scalar compare-to-mask for the low ss/sd element; predicate P selects the
 * comparison, _round_ forms take explicit rounding, others use the current mode. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8302
8303/* Bit Test */
8304
8305static __inline __mmask16 __DEFAULT_FN_ATTRS512
8306_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8307{
8310}
8311
8312static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8313_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8314{
8315 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8317}
8318
8319static __inline __mmask8 __DEFAULT_FN_ATTRS512
8320_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8321{
8322 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8324}
8325
8326static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8327_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8328{
8329 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8331}
8332
8333static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8334_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8335{
8336 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8338}
8339
8340static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8341_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8342{
8343 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8345}
8346
8347static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8348_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8349{
8350 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8352}
8353
8354static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8355_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8356{
8357 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8359}
8360
8361static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8363{
8364 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8365 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8366}
8367
8368static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8369_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8370 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8371 (__v16sf)_mm512_movehdup_ps(__A),
8372 (__v16sf)__W);
8373}
8374
8375static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8377 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8378 (__v16sf)_mm512_movehdup_ps(__A),
8379 (__v16sf)_mm512_setzero_ps());
8380}
8381
8382static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8384{
8385 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8386 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8387}
8388
8389static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8390_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8391 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8392 (__v16sf)_mm512_moveldup_ps(__A),
8393 (__v16sf)__W);
8394}
8395
8396static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8398 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8399 (__v16sf)_mm512_moveldup_ps(__A),
8400 (__v16sf)_mm512_setzero_ps());
8401}
8402
8403static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8404_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
8405 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8406}
8407
8408static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8409_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
8410 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8411 _mm_setzero_ps());
8412}
8413
8414static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8415_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
8416 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8417}
8418
8419static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8420_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
8421 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8422 _mm_setzero_pd());
8423}
8424
8425static __inline__ void __DEFAULT_FN_ATTRS128
8426_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8427{
8428 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8429}
8430
8431static __inline__ void __DEFAULT_FN_ATTRS128
8432_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8433{
8434 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8435}
8436
8437static __inline__ __m128 __DEFAULT_FN_ATTRS128
8438_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8439{
8440 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8441 (__v4sf)_mm_setzero_ps(),
8442 0, 4, 4, 4);
8443
8444 return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
8445}
8446
8447static __inline__ __m128 __DEFAULT_FN_ATTRS128
8448_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8449{
8450 return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
8451 (__v4sf) _mm_setzero_ps(),
8452 __U & 1);
8453}
8454
8455static __inline__ __m128d __DEFAULT_FN_ATTRS128
8456_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8457{
8458 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8459 (__v2df)_mm_setzero_pd(),
8460 0, 2);
8461
8462 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
8463}
8464
8465static __inline__ __m128d __DEFAULT_FN_ATTRS128
8466_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8467{
8468 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
8469 (__v2df) _mm_setzero_pd(),
8470 __U & 1);
8471}
8472
/* PSHUFD within each 128-bit lane by immediate I, with masked variants. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8485
8486static __inline__ __m512d __DEFAULT_FN_ATTRS512
8487_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8488{
8489 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8490 (__v8df) __W,
8491 (__mmask8) __U);
8492}
8493
8494static __inline__ __m512d __DEFAULT_FN_ATTRS512
8496{
8497 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8498 (__v8df) _mm512_setzero_pd (),
8499 (__mmask8) __U);
8500}
8501
8502static __inline__ __m512i __DEFAULT_FN_ATTRS512
8503_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8504{
8505 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8506 (__v8di) __W,
8507 (__mmask8) __U);
8508}
8509
8510static __inline__ __m512i __DEFAULT_FN_ATTRS512
8512{
8513 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8514 (__v8di) _mm512_setzero_si512 (),
8515 (__mmask8) __U);
8516}
8517
8518static __inline__ __m512d __DEFAULT_FN_ATTRS512
8519_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8520{
8521 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8522 (__v8df) __W,
8523 (__mmask8) __U);
8524}
8525
8526static __inline__ __m512d __DEFAULT_FN_ATTRS512
8528{
8529 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8530 (__v8df) _mm512_setzero_pd(),
8531 (__mmask8) __U);
8532}
8533
8534static __inline__ __m512i __DEFAULT_FN_ATTRS512
8535_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8536{
8537 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8538 (__v8di) __W,
8539 (__mmask8) __U);
8540}
8541
8542static __inline__ __m512i __DEFAULT_FN_ATTRS512
8544{
8545 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8546 (__v8di) _mm512_setzero_si512(),
8547 (__mmask8) __U);
8548}
8549
8550static __inline__ __m512 __DEFAULT_FN_ATTRS512
8551_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8552{
8553 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8554 (__v16sf) __W,
8555 (__mmask16) __U);
8556}
8557
8558static __inline__ __m512 __DEFAULT_FN_ATTRS512
8560{
8561 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8562 (__v16sf) _mm512_setzero_ps(),
8563 (__mmask16) __U);
8564}
8565
8566static __inline__ __m512i __DEFAULT_FN_ATTRS512
8567_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8568{
8569 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8570 (__v16si) __W,
8571 (__mmask16) __U);
8572}
8573
8574static __inline__ __m512i __DEFAULT_FN_ATTRS512
8576{
8577 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8578 (__v16si) _mm512_setzero_si512(),
8579 (__mmask16) __U);
8580}
8581
8582static __inline__ __m512 __DEFAULT_FN_ATTRS512
8583_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8584{
8585 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8586 (__v16sf) __W,
8587 (__mmask16) __U);
8588}
8589
8590static __inline__ __m512 __DEFAULT_FN_ATTRS512
8592{
8593 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8594 (__v16sf) _mm512_setzero_ps(),
8595 (__mmask16) __U);
8596}
8597
8598static __inline__ __m512i __DEFAULT_FN_ATTRS512
8599_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8600{
8601 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8602 (__v16si) __W,
8603 (__mmask16) __U);
8604}
8605
8606static __inline__ __m512i __DEFAULT_FN_ATTRS512
8608{
8609 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8610 (__v16si) _mm512_setzero_si512(),
8611 (__mmask16) __U);
8612}
8613
8614#define _mm512_cvt_roundps_pd(A, R) \
8615 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8616 (__v8df)_mm512_undefined_pd(), \
8617 (__mmask8)-1, (int)(R)))
8618
8619#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
8620 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8621 (__v8df)(__m512d)(W), \
8622 (__mmask8)(U), (int)(R)))
8623
8624#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
8625 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8626 (__v8df)_mm512_setzero_pd(), \
8627 (__mmask8)(U), (int)(R)))
8628
8629static __inline__ __m512d
8631 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8632}
8633
8634static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8635_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) {
8636 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8637 (__v8df)_mm512_cvtps_pd(__A),
8638 (__v8df)__W);
8639}
8640
8641static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8643 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8644 (__v8df)_mm512_cvtps_pd(__A),
8645 (__v8df)_mm512_setzero_pd());
8646}
8647
8648static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8650 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8651}
8652
8653static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8654_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) {
8655 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8656}
8657
8658static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8659_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {
8660 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8661 (__v8df)__W);
8662}
8663
8664static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8666 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8667 (__v8df)_mm512_setzero_pd());
8668}
8669
8670static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8671_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8672 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8673 (__v16sf)__W);
8674}
8675
8676static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8678 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8679 (__v16sf)_mm512_setzero_ps());
8680}
8681
8682static __inline__ void __DEFAULT_FN_ATTRS512
8684{
8685 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8686 (__mmask8) __U);
8687}
8688
8689static __inline__ void __DEFAULT_FN_ATTRS512
8691{
8692 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8693 (__mmask8) __U);
8694}
8695
8696static __inline__ void __DEFAULT_FN_ATTRS512
8698{
8699 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8700 (__mmask16) __U);
8701}
8702
8703static __inline__ void __DEFAULT_FN_ATTRS512
8705{
8706 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8707 (__mmask16) __U);
8708}
8709
8710#define _mm_cvt_roundsd_ss(A, B, R) \
8711 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8712 (__v2df)(__m128d)(B), \
8713 (__v4sf)_mm_undefined_ps(), \
8714 (__mmask8)-1, (int)(R)))
8715
8716#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
8717 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8718 (__v2df)(__m128d)(B), \
8719 (__v4sf)(__m128)(W), \
8720 (__mmask8)(U), (int)(R)))
8721
8722#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
8723 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8724 (__v2df)(__m128d)(B), \
8725 (__v4sf)_mm_setzero_ps(), \
8726 (__mmask8)(U), (int)(R)))
8727
8728static __inline__ __m128 __DEFAULT_FN_ATTRS128
8729_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
8730{
8731 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8732 (__v2df)__B,
8733 (__v4sf)__W,
8735}
8736
8737static __inline__ __m128 __DEFAULT_FN_ATTRS128
8738_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
8739{
8740 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8741 (__v2df)__B,
8742 (__v4sf)_mm_setzero_ps(),
8744}
8745
/* Aliases for scalar float<->int conversions, and rounding-controlled
 * int->float/double scalar conversions (64-bit forms only on x86_64). */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
8782
8783#define _mm_cvt_roundss_sd(A, B, R) \
8784 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8785 (__v4sf)(__m128)(B), \
8786 (__v2df)_mm_undefined_pd(), \
8787 (__mmask8)-1, (int)(R)))
8788
8789#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
8790 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8791 (__v4sf)(__m128)(B), \
8792 (__v2df)(__m128d)(W), \
8793 (__mmask8)(U), (int)(R)))
8794
8795#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
8796 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8797 (__v4sf)(__m128)(B), \
8798 (__v2df)_mm_setzero_pd(), \
8799 (__mmask8)(U), (int)(R)))
8800
8801static __inline__ __m128d __DEFAULT_FN_ATTRS128
8802_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
8803{
8804 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8805 (__v4sf)__B,
8806 (__v2df)__W,
8808}
8809
8810static __inline__ __m128d __DEFAULT_FN_ATTRS128
8811_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
8812{
8813 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8814 (__v4sf)__B,
8815 (__v2df)_mm_setzero_pd(),
8817}
8818
8819static __inline__ __m128d __DEFAULT_FN_ATTRS128
8820_mm_cvtu32_sd (__m128d __A, unsigned __B)
8821{
8822 __A[0] = __B;
8823 return __A;
8824}
8825
8826#ifdef __x86_64__
8827#define _mm_cvt_roundu64_sd(A, B, R) \
8828 ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
8829 (unsigned long long)(B), (int)(R)))
8830
8831static __inline__ __m128d __DEFAULT_FN_ATTRS128
8832_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
8833{
8834 __A[0] = __B;
8835 return __A;
8836}
8837#endif
8838
8839#define _mm_cvt_roundu32_ss(A, B, R) \
8840 ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
8841 (int)(R)))
8842
8843static __inline__ __m128 __DEFAULT_FN_ATTRS128
8844_mm_cvtu32_ss (__m128 __A, unsigned __B)
8845{
8846 __A[0] = __B;
8847 return __A;
8848}
8849
8850#ifdef __x86_64__
8851#define _mm_cvt_roundu64_ss(A, B, R) \
8852 ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
8853 (unsigned long long)(B), (int)(R)))
8854
8855static __inline__ __m128 __DEFAULT_FN_ATTRS128
8856_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
8857{
8858 __A[0] = __B;
8859 return __A;
8860}
8861#endif
8862
8863static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8864_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) {
8865 return (__m512i) __builtin_ia32_selectd_512(__M,
8866 (__v16si) _mm512_set1_epi32(__A),
8867 (__v16si) __O);
8868}
8869
8870static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8871_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) {
8872 return (__m512i) __builtin_ia32_selectq_512(__M,
8873 (__v8di) _mm512_set1_epi64(__A),
8874 (__v8di) __O);
8875}
8876
8878 char __e63, char __e62, char __e61, char __e60, char __e59, char __e58,
8879 char __e57, char __e56, char __e55, char __e54, char __e53, char __e52,
8880 char __e51, char __e50, char __e49, char __e48, char __e47, char __e46,
8881 char __e45, char __e44, char __e43, char __e42, char __e41, char __e40,
8882 char __e39, char __e38, char __e37, char __e36, char __e35, char __e34,
8883 char __e33, char __e32, char __e31, char __e30, char __e29, char __e28,
8884 char __e27, char __e26, char __e25, char __e24, char __e23, char __e22,
8885 char __e21, char __e20, char __e19, char __e18, char __e17, char __e16,
8886 char __e15, char __e14, char __e13, char __e12, char __e11, char __e10,
8887 char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3,
8888 char __e2, char __e1, char __e0) {
8889
8890 return __extension__ (__m512i)(__v64qi)
8891 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8892 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8893 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8894 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
8895 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
8896 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
8897 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
8898 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
8899}
8900
8902 short __e31, short __e30, short __e29, short __e28, short __e27,
8903 short __e26, short __e25, short __e24, short __e23, short __e22,
8904 short __e21, short __e20, short __e19, short __e18, short __e17,
8905 short __e16, short __e15, short __e14, short __e13, short __e12,
8906 short __e11, short __e10, short __e9, short __e8, short __e7, short __e6,
8907 short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) {
8908 return __extension__ (__m512i)(__v32hi)
8909 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8910 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8911 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8912 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
8913}
8914
8916 int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H,
8917 int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) {
8918 return __extension__ (__m512i)(__v16si)
8919 { __P, __O, __N, __M, __L, __K, __J, __I,
8920 __H, __G, __F, __E, __D, __C, __B, __A };
8921}
8922
8924 int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8,
8925 int e9, int e10, int e11, int e12, int e13, int e14, int e15) {
8926 return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,
8927 e3, e2, e1, e0);
8928}
8929
8930static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8931_mm512_set_epi64(long long __A, long long __B, long long __C, long long __D,
8932 long long __E, long long __F, long long __G, long long __H) {
8933 return __extension__ (__m512i) (__v8di)
8934 { __H, __G, __F, __E, __D, __C, __B, __A };
8935}
8936
8937static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8938_mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3,
8939 long long e4, long long e5, long long e6, long long e7) {
8940 return _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
8941}
8942
8943static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8944_mm512_set_pd(double __A, double __B, double __C, double __D, double __E,
8945 double __F, double __G, double __H) {
8946 return __extension__ (__m512d)
8947 { __H, __G, __F, __E, __D, __C, __B, __A };
8948}
8949
8950static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8951_mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5,
8952 double e6, double e7) {
8953 return _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0);
8954}
8955
8956static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8957_mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F,
8958 float __G, float __H, float __I, float __J, float __K, float __L,
8959 float __M, float __N, float __O, float __P) {
8960 return __extension__ (__m512)
8961 { __P, __O, __N, __M, __L, __K, __J, __I,
8962 __H, __G, __F, __E, __D, __C, __B, __A };
8963}
8964
8965static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8966_mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5,
8967 float e6, float e7, float e8, float e9, float e10, float e11,
8968 float e12, float e13, float e14, float e15) {
8969 return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
8970 e2, e1, e0);
8971}
8972
8973static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8974_mm512_abs_ps(__m512 __A) {
8975 return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
8976}
8977
8978static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8979_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) {
8980 return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
8981}
8982
8983static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8984_mm512_abs_pd(__m512d __A) {
8985 return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
8986}
8987
8988static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8989_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) {
8990 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
8991}
8992
8993/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
8994 * outputs. This class of vector operation forms the basis of many scientific
8995 * computations. In vector-reduction arithmetic, the evaluation order is
8996 * independent of the order of the input elements of V.
8997
8998 * For floating-point intrinsics:
8999 * 1. When using fadd/fmul intrinsics, the order of operations within the
9000 * vector is unspecified (associative math).
9001 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
9002 * produce unspecified results.
9003
 9004 * We use a bisection method: at each step, the vector from the previous
 9005 * step is split in half, and the operation is applied to the two halves.
9006 * This takes log2(n) steps where n is the number of elements in the vector.
9007 */
9008
9009static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9011 return __builtin_reduce_add((__v8di)__W);
9012}
9013
9014static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9016 return __builtin_reduce_mul((__v8di)__W);
9017}
9018
9019static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9021 return __builtin_reduce_and((__v8di)__W);
9022}
9023
9024static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9026 return __builtin_reduce_or((__v8di)__W);
9027}
9028
9029static __inline__ long long __DEFAULT_FN_ATTRS512
9031 __W = _mm512_maskz_mov_epi64(__M, __W);
9032 return __builtin_reduce_add((__v8di)__W);
9033}
9034
9035static __inline__ long long __DEFAULT_FN_ATTRS512
9037 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
9038 return __builtin_reduce_mul((__v8di)__W);
9039}
9040
9041static __inline__ long long __DEFAULT_FN_ATTRS512
9043 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
9044 return __builtin_reduce_and((__v8di)__W);
9045}
9046
9047static __inline__ long long __DEFAULT_FN_ATTRS512
9049 __W = _mm512_maskz_mov_epi64(__M, __W);
9050 return __builtin_reduce_or((__v8di)__W);
9051}
9052
9053// -0.0 is used to ignore the start value since it is the neutral value of
9054// floating point addition. For more information, please refer to
9055// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
9056static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
9057 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9058}
9059
9060static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
9061 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9062}
9063
9064static __inline__ double __DEFAULT_FN_ATTRS512
9066 __W = _mm512_maskz_mov_pd(__M, __W);
9067 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9068}
9069
9070static __inline__ double __DEFAULT_FN_ATTRS512
9072 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9073 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9074}
9075
9076static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9078 return __builtin_reduce_add((__v16si)__W);
9079}
9080
9081static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9083 return __builtin_reduce_mul((__v16si)__W);
9084}
9085
9086static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9088 return __builtin_reduce_and((__v16si)__W);
9089}
9090
9091static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9093 return __builtin_reduce_or((__v16si)__W);
9094}
9095
9096static __inline__ int __DEFAULT_FN_ATTRS512
9098 __W = _mm512_maskz_mov_epi32(__M, __W);
9099 return __builtin_reduce_add((__v16si)__W);
9100}
9101
9102static __inline__ int __DEFAULT_FN_ATTRS512
9104 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9105 return __builtin_reduce_mul((__v16si)__W);
9106}
9107
9108static __inline__ int __DEFAULT_FN_ATTRS512
9110 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
9111 return __builtin_reduce_and((__v16si)__W);
9112}
9113
9114static __inline__ int __DEFAULT_FN_ATTRS512
9116 __W = _mm512_maskz_mov_epi32(__M, __W);
9117 return __builtin_reduce_or((__v16si)__W);
9118}
9119
9120static __inline__ float __DEFAULT_FN_ATTRS512
9122 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9123}
9124
9125static __inline__ float __DEFAULT_FN_ATTRS512
9127 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9128}
9129
9130static __inline__ float __DEFAULT_FN_ATTRS512
9132 __W = _mm512_maskz_mov_ps(__M, __W);
9133 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9134}
9135
9136static __inline__ float __DEFAULT_FN_ATTRS512
9138 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9139 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9140}
9141
9142static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9144 return __builtin_reduce_max((__v8di)__V);
9145}
9146
9147static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9149 return __builtin_reduce_max((__v8du)__V);
9150}
9151
9152static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9154 return __builtin_reduce_min((__v8di)__V);
9155}
9156
9157static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9159 return __builtin_reduce_min((__v8du)__V);
9160}
9161
9162static __inline__ long long __DEFAULT_FN_ATTRS512
9164 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9165 return __builtin_reduce_max((__v8di)__V);
9166}
9167
9168static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9170 __V = _mm512_maskz_mov_epi64(__M, __V);
9171 return __builtin_reduce_max((__v8du)__V);
9172}
9173
9174static __inline__ long long __DEFAULT_FN_ATTRS512
9176 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9177 return __builtin_reduce_min((__v8di)__V);
9178}
9179
9180static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9182 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
9183 return __builtin_reduce_min((__v8du)__V);
9184}
9185static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9187 return __builtin_reduce_max((__v16si)__V);
9188}
9189
9190static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9192 return __builtin_reduce_max((__v16su)__V);
9193}
9194
9195static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9197 return __builtin_reduce_min((__v16si)__V);
9198}
9199
9200static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9202 return __builtin_reduce_min((__v16su)__V);
9203}
9204
9205static __inline__ int __DEFAULT_FN_ATTRS512
9207 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9208 return __builtin_reduce_max((__v16si)__V);
9209}
9210
9211static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9213 __V = _mm512_maskz_mov_epi32(__M, __V);
9214 return __builtin_reduce_max((__v16su)__V);
9215}
9216
9217static __inline__ int __DEFAULT_FN_ATTRS512
9219 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9220 return __builtin_reduce_min((__v16si)__V);
9221}
9222
9223static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9225 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
9226 return __builtin_reduce_min((__v16su)__V);
9227}
9228
9229static __inline__ double __DEFAULT_FN_ATTRS512
9231 return __builtin_ia32_reduce_fmax_pd512(__V);
9232}
9233
9234static __inline__ double __DEFAULT_FN_ATTRS512
9236 return __builtin_ia32_reduce_fmin_pd512(__V);
9237}
9238
9239static __inline__ double __DEFAULT_FN_ATTRS512
9241 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9242 return __builtin_ia32_reduce_fmax_pd512(__V);
9243}
9244
9245static __inline__ double __DEFAULT_FN_ATTRS512
9247 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9248 return __builtin_ia32_reduce_fmin_pd512(__V);
9249}
9250
9251static __inline__ float __DEFAULT_FN_ATTRS512
9253 return __builtin_ia32_reduce_fmax_ps512(__V);
9254}
9255
9256static __inline__ float __DEFAULT_FN_ATTRS512
9258 return __builtin_ia32_reduce_fmin_ps512(__V);
9259}
9260
9261static __inline__ float __DEFAULT_FN_ATTRS512
9263 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9264 return __builtin_ia32_reduce_fmax_ps512(__V);
9265}
9266
9267static __inline__ float __DEFAULT_FN_ATTRS512
9269 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9270 return __builtin_ia32_reduce_fmin_ps512(__V);
9271}
9272
9273/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9274/// 32-bit signed integer value.
9275///
9276/// \headerfile <x86intrin.h>
9277///
9278/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9279///
9280/// \param __A
9281/// A vector of [16 x i32]. The least significant 32 bits are moved to the
9282/// destination.
9283/// \returns A 32-bit signed integer containing the moved value.
9284static __inline__ int __DEFAULT_FN_ATTRS512
9286 __v16si __b = (__v16si)__A;
9287 return __b[0];
9288}
9289
9290/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
9291/// locations starting at location \a base_addr at packed 32-bit integer indices
9292/// stored in the lower half of \a vindex, scaled by \a scale, and stores them in dst.
9293///
9294/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9295///
9296/// \code{.operation}
9297/// FOR j := 0 to 7
9298/// i := j*64
9299/// m := j*32
9300/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9301/// dst[i+63:i] := MEM[addr+63:addr]
9302/// ENDFOR
9303/// dst[MAX:512] := 0
9304/// \endcode
9305#define _mm512_i32logather_pd(vindex, base_addr, scale) \
9306 _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9307
9308/// Loads 8 double-precision (64-bit) floating-point elements from memory
9309/// starting at location \a base_addr at packed 32-bit integer indices stored in
9310/// the lower half of \a vindex scaled by \a scale into dst using writemask
9311/// \a mask (elements are copied from \a src when the corresponding mask bit is
9312/// not set).
9313///
9314/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9315///
9316/// \code{.operation}
9317/// FOR j := 0 to 7
9318/// i := j*64
9319/// m := j*32
9320/// IF mask[j]
9321/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9322/// dst[i+63:i] := MEM[addr+63:addr]
9323/// ELSE
9324/// dst[i+63:i] := src[i+63:i]
9325/// FI
9326/// ENDFOR
9327/// dst[MAX:512] := 0
9328/// \endcode
9329#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
9330 _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \
9331 (base_addr), (scale))
9332
9333/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9334/// at packed 32-bit integer indices stored in the lower half of \a vindex
9335/// scaled by \a scale and stores them in dst.
9336///
9337/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9338///
9339/// \code{.operation}
9340/// FOR j := 0 to 7
9341/// i := j*64
9342/// m := j*32
9343/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9344/// dst[i+63:i] := MEM[addr+63:addr]
9345/// ENDFOR
9346/// dst[MAX:512] := 0
9347/// \endcode
9348#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
9349 _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9350
9351/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9352/// at packed 32-bit integer indices stored in the lower half of \a vindex
9353/// scaled by \a scale and stores them in dst using writemask \a mask (elements
9354/// are copied from \a src when the corresponding mask bit is not set).
9355///
9356/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9357///
9358/// \code{.operation}
9359/// FOR j := 0 to 7
9360/// i := j*64
9361/// m := j*32
9362/// IF mask[j]
9363/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9364/// dst[i+63:i] := MEM[addr+63:addr]
9365/// ELSE
9366/// dst[i+63:i] := src[i+63:i]
9367/// FI
9368/// ENDFOR
9369/// dst[MAX:512] := 0
9370/// \endcode
9371#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
9372 _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \
9373 (base_addr), (scale))
9374
9375/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9376/// to memory locations starting at location \a base_addr at packed 32-bit
9377/// integer indices stored in \a vindex scaled by \a scale.
9378///
9379/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9380///
9381/// \code{.operation}
9382/// FOR j := 0 to 7
9383/// i := j*64
9384/// m := j*32
9385/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9386/// MEM[addr+63:addr] := v1[i+63:i]
9387/// ENDFOR
9388/// \endcode
9389#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
9390 _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
9391
9392/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9393/// to memory locations starting at location \a base_addr at packed 32-bit
9394/// integer indices stored in \a vindex scaled by \a scale. Only those elements
9395/// whose corresponding mask bit is set in writemask \a mask are written to
9396/// memory.
9397///
9398/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9399///
9400/// \code{.operation}
9401/// FOR j := 0 to 7
9402/// i := j*64
9403/// m := j*32
9404/// IF mask[j]
9405/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9406/// MEM[addr+63:addr] := a[i+63:i]
9407/// FI
9408/// ENDFOR
9409/// \endcode
9410#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
9411 _mm512_mask_i32scatter_pd((base_addr), (mask), \
9412 _mm512_castsi512_si256(vindex), (v1), (scale))
9413
9414/// Takes 8 packed 64-bit integer elements located in \a v1 and stores them in
9415/// memory locations starting at location \a base_addr at packed 32-bit integer
9416/// indices stored in \a vindex scaled by \a scale.
9417///
9418/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9419///
9420/// \code{.operation}
9421/// FOR j := 0 to 7
9422/// i := j*64
9423/// m := j*32
9424/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9425/// MEM[addr+63:addr] := a[i+63:i]
9426/// ENDFOR
9427/// \endcode
9428#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
9429 _mm512_i32scatter_epi64((base_addr), \
9430 _mm512_castsi512_si256(vindex), (v1), (scale))
9431
9432/// Takes 8 packed 64-bit integer elements located in \a v1 and stores them in
9433/// memory locations starting at location \a base_addr at packed 32-bit integer
9434/// indices stored in \a vindex scaled by scale using writemask \a mask (elements
9435/// whose corresponding mask bit is not set are not written to memory).
9436///
9437/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9438///
9439/// \code{.operation}
9440/// FOR j := 0 to 7
9441/// i := j*64
9442/// m := j*32
9443/// IF mask[j]
9444/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9445/// MEM[addr+63:addr] := a[i+63:i]
9446/// FI
9447/// ENDFOR
9448/// \endcode
9449#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
9450 _mm512_mask_i32scatter_epi64((base_addr), (mask), \
9451 _mm512_castsi512_si256(vindex), (v1), (scale))
9452
9453#undef __DEFAULT_FN_ATTRS512
9454#undef __DEFAULT_FN_ATTRS128
9455#undef __DEFAULT_FN_ATTRS
9456#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
9457#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
9458#undef __DEFAULT_FN_ATTRS_CONSTEXPR
9459
9460#endif /* __AVX512FINTRIN_H */
#define __L(__X)
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
Definition altivec.h:578
static __inline__ uint32_t volatile uint32_t * __p
Definition arm_acle.h:57
return __v
Definition arm_acle.h:88
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS_CONSTEXPR
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8, int e9, int e10, int e11, int e12, int e13, int e14, int e15)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si256(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7, float e8, float e9, float e10, float e11, float e12, float e13, float e14, float e15)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
_MM_MANTISSA_NORM_ENUM
@ _MM_MANT_NORM_p5_1
@ _MM_MANT_NORM_p5_2
@ _MM_MANT_NORM_1_2
@ _MM_MANT_NORM_p75_1p5
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5, double e6, double e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps256(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3, long long e4, long long e5, long long e6, long long e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_int2mask(int __a)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
_MM_CMPINT_ENUM
@ _MM_CMPINT_NE
@ _MM_CMPINT_NLT
@ _MM_CMPINT_LE
@ _MM_CMPINT_EQ
@ _MM_CMPINT_LT
@ _MM_CMPINT_UNUSED
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_knot(__mmask16 __M)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
unsigned char __mmask8
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
@ _MM_TERNLOG_A
@ _MM_TERNLOG_B
@ _MM_TERNLOG_C
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
_MM_MANTISSA_SIGN_ENUM
@ _MM_MANT_SIGN_zero
@ _MM_MANT_SIGN_src
@ _MM_MANT_SIGN_nan
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi32(int e0, int e1, int e2, int e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32(__m512i __A, __m512i __B)
_MM_PERM_ENUM
@ _MM_PERM_BBCA
@ _MM_PERM_BCBD
@ _MM_PERM_DAAB
@ _MM_PERM_CBBD
@ _MM_PERM_DCCC
@ _MM_PERM_CDBB
@ _MM_PERM_DDDD
@ _MM_PERM_CCCC
@ _MM_PERM_CADA
@ _MM_PERM_BACD
@ _MM_PERM_CCAD
@ _MM_PERM_ABDB
@ _MM_PERM_BBBD
@ _MM_PERM_DCAB
@ _MM_PERM_BABC
@ _MM_PERM_AACD
@ _MM_PERM_BBAB
@ _MM_PERM_DCDB
@ _MM_PERM_BACC
@ _MM_PERM_ABDA
@ _MM_PERM_ACCC
@ _MM_PERM_ADAC
@ _MM_PERM_CCCD
@ _MM_PERM_CADD
@ _MM_PERM_ACCB
@ _MM_PERM_BBDB
@ _MM_PERM_ABBB
@ _MM_PERM_BACB
@ _MM_PERM_CDCA
@ _MM_PERM_ACBC
@ _MM_PERM_ADCB
@ _MM_PERM_BBBC
@ _MM_PERM_DBBA
@ _MM_PERM_BBBB
@ _MM_PERM_DDDB
@ _MM_PERM_CAAA
@ _MM_PERM_ADBB
@ _MM_PERM_ACDB
@ _MM_PERM_DCAD
@ _MM_PERM_DBBC
@ _MM_PERM_BAAB
@ _MM_PERM_BDDD
@ _MM_PERM_BBAD
@ _MM_PERM_DDBA
@ _MM_PERM_CDCD
@ _MM_PERM_CCCA
@ _MM_PERM_DBBB
@ _MM_PERM_DAAD
@ _MM_PERM_DCBA
@ _MM_PERM_CCBC
@ _MM_PERM_ADDD
@ _MM_PERM_DBAC
@ _MM_PERM_ABAB
@ _MM_PERM_CBDB
@ _MM_PERM_CDBC
@ _MM_PERM_AABC
@ _MM_PERM_DABD
@ _MM_PERM_CBBA
@ _MM_PERM_CBAA
@ _MM_PERM_BDDB
@ _MM_PERM_CABC
@ _MM_PERM_BDBD
@ _MM_PERM_BCAD
@ _MM_PERM_ACBA
@ _MM_PERM_ADBA
@ _MM_PERM_ADBC
@ _MM_PERM_DBCB
@ _MM_PERM_CBDC
@ _MM_PERM_CBAD
@ _MM_PERM_ABCC
@ _MM_PERM_AAAD
@ _MM_PERM_CBAC
@ _MM_PERM_CCDA
@ _MM_PERM_CDAC
@ _MM_PERM_BADD
@ _MM_PERM_DAAC
@ _MM_PERM_BCCC
@ _MM_PERM_DBBD
@ _MM_PERM_DDAC
@ _MM_PERM_DACD
@ _MM_PERM_BAAC
@ _MM_PERM_ACCA
@ _MM_PERM_ABDD
@ _MM_PERM_BBCC
@ _MM_PERM_DAAA
@ _MM_PERM_CAAB
@ _MM_PERM_BCDB
@ _MM_PERM_ACBB
@ _MM_PERM_CDAB
@ _MM_PERM_DBDB
@ _MM_PERM_AABB
@ _MM_PERM_DBDA
@ _MM_PERM_BCBA
@ _MM_PERM_CBAB
@ _MM_PERM_DCDC
@ _MM_PERM_BBCB
@ _MM_PERM_CDCB
@ _MM_PERM_AACA
@ _MM_PERM_ACBD
@ _MM_PERM_AAAB
@ _MM_PERM_DCCB
@ _MM_PERM_ADDB
@ _MM_PERM_AAAA
@ _MM_PERM_AACC
@ _MM_PERM_BDDC
@ _MM_PERM_CBBC
@ _MM_PERM_DDCC
@ _MM_PERM_CABD
@ _MM_PERM_AADC
@ _MM_PERM_BCBC
@ _MM_PERM_BCCA
@ _MM_PERM_CCBD
@ _MM_PERM_CBBB
@ _MM_PERM_CDBA
@ _MM_PERM_CACD
@ _MM_PERM_BDAD
@ _MM_PERM_ADCA
@ _MM_PERM_AAAC
@ _MM_PERM_BDDA
@ _MM_PERM_CCAC
@ _MM_PERM_ACDC
@ _MM_PERM_DBCA
@ _MM_PERM_DBAA
@ _MM_PERM_AABD
@ _MM_PERM_CDCC
@ _MM_PERM_DCAA
@ _MM_PERM_DDDC
@ _MM_PERM_CDDB
@ _MM_PERM_AABA
@ _MM_PERM_DDBB
@ _MM_PERM_CDDA
@ _MM_PERM_AADD
@ _MM_PERM_BADC
@ _MM_PERM_BDBA
@ _MM_PERM_DBDD
@ _MM_PERM_BDAC
@ _MM_PERM_DBDC
@ _MM_PERM_BBBA
@ _MM_PERM_DDBC
@ _MM_PERM_BAAA
@ _MM_PERM_BDCC
@ _MM_PERM_DDAB
@ _MM_PERM_BCCB
@ _MM_PERM_BCCD
@ _MM_PERM_ADBD
@ _MM_PERM_ADCC
@ _MM_PERM_CCBB
@ _MM_PERM_CDAA
@ _MM_PERM_BBDA
@ _MM_PERM_CACC
@ _MM_PERM_DCBB
@ _MM_PERM_DABA
@ _MM_PERM_BADB
@ _MM_PERM_ABCA
@ _MM_PERM_CBCC
@ _MM_PERM_ABAD
@ _MM_PERM_BDBC
@ _MM_PERM_DDDA
@ _MM_PERM_ADAB
@ _MM_PERM_CADB
@ _MM_PERM_ADAA
@ _MM_PERM_ACAC
@ _MM_PERM_DADD
@ _MM_PERM_BABD
@ _MM_PERM_ACCD
@ _MM_PERM_CCAA
@ _MM_PERM_AADA
@ _MM_PERM_BDCA
@ _MM_PERM_CDDD
@ _MM_PERM_ABBD
@ _MM_PERM_ACAA
@ _MM_PERM_ACDD
@ _MM_PERM_DABB
@ _MM_PERM_CCCB
@ _MM_PERM_AADB
@ _MM_PERM_DBAD
@ _MM_PERM_BBDD
@ _MM_PERM_BCDC
@ _MM_PERM_CABA
@ _MM_PERM_BBAA
@ _MM_PERM_ADAD
@ _MM_PERM_BADA
@ _MM_PERM_DCDA
@ _MM_PERM_ABBA
@ _MM_PERM_ACAB
@ _MM_PERM_CCDD
@ _MM_PERM_CADC
@ _MM_PERM_DDCB
@ _MM_PERM_BABB
@ _MM_PERM_CCDB
@ _MM_PERM_DDAD
@ _MM_PERM_DBCC
@ _MM_PERM_BCBB
@ _MM_PERM_ADDC
@ _MM_PERM_CCBA
@ _MM_PERM_ABCD
@ _MM_PERM_BCAB
@ _MM_PERM_DCBC
@ _MM_PERM_BCDD
@ _MM_PERM_CCDC
@ _MM_PERM_ABAC
@ _MM_PERM_CBCB
@ _MM_PERM_CCAB
@ _MM_PERM_DDCD
@ _MM_PERM_DACA
@ _MM_PERM_ACAD
@ _MM_PERM_BABA
@ _MM_PERM_CBCD
@ _MM_PERM_CAAD
@ _MM_PERM_DCDD
@ _MM_PERM_BDBB
@ _MM_PERM_BCAA
@ _MM_PERM_ABDC
@ _MM_PERM_BBCD
@ _MM_PERM_CAAC
@ _MM_PERM_BBAC
@ _MM_PERM_CBCA
@ _MM_PERM_DCAC
@ _MM_PERM_ABAA
@ _MM_PERM_CACB
@ _MM_PERM_BBDC
@ _MM_PERM_CDAD
@ _MM_PERM_ADCD
@ _MM_PERM_DADB
@ _MM_PERM_DBCD
@ _MM_PERM_DACC
@ _MM_PERM_DACB
@ _MM_PERM_DCBD
@ _MM_PERM_CACA
@ _MM_PERM_ABBC
@ _MM_PERM_DCCA
@ _MM_PERM_DABC
@ _MM_PERM_CBDD
@ _MM_PERM_DDBD
@ _MM_PERM_DDCA
@ _MM_PERM_BDCD
@ _MM_PERM_CDBD
@ _MM_PERM_ABCB
@ _MM_PERM_CDDC
@ _MM_PERM_AACB
@ _MM_PERM_DDAA
@ _MM_PERM_ADDA
@ _MM_PERM_DADA
@ _MM_PERM_BCDA
@ _MM_PERM_BDAB
@ _MM_PERM_BAAD
@ _MM_PERM_DBAB
@ _MM_PERM_DCCD
@ _MM_PERM_CABB
@ _MM_PERM_BDAA
@ _MM_PERM_BDCB
@ _MM_PERM_ACDA
@ _MM_PERM_DADC
@ _MM_PERM_CBDA
@ _MM_PERM_BCAC
@ _MM_PERM_BACA
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_pd(double e0, double e1, double e2, double e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32lo_pd(__m512i __A)
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_ps(float e0, float e1, float e2, float e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition avxintrin.h:3622
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3635
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4306
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4294
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4318
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition emmintrin.h:120
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition emmintrin.h:199
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition emmintrin.h:80
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:1887
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition emmintrin.h:159
static __inline__ void short __D
Definition immintrin.h:342
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition bmi2intrin.h:19
#define _MM_FROUND_FLOOR
Definition smmintrin.h:41
#define _MM_FROUND_CEIL
Definition smmintrin.h:42
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition xmmintrin.h:2804
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:160
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
Definition xmmintrin.h:119
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:79
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2021
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
Definition xmmintrin.h:200