clang 22.0.0git
avx512fintrin.h
Go to the documentation of this file.
1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
/* Internal 512-bit vector types used to implement the intrinsics.
 * These element-typed aliases (not the public __m512* types) are what the
 * bodies below cast through so that lane-wise operators have the right
 * element type. Naming: v<count><kind>, e.g. __v16si = 16 x signed int. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v64qs __attribute__((__vector_size__(64)));
32
/* Public 512-bit vector types: float, double, and integer lanes.
 * These are 64-byte aligned; loads/stores through them assume alignment. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Unaligned (align-1) variants, used by the loadu/storeu intrinsics. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Write-mask types: one bit per lane (8 lanes of 64-bit, 16 lanes of 32-bit). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
43
/* Rounding mode macros. Passed as the (R) argument of the *_round_*
 * intrinsics; _MM_FROUND_CUR_DIRECTION uses the current MXCSR rounding. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
50
51/* Constants for integer comparison predicates */
52typedef enum {
53 _MM_CMPINT_EQ, /* Equal */
54 _MM_CMPINT_LT, /* Less than */
55 _MM_CMPINT_LE, /* Less than or Equal */
57 _MM_CMPINT_NE, /* Not Equal */
58 _MM_CMPINT_NLT, /* Not Less than */
59#define _MM_CMPINT_GE _MM_CMPINT_NLT /* Greater than or Equal */
60 _MM_CMPINT_NLE /* Not Less than or Equal */
61#define _MM_CMPINT_GT _MM_CMPINT_NLE /* Greater than */
63
64typedef enum
65{
153
154typedef enum
155{
156 _MM_MANT_NORM_1_2, /* interval [1, 2) */
157 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
158 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
159 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
161
162typedef enum
163{
164 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
165 _MM_MANT_SIGN_zero, /* sign = 0 */
166 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
168
/* Define the default attributes for the functions in this file.
 * __min_vector_width__ keeps clang from narrowing the required vector
 * register width for the function; the 128 variant is for the scalar
 * (ss/sd) helpers that only touch XMM registers. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
178
/* In C++11 and later, the simple wrapper intrinsics are additionally
 * constexpr so they can be used in constant expressions; in C (or older
 * C++) the _CONSTEXPR names fall back to the plain attribute sets. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
188
189/* Create vectors with repeated elements */
190
191static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
193 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
194}
195
196#define _mm512_setzero_epi32 _mm512_setzero_si512
197
198static __inline__ __m512d __DEFAULT_FN_ATTRS512
200{
201 return (__m512d)__builtin_ia32_undef512();
202}
203
204static __inline__ __m512 __DEFAULT_FN_ATTRS512
206{
207 return (__m512)__builtin_ia32_undef512();
208}
209
210static __inline__ __m512 __DEFAULT_FN_ATTRS512
212{
213 return (__m512)__builtin_ia32_undef512();
214}
215
216static __inline__ __m512i __DEFAULT_FN_ATTRS512
218{
219 return (__m512i)__builtin_ia32_undef512();
220}
221
222static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
224 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
226}
227
/* Broadcast the low 32-bit element of __A to all 16 lanes; lanes whose
 * mask bit in __M is clear keep the corresponding lane of __O instead. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) {
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}
234
235static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
237 return (__m512i)__builtin_ia32_selectd_512(__M,
238 (__v16si) _mm512_broadcastd_epi32(__A),
239 (__v16si) _mm512_setzero_si512());
240}
241
242static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
244 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
245 0, 0, 0, 0, 0, 0, 0, 0);
246}
247
/* Broadcast the low 64-bit element of __A to all 8 lanes; lanes whose
 * mask bit in __M is clear keep the corresponding lane of __O instead. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
  return (__m512i)__builtin_ia32_selectq_512(
      __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O);
}
253
254static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
256 return (__m512i)__builtin_ia32_selectq_512(__M,
257 (__v8di) _mm512_broadcastq_epi64(__A),
258 (__v8di) _mm512_setzero_si512());
259}
260
262 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
263 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
264}
265
266#define _mm512_setzero _mm512_setzero_ps
267
268static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
270 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
271}
272
273static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
275{
276 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
277 __w, __w, __w, __w, __w, __w, __w, __w };
278}
279
/* Return a vector of [8 x double] with all elements equal to __w. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set1_pd(double __w)
{
  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}
285
286static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
288{
289 return __extension__ (__m512i)(__v64qi){
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w,
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w };
298}
299
300static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
302{
303 return __extension__ (__m512i)(__v32hi){
304 __w, __w, __w, __w, __w, __w, __w, __w,
305 __w, __w, __w, __w, __w, __w, __w, __w,
306 __w, __w, __w, __w, __w, __w, __w, __w,
307 __w, __w, __w, __w, __w, __w, __w, __w };
308}
309
310static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
312{
313 return __extension__ (__m512i)(__v16si){
314 __s, __s, __s, __s, __s, __s, __s, __s,
315 __s, __s, __s, __s, __s, __s, __s, __s };
316}
317
318static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
320 return (__m512i)__builtin_ia32_selectd_512(__M,
321 (__v16si)_mm512_set1_epi32(__A),
322 (__v16si)_mm512_setzero_si512());
323}
324
/* Return a vector of [8 x i64] with all elements equal to __d. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set1_epi64(long long __d)
{
  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}
330
331static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
333 return (__m512i)__builtin_ia32_selectq_512(__M,
334 (__v8di)_mm512_set1_epi64(__A),
335 (__v8di)_mm512_setzero_si512());
336}
337
338static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
340 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
342}
343
/* Return a vector of [16 x i32] repeating the pattern (D,C,B,A) four
 * times, i.e. __A ends up in the highest lane of each 128-bit group. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_epi32(int __A, int __B, int __C, int __D) {
  return __extension__ (__m512i)(__v16si)
    { __D, __C, __B, __A, __D, __C, __B, __A,
      __D, __C, __B, __A, __D, __C, __B, __A };
}
350
/* Return a vector of [8 x i64] repeating the pattern (D,C,B,A) twice. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D) {
  return __extension__ (__m512i) (__v8di)
    { __D, __C, __B, __A, __D, __C, __B, __A };
}
356
/* Return a vector of [8 x double] repeating the pattern (D,C,B,A) twice. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_pd(double __A, double __B, double __C, double __D) {
  return __extension__ (__m512d)
    { __D, __C, __B, __A, __D, __C, __B, __A };
}
362
/* Return a vector of [16 x float] repeating the pattern (D,C,B,A)
 * four times. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_ps(float __A, float __B, float __C, float __D) {
  return __extension__ (__m512)
    { __D, __C, __B, __A, __D, __C, __B, __A,
      __D, __C, __B, __A, __D, __C, __B, __A };
}
369
/* Reversed-argument form of _mm512_set4_epi32: e0 is the lowest lane of
 * each repeated group. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_epi32(int e0, int e1, int e2, int e3) {
  return _mm512_set4_epi32(e3, e2, e1, e0);
}
374
/* Reversed-argument form of _mm512_set4_epi64. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) {
  return _mm512_set4_epi64(e3, e2, e1, e0);
}
379
/* Reversed-argument form of _mm512_set4_pd. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_pd(double e0, double e1, double e2, double e3) {
  return _mm512_set4_pd(e3, e2, e1, e0);
}
384
/* Reversed-argument form of _mm512_set4_ps. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_ps(float e0, float e1, float e2, float e3) {
  return _mm512_set4_ps(e3, e2, e1, e0);
}
389
390static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
392 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
393 0, 0, 0, 0, 0, 0, 0, 0);
394}
395
396/* Cast between vector types */
397
398static __inline __m512d __DEFAULT_FN_ATTRS512
400{
401 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
402 1, 2, 3, 4, 5, 6, 7);
403}
404
405static __inline __m512 __DEFAULT_FN_ATTRS512
407{
408 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
409 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
410}
411
412static __inline __m128d __DEFAULT_FN_ATTRS512
414{
415 return __builtin_shufflevector(__a, __a, 0, 1);
416}
417
418static __inline __m256d __DEFAULT_FN_ATTRS512
420{
421 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
422}
423
424static __inline __m128 __DEFAULT_FN_ATTRS512
426{
427 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
428}
429
430static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
432 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
433}
434
/* Bit-cast [8 x double] to [16 x float]; no conversion, the 512 bits are
 * reinterpreted as-is. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}
440
441static __inline __m512i __DEFAULT_FN_ATTRS512
443{
444 return (__m512i) (__A);
445}
446
447static __inline__ __m512d __DEFAULT_FN_ATTRS512
449{
450 __m256d __B = __builtin_nondeterministic_value(__B);
451 return __builtin_shufflevector(
452 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
453 __B, 0, 1, 2, 3, 4, 5, 6, 7);
454}
455
456static __inline __m512d __DEFAULT_FN_ATTRS512
458{
459 return (__m512d) (__A);
460}
461
462static __inline __m512i __DEFAULT_FN_ATTRS512
464{
465 return (__m512i) (__A);
466}
467
468static __inline__ __m512 __DEFAULT_FN_ATTRS512
470{
471 __m256 __B = __builtin_nondeterministic_value(__B);
472 return __builtin_shufflevector(
473 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
474 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
475}
476
477static __inline__ __m512i __DEFAULT_FN_ATTRS512
479{
480 __m256i __B = __builtin_nondeterministic_value(__B);
481 return __builtin_shufflevector(
482 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
483 __B, 0, 1, 2, 3, 4, 5, 6, 7);
484}
485
486static __inline__ __m512i __DEFAULT_FN_ATTRS512
488{
489 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
490}
491
492static __inline __m512 __DEFAULT_FN_ATTRS512
494{
495 return (__m512) (__A);
496}
497
498static __inline __m512d __DEFAULT_FN_ATTRS512
500{
501 return (__m512d) (__A);
502}
503
504static __inline __m128i __DEFAULT_FN_ATTRS512
506{
507 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
508}
509
510static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR
512 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
513}
514
517 return (__mmask16)__a;
518}
519
520static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
522 return (int)__a;
523}
524
525/// Constructs a 512-bit floating-point vector of [8 x double] from a
526/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
527/// contain the value of the source vector. The upper 384 bits are set
528/// to zero.
529///
530/// \headerfile <x86intrin.h>
531///
532/// This intrinsic has no corresponding instruction.
533///
534/// \param __a
535/// A 128-bit vector of [2 x double].
536/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
537/// contain the value of the parameter. The upper 384 bits are set to zero.
538static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
540 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
541}
542
543/// Constructs a 512-bit floating-point vector of [8 x double] from a
544/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
545/// contain the value of the source vector. The upper 256 bits are set
546/// to zero.
547///
548/// \headerfile <x86intrin.h>
549///
550/// This intrinsic has no corresponding instruction.
551///
552/// \param __a
553/// A 256-bit vector of [4 x double].
554/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
555/// contain the value of the parameter. The upper 256 bits are set to zero.
556static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
558 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
559}
560
561/// Constructs a 512-bit floating-point vector of [16 x float] from a
562/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
563/// the value of the source vector. The upper 384 bits are set to zero.
564///
565/// \headerfile <x86intrin.h>
566///
567/// This intrinsic has no corresponding instruction.
568///
569/// \param __a
570/// A 128-bit vector of [4 x float].
571/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
572/// contain the value of the parameter. The upper 384 bits are set to zero.
573static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
575 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
576}
577
578/// Constructs a 512-bit floating-point vector of [16 x float] from a
579/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
580/// the value of the source vector. The upper 256 bits are set to zero.
581///
582/// \headerfile <x86intrin.h>
583///
584/// This intrinsic has no corresponding instruction.
585///
586/// \param __a
587/// A 256-bit vector of [8 x float].
588/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
589/// contain the value of the parameter. The upper 256 bits are set to zero.
590static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
592 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
593}
594
595/// Constructs a 512-bit integer vector from a 128-bit integer vector.
596/// The lower 128 bits contain the value of the source vector. The upper
597/// 384 bits are set to zero.
598///
599/// \headerfile <x86intrin.h>
600///
601/// This intrinsic has no corresponding instruction.
602///
603/// \param __a
604/// A 128-bit integer vector.
605/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
606/// the parameter. The upper 384 bits are set to zero.
607static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
609 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
610}
611
612/// Constructs a 512-bit integer vector from a 256-bit integer vector.
613/// The lower 256 bits contain the value of the source vector. The upper
614/// 256 bits are set to zero.
615///
616/// \headerfile <x86intrin.h>
617///
618/// This intrinsic has no corresponding instruction.
619///
620/// \param __a
621/// A 256-bit integer vector.
622/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
623/// the parameter. The upper 256 bits are set to zero.
624static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
626 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
627}
628
629/* Bitwise operators */
/* Bitwise AND, viewed as 16 x 32-bit lanes (unsigned lane type keeps the
 * operation well-defined for any bit pattern). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}
635
/* Masked AND: lanes with mask bit set get (__a & __b); cleared-mask lanes
 * keep the corresponding lane of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si) _mm512_and_epi32(__a, __b),
                                             (__v16si) __src);
}
642
643static __inline__ __m512i __DEFAULT_FN_ATTRS512
645{
647 __k, __a, __b);
648}
649
/* Bitwise AND, viewed as 8 x 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}
655
/* Masked AND (64-bit lanes): cleared-mask lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) {
  return (__m512i)__builtin_ia32_selectq_512(
      (__mmask8)__k, (__v8di)_mm512_and_epi64(__a, __b), (__v8di)__src);
}
661
662static __inline__ __m512i __DEFAULT_FN_ATTRS512
664{
666 __k, __a, __b);
667}
668
/* Bitwise (~__A) & __B over the whole 512 bits. Note the first operand is
 * the one complemented, matching the VPANDN instruction semantics. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}
674
/* (~__A) & __B, viewed as 16 x 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)__A & (__v16su)__B);
}
680
/* Masked ANDNOT (32-bit lanes): cleared-mask lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_andnot_epi32(__A, __B),
                                             (__v16si)__W);
}
688
689static __inline__ __m512i __DEFAULT_FN_ATTRS512
690_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
691{
693 __U, __A, __B);
694}
695
/* (~__A) & __B, viewed as 8 x 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}
701
/* Masked ANDNOT (64-bit lanes): cleared-mask lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_andnot_epi64(__A, __B),
                                             (__v8di)__W);
}
709
710static __inline__ __m512i __DEFAULT_FN_ATTRS512
711_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
712{
714 __U, __A, __B);
715}
716
/* Bitwise OR, viewed as 16 x 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}
722
/* Masked OR (32-bit lanes): cleared-mask lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}
730
731static __inline__ __m512i __DEFAULT_FN_ATTRS512
733{
734 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
735}
736
/* Bitwise OR, viewed as 8 x 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}
742
/* Masked OR (64-bit lanes): cleared-mask lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}
750
/* Zero-masked OR (64-bit lanes): cleared-mask lanes become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
756
/* Bitwise XOR, viewed as 16 x 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}
762
/* Masked XOR (32-bit lanes): cleared-mask lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_xor_epi32(__a, __b),
                                             (__v16si)__src);
}
770
771static __inline__ __m512i __DEFAULT_FN_ATTRS512
773{
774 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
775}
776
/* Bitwise XOR, viewed as 8 x 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
782
/* Masked XOR (64-bit lanes): cleared-mask lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}
790
791static __inline__ __m512i __DEFAULT_FN_ATTRS512
793{
794 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
795}
796
/* Whole-register 512-bit AND (no lane interpretation). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}
802
/* Whole-register 512-bit OR (no lane interpretation). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}
808
/* Whole-register 512-bit XOR (no lane interpretation). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
814
815/* Arithmetic */
816
/* Lane-wise add of [8 x double], using the current rounding mode. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_pd(__m512d __a, __m512d __b) {
  return (__m512d)((__v8df)__a + (__v8df)__b);
}
821
/* Lane-wise add of [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a + (__v16sf)__b);
}
826
/* Lane-wise multiply of [8 x double]. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mul_pd(__m512d __a, __m512d __b) {
  return (__m512d)((__v8df)__a * (__v8df)__b);
}
831
/* Lane-wise multiply of [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mul_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a * (__v16sf)__b);
}
836
/* Lane-wise subtract (__a - __b) of [8 x double]. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_pd(__m512d __a, __m512d __b) {
  return (__m512d)((__v8df)__a - (__v8df)__b);
}
841
/* Lane-wise subtract (__a - __b) of [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a - (__v16sf)__b);
}
846
/* Lane-wise 64-bit add; unsigned lane type gives well-defined wraparound. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_epi64(__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}
851
/* Masked 64-bit add: cleared-mask lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}
858
/* Zero-masked 64-bit add: cleared-mask lanes become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
865
/* Lane-wise 64-bit subtract (__A - __B) with wraparound. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi64(__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}
870
/* Masked 64-bit subtract: cleared-mask lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}
877
/* Zero-masked 64-bit subtract: cleared-mask lanes become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
884
/* Lane-wise 32-bit add with wraparound. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_epi32(__m512i __A, __m512i __B) {
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}
889
/* Masked 32-bit add: cleared-mask lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}
896
/* Zero-masked 32-bit add: cleared-mask lanes become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
903
/* Lane-wise 32-bit subtract (__A - __B) with wraparound. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi32(__m512i __A, __m512i __B) {
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}
908
/* Masked 32-bit subtract: cleared-mask lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}
915
/* Zero-masked 32-bit subtract: cleared-mask lanes become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
922
/* Lane-wise max of [8 x double] with an explicit rounding/SAE argument (R),
 * plus merge-masked and zero-masked variants. Implemented as macros because
 * (R) must be an integer constant expression. */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                        (__v8df)(W)))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
936
937static __inline__ __m512d __DEFAULT_FN_ATTRS512
938_mm512_max_pd(__m512d __A, __m512d __B)
939{
940 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
942}
943
/* Masked lane-wise double max: cleared-mask lanes keep __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)__W);
}
951
/* Zero-masked lane-wise double max: cleared-mask lanes become zero. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
959
/* Lane-wise max of [16 x float] with explicit rounding/SAE argument (R),
 * plus merge-masked and zero-masked variants (macros: (R) must be a
 * compile-time constant). */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                       (__v16sf)(W)))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
973
974static __inline__ __m512 __DEFAULT_FN_ATTRS512
975_mm512_max_ps(__m512 __A, __m512 __B)
976{
977 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
979}
980
/* Masked lane-wise float max: cleared-mask lanes keep __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)__W);
}
988
/* Zero-masked lane-wise float max: cleared-mask lanes become zero. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
996
997static __inline__ __m128 __DEFAULT_FN_ATTRS128
998_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
999 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1000 (__v4sf) __B,
1001 (__v4sf) __W,
1002 (__mmask8) __U,
1004}
1005
1006static __inline__ __m128 __DEFAULT_FN_ATTRS128
1007_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1008 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1009 (__v4sf) __B,
1010 (__v4sf) _mm_setzero_ps (),
1011 (__mmask8) __U,
1013}
1014
/* Scalar single-precision max (low lane only; upper lanes copied from A)
 * with explicit rounding/SAE argument, plus merge- and zero-masked forms.
 * (__mmask8)-1 = all-ones mask, i.e. the unmasked variant. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1032
1033static __inline__ __m128d __DEFAULT_FN_ATTRS128
1034_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1035 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1036 (__v2df) __B,
1037 (__v2df) __W,
1038 (__mmask8) __U,
1040}
1041
1042static __inline__ __m128d __DEFAULT_FN_ATTRS128
1043_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1044 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1045 (__v2df) __B,
1046 (__v2df) _mm_setzero_pd (),
1047 (__mmask8) __U,
1049}
1050
/* Scalar double-precision max (low lane only; upper lane copied from A)
 * with explicit rounding/SAE argument, plus merge- and zero-masked forms. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1068
1069static __inline __m512i
1071 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1072}
1073
/* Masked signed 32-bit lane-wise max: cleared-mask lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epi32(__A, __B),
                                             (__v16si)__W);
}
1080
/* Zero-masked signed 32-bit lane-wise max: cleared-mask lanes become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
1087
1088static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1089_mm512_max_epu32(__m512i __A, __m512i __B) {
1090 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1091}
1092
1093static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1094_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1095 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1096 (__v16si)_mm512_max_epu32(__A, __B),
1097 (__v16si)__W);
1098}
1099
1100static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1101_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1102 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1103 (__v16si)_mm512_max_epu32(__A, __B),
1104 (__v16si)_mm512_setzero_si512());
1105}
1106
1107static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1108_mm512_max_epi64(__m512i __A, __m512i __B) {
1109 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1110}
1111
1112static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1113_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1114 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1115 (__v8di)_mm512_max_epi64(__A, __B),
1116 (__v8di)__W);
1117}
1118
1119static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1120_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1121 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1122 (__v8di)_mm512_max_epi64(__A, __B),
1123 (__v8di)_mm512_setzero_si512());
1124}
1125
1126static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1127_mm512_max_epu64(__m512i __A, __m512i __B) {
1128 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1129}
1130
1131static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1132_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1133 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1134 (__v8di)_mm512_max_epu64(__A, __B),
1135 (__v8di)__W);
1136}
1137
1138static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1139_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1140 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1141 (__v8di)_mm512_max_epu64(__A, __B),
1142 (__v8di)_mm512_setzero_si512());
1143}
1144
1145#define _mm512_min_round_pd(A, B, R) \
1146 ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1147 (__v8df)(__m512d)(B), (int)(R)))
1148
1149#define _mm512_mask_min_round_pd(W, U, A, B, R) \
1150 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1151 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1152 (__v8df)(W)))
1153
1154#define _mm512_maskz_min_round_pd(U, A, B, R) \
1155 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1156 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1157 (__v8df)_mm512_setzero_pd()))
1158
1159static __inline__ __m512d __DEFAULT_FN_ATTRS512
1160_mm512_min_pd(__m512d __A, __m512d __B)
1161{
1162 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1164}
1165
1166static __inline__ __m512d __DEFAULT_FN_ATTRS512
1167_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1168{
1169 return (__m512d)__builtin_ia32_selectpd_512(__U,
1170 (__v8df)_mm512_min_pd(__A, __B),
1171 (__v8df)__W);
1172}
1173
1174static __inline__ __m512d __DEFAULT_FN_ATTRS512
1175_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1176{
1177 return (__m512d)__builtin_ia32_selectpd_512(__U,
1178 (__v8df)_mm512_min_pd(__A, __B),
1179 (__v8df)_mm512_setzero_pd());
1180}
1181
1182#define _mm512_min_round_ps(A, B, R) \
1183 ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1184 (__v16sf)(__m512)(B), (int)(R)))
1185
1186#define _mm512_mask_min_round_ps(W, U, A, B, R) \
1187 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1188 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1189 (__v16sf)(W)))
1190
1191#define _mm512_maskz_min_round_ps(U, A, B, R) \
1192 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1193 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1194 (__v16sf)_mm512_setzero_ps()))
1195
1196static __inline__ __m512 __DEFAULT_FN_ATTRS512
1197_mm512_min_ps(__m512 __A, __m512 __B)
1198{
1199 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1201}
1202
1203static __inline__ __m512 __DEFAULT_FN_ATTRS512
1204_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1205{
1206 return (__m512)__builtin_ia32_selectps_512(__U,
1207 (__v16sf)_mm512_min_ps(__A, __B),
1208 (__v16sf)__W);
1209}
1210
1211static __inline__ __m512 __DEFAULT_FN_ATTRS512
1212_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1213{
1214 return (__m512)__builtin_ia32_selectps_512(__U,
1215 (__v16sf)_mm512_min_ps(__A, __B),
1216 (__v16sf)_mm512_setzero_ps());
1217}
1218
1219static __inline__ __m128 __DEFAULT_FN_ATTRS128
1220_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1221 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1222 (__v4sf) __B,
1223 (__v4sf) __W,
1224 (__mmask8) __U,
1226}
1227
1228static __inline__ __m128 __DEFAULT_FN_ATTRS128
1229_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1230 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1231 (__v4sf) __B,
1232 (__v4sf) _mm_setzero_ps (),
1233 (__mmask8) __U,
1235}
1236
1237#define _mm_min_round_ss(A, B, R) \
1238 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1239 (__v4sf)(__m128)(B), \
1240 (__v4sf)_mm_setzero_ps(), \
1241 (__mmask8)-1, (int)(R)))
1242
1243#define _mm_mask_min_round_ss(W, U, A, B, R) \
1244 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1245 (__v4sf)(__m128)(B), \
1246 (__v4sf)(__m128)(W), (__mmask8)(U), \
1247 (int)(R)))
1248
1249#define _mm_maskz_min_round_ss(U, A, B, R) \
1250 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1251 (__v4sf)(__m128)(B), \
1252 (__v4sf)_mm_setzero_ps(), \
1253 (__mmask8)(U), (int)(R)))
1254
1255static __inline__ __m128d __DEFAULT_FN_ATTRS128
1256_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1257 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1258 (__v2df) __B,
1259 (__v2df) __W,
1260 (__mmask8) __U,
1262}
1263
1264static __inline__ __m128d __DEFAULT_FN_ATTRS128
1265_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1266 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1267 (__v2df) __B,
1268 (__v2df) _mm_setzero_pd (),
1269 (__mmask8) __U,
1271}
1272
1273#define _mm_min_round_sd(A, B, R) \
1274 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1275 (__v2df)(__m128d)(B), \
1276 (__v2df)_mm_setzero_pd(), \
1277 (__mmask8)-1, (int)(R)))
1278
1279#define _mm_mask_min_round_sd(W, U, A, B, R) \
1280 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1281 (__v2df)(__m128d)(B), \
1282 (__v2df)(__m128d)(W), \
1283 (__mmask8)(U), (int)(R)))
1284
1285#define _mm_maskz_min_round_sd(U, A, B, R) \
1286 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1287 (__v2df)(__m128d)(B), \
1288 (__v2df)_mm_setzero_pd(), \
1289 (__mmask8)(U), (int)(R)))
1290
1291static __inline __m512i
1293 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1294}
1295
1296static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1297_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1298 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1299 (__v16si)_mm512_min_epi32(__A, __B),
1300 (__v16si)__W);
1301}
1302
1303static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1304_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1305 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1306 (__v16si)_mm512_min_epi32(__A, __B),
1307 (__v16si)_mm512_setzero_si512());
1308}
1309
1310static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1311_mm512_min_epu32(__m512i __A, __m512i __B) {
1312 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1313}
1314
1315static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1316_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1317 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1318 (__v16si)_mm512_min_epu32(__A, __B),
1319 (__v16si)__W);
1320}
1321
1322static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1323_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1324 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1325 (__v16si)_mm512_min_epu32(__A, __B),
1326 (__v16si)_mm512_setzero_si512());
1327}
1328
1329static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1330_mm512_min_epi64(__m512i __A, __m512i __B) {
1331 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1332}
1333
1334static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1335_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1336 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1337 (__v8di)_mm512_min_epi64(__A, __B),
1338 (__v8di)__W);
1339}
1340
1341static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1342_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1343 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1344 (__v8di)_mm512_min_epi64(__A, __B),
1345 (__v8di)_mm512_setzero_si512());
1346}
1347
1348static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1349_mm512_min_epu64(__m512i __A, __m512i __B) {
1350 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1351}
1352
1353static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1354_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1355 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1356 (__v8di)_mm512_min_epu64(__A, __B),
1357 (__v8di)__W);
1358}
1359
1360static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1361_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1362 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1363 (__v8di)_mm512_min_epu64(__A, __B),
1364 (__v8di)_mm512_setzero_si512());
1365}
1366
1367static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1368_mm512_mul_epi32(__m512i __X, __m512i __Y) {
1369 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1370}
1371
1372static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1373_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
1374 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1375 (__v8di)_mm512_mul_epi32(__X, __Y),
1376 (__v8di)__W);
1377}
1378
1379static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1380_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) {
1381 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1382 (__v8di)_mm512_mul_epi32(__X, __Y),
1383 (__v8di)_mm512_setzero_si512 ());
1384}
1385
1386static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1387_mm512_mul_epu32(__m512i __X, __m512i __Y) {
1388 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1389}
1390
1391static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1392_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
1393 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1394 (__v8di)_mm512_mul_epu32(__X, __Y),
1395 (__v8di)__W);
1396}
1397
1398static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1399_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) {
1400 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1401 (__v8di)_mm512_mul_epu32(__X, __Y),
1402 (__v8di)_mm512_setzero_si512 ());
1403}
1404
1405static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1406_mm512_mullo_epi32(__m512i __A, __m512i __B) {
1407 return (__m512i) ((__v16su) __A * (__v16su) __B);
1408}
1409
1410static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1411_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1412 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1413 (__v16si)_mm512_mullo_epi32(__A, __B),
1414 (__v16si)_mm512_setzero_si512());
1415}
1416
1417static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1418_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1419 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1420 (__v16si)_mm512_mullo_epi32(__A, __B),
1421 (__v16si)__W);
1422}
1423
1424static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1425_mm512_mullox_epi64(__m512i __A, __m512i __B) {
1426 return (__m512i) ((__v8du) __A * (__v8du) __B);
1427}
1428
1429static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1430_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1431 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1432 (__v8di)_mm512_mullox_epi64(__A, __B),
1433 (__v8di)__W);
1434}
1435
1436#define _mm512_sqrt_round_pd(A, R) \
1437 ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1438
1439#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1440 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1441 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1442 (__v8df)(__m512d)(W)))
1443
1444#define _mm512_maskz_sqrt_round_pd(U, A, R) \
1445 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1446 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1447 (__v8df)_mm512_setzero_pd()))
1448
1449static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) {
1450 return (__m512d)__builtin_elementwise_sqrt((__v8df)__A);
1451}
1452
1453static __inline__ __m512d __DEFAULT_FN_ATTRS512
1454_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) {
1455 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
1456 (__v8df)__W);
1457}
1458
1459static __inline__ __m512d __DEFAULT_FN_ATTRS512
1461 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
1462 (__v8df)_mm512_setzero_pd());
1463}
1464
1465#define _mm512_sqrt_round_ps(A, R) \
1466 ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1467
1468#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1469 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1470 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1471 (__v16sf)(__m512)(W)))
1472
1473#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1474 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1475 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1476 (__v16sf)_mm512_setzero_ps()))
1477
1478static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) {
1479 return (__m512)__builtin_elementwise_sqrt((__v16sf)__A);
1480}
1481
1482static __inline__ __m512 __DEFAULT_FN_ATTRS512
1483_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) {
1484 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
1485 (__v16sf)__W);
1486}
1487
1488static __inline__ __m512 __DEFAULT_FN_ATTRS512
1490 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
1491 (__v16sf)_mm512_setzero_ps());
1492}
1493
1494static __inline__ __m512d __DEFAULT_FN_ATTRS512
1496{
1497 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1498 (__v8df)
1500 (__mmask8) -1);}
1501
1502static __inline__ __m512d __DEFAULT_FN_ATTRS512
1503_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1504{
1505 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1506 (__v8df) __W,
1507 (__mmask8) __U);
1508}
1509
1510static __inline__ __m512d __DEFAULT_FN_ATTRS512
1512{
1513 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1514 (__v8df)
1516 (__mmask8) __U);
1517}
1518
1519static __inline__ __m512 __DEFAULT_FN_ATTRS512
1521{
1522 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1523 (__v16sf)
1525 (__mmask16) -1);
1526}
1527
1528static __inline__ __m512 __DEFAULT_FN_ATTRS512
1529_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1530{
1531 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1532 (__v16sf) __W,
1533 (__mmask16) __U);
1534}
1535
1536static __inline__ __m512 __DEFAULT_FN_ATTRS512
1538{
1539 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1540 (__v16sf)
1542 (__mmask16) __U);
1543}
1544
1545static __inline__ __m128 __DEFAULT_FN_ATTRS128
1546_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1547{
1548 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1549 (__v4sf) __B,
1550 (__v4sf)
1551 _mm_setzero_ps (),
1552 (__mmask8) -1);
1553}
1554
1555static __inline__ __m128 __DEFAULT_FN_ATTRS128
1556_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1557{
1558 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1559 (__v4sf) __B,
1560 (__v4sf) __W,
1561 (__mmask8) __U);
1562}
1563
1564static __inline__ __m128 __DEFAULT_FN_ATTRS128
1565_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1566{
1567 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1568 (__v4sf) __B,
1569 (__v4sf) _mm_setzero_ps (),
1570 (__mmask8) __U);
1571}
1572
1573static __inline__ __m128d __DEFAULT_FN_ATTRS128
1574_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1575{
1576 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1577 (__v2df) __B,
1578 (__v2df)
1579 _mm_setzero_pd (),
1580 (__mmask8) -1);
1581}
1582
1583static __inline__ __m128d __DEFAULT_FN_ATTRS128
1584_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1585{
1586 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1587 (__v2df) __B,
1588 (__v2df) __W,
1589 (__mmask8) __U);
1590}
1591
1592static __inline__ __m128d __DEFAULT_FN_ATTRS128
1593_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1594{
1595 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1596 (__v2df) __B,
1597 (__v2df) _mm_setzero_pd (),
1598 (__mmask8) __U);
1599}
1600
1601static __inline__ __m512d __DEFAULT_FN_ATTRS512
1603{
1604 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1605 (__v8df)
1607 (__mmask8) -1);
1608}
1609
1610static __inline__ __m512d __DEFAULT_FN_ATTRS512
1611_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1612{
1613 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1614 (__v8df) __W,
1615 (__mmask8) __U);
1616}
1617
1618static __inline__ __m512d __DEFAULT_FN_ATTRS512
1620{
1621 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1622 (__v8df)
1624 (__mmask8) __U);
1625}
1626
1627static __inline__ __m512 __DEFAULT_FN_ATTRS512
1629{
1630 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1631 (__v16sf)
1633 (__mmask16) -1);
1634}
1635
1636static __inline__ __m512 __DEFAULT_FN_ATTRS512
1637_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1638{
1639 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1640 (__v16sf) __W,
1641 (__mmask16) __U);
1642}
1643
1644static __inline__ __m512 __DEFAULT_FN_ATTRS512
1646{
1647 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1648 (__v16sf)
1650 (__mmask16) __U);
1651}
1652
1653static __inline__ __m128 __DEFAULT_FN_ATTRS128
1654_mm_rcp14_ss(__m128 __A, __m128 __B)
1655{
1656 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1657 (__v4sf) __B,
1658 (__v4sf)
1659 _mm_setzero_ps (),
1660 (__mmask8) -1);
1661}
1662
1663static __inline__ __m128 __DEFAULT_FN_ATTRS128
1664_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1665{
1666 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1667 (__v4sf) __B,
1668 (__v4sf) __W,
1669 (__mmask8) __U);
1670}
1671
1672static __inline__ __m128 __DEFAULT_FN_ATTRS128
1673_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1674{
1675 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1676 (__v4sf) __B,
1677 (__v4sf) _mm_setzero_ps (),
1678 (__mmask8) __U);
1679}
1680
1681static __inline__ __m128d __DEFAULT_FN_ATTRS128
1682_mm_rcp14_sd(__m128d __A, __m128d __B)
1683{
1684 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1685 (__v2df) __B,
1686 (__v2df)
1687 _mm_setzero_pd (),
1688 (__mmask8) -1);
1689}
1690
1691static __inline__ __m128d __DEFAULT_FN_ATTRS128
1692_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1693{
1694 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1695 (__v2df) __B,
1696 (__v2df) __W,
1697 (__mmask8) __U);
1698}
1699
1700static __inline__ __m128d __DEFAULT_FN_ATTRS128
1701_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1702{
1703 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1704 (__v2df) __B,
1705 (__v2df) _mm_setzero_pd (),
1706 (__mmask8) __U);
1707}
1708
1709static __inline __m512 __DEFAULT_FN_ATTRS512
1711{
1712 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1714 (__v16sf) __A, (unsigned short)-1,
1716}
1717
1718static __inline__ __m512 __DEFAULT_FN_ATTRS512
1719_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1720{
1721 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1723 (__v16sf) __W, __U,
1725}
1726
1727static __inline __m512d __DEFAULT_FN_ATTRS512
1729{
1730 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1732 (__v8df) __A, (unsigned char)-1,
1734}
1735
1736static __inline__ __m512d __DEFAULT_FN_ATTRS512
1737_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1738{
1739 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1741 (__v8df) __W, __U,
1743}
1744
1745static __inline__ __m512 __DEFAULT_FN_ATTRS512
1746_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1747{
1748 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1750 (__v16sf) __W, __U,
1752}
1753
1754static __inline __m512 __DEFAULT_FN_ATTRS512
1756{
1757 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1759 (__v16sf) __A, (unsigned short)-1,
1761}
1762
1763static __inline __m512d __DEFAULT_FN_ATTRS512
1764_mm512_ceil_pd(__m512d __A)
1765{
1766 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1768 (__v8df) __A, (unsigned char)-1,
1770}
1771
1772static __inline__ __m512d __DEFAULT_FN_ATTRS512
1773_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1774{
1775 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1777 (__v8df) __W, __U,
1779}
1780
1781static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1782_mm512_abs_epi64(__m512i __A) {
1783 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1784}
1785
1786static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1787_mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
1788 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1789 (__v8di)_mm512_abs_epi64(__A),
1790 (__v8di)__W);
1791}
1792
1793static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1795 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1796 (__v8di)_mm512_abs_epi64(__A),
1797 (__v8di)_mm512_setzero_si512());
1798}
1799
1800static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1801_mm512_abs_epi32(__m512i __A) {
1802 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1803}
1804
1805static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1806_mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
1807 return (__m512i)__builtin_ia32_selectd_512(__U,
1808 (__v16si)_mm512_abs_epi32(__A),
1809 (__v16si)__W);
1810}
1811
1812static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1814 return (__m512i)__builtin_ia32_selectd_512(__U,
1815 (__v16si)_mm512_abs_epi32(__A),
1816 (__v16si)_mm512_setzero_si512());
1817}
1818
1819static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1820_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1821 __A = _mm_add_ss(__A, __B);
1822 return __builtin_ia32_selectss_128(__U, __A, __W);
1823}
1824
1825static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1826_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1827 __A = _mm_add_ss(__A, __B);
1828 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1829}
1830
1831#define _mm_add_round_ss(A, B, R) \
1832 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1833 (__v4sf)(__m128)(B), \
1834 (__v4sf)_mm_setzero_ps(), \
1835 (__mmask8)-1, (int)(R)))
1836
1837#define _mm_mask_add_round_ss(W, U, A, B, R) \
1838 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1839 (__v4sf)(__m128)(B), \
1840 (__v4sf)(__m128)(W), (__mmask8)(U), \
1841 (int)(R)))
1842
1843#define _mm_maskz_add_round_ss(U, A, B, R) \
1844 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1845 (__v4sf)(__m128)(B), \
1846 (__v4sf)_mm_setzero_ps(), \
1847 (__mmask8)(U), (int)(R)))
1848
1849static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1850_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1851 __A = _mm_add_sd(__A, __B);
1852 return __builtin_ia32_selectsd_128(__U, __A, __W);
1853}
1854
1855static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1856_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1857 __A = _mm_add_sd(__A, __B);
1858 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1859}
1860#define _mm_add_round_sd(A, B, R) \
1861 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1862 (__v2df)(__m128d)(B), \
1863 (__v2df)_mm_setzero_pd(), \
1864 (__mmask8)-1, (int)(R)))
1865
1866#define _mm_mask_add_round_sd(W, U, A, B, R) \
1867 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1868 (__v2df)(__m128d)(B), \
1869 (__v2df)(__m128d)(W), \
1870 (__mmask8)(U), (int)(R)))
1871
1872#define _mm_maskz_add_round_sd(U, A, B, R) \
1873 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1874 (__v2df)(__m128d)(B), \
1875 (__v2df)_mm_setzero_pd(), \
1876 (__mmask8)(U), (int)(R)))
1877
1878static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1879_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1880 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1881 (__v8df)_mm512_add_pd(__A, __B),
1882 (__v8df)__W);
1883}
1884
1885static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1886_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1887 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1888 (__v8df)_mm512_add_pd(__A, __B),
1889 (__v8df)_mm512_setzero_pd());
1890}
1891
1892static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1893_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1894 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1895 (__v16sf)_mm512_add_ps(__A, __B),
1896 (__v16sf)__W);
1897}
1898
1899static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1900_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1901 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1902 (__v16sf)_mm512_add_ps(__A, __B),
1903 (__v16sf)_mm512_setzero_ps());
1904}
1905
1906#define _mm512_add_round_pd(A, B, R) \
1907 ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1908 (__v8df)(__m512d)(B), (int)(R)))
1909
1910#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1911 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1912 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1913 (__v8df)(__m512d)(W)))
1914
1915#define _mm512_maskz_add_round_pd(U, A, B, R) \
1916 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1917 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1918 (__v8df)_mm512_setzero_pd()))
1919
1920#define _mm512_add_round_ps(A, B, R) \
1921 ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
1922 (__v16sf)(__m512)(B), (int)(R)))
1923
1924#define _mm512_mask_add_round_ps(W, U, A, B, R) \
1925 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1926 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1927 (__v16sf)(__m512)(W)))
1928
1929#define _mm512_maskz_add_round_ps(U, A, B, R) \
1930 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1931 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1932 (__v16sf)_mm512_setzero_ps()))
1933
1934static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1935_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1936 __A = _mm_sub_ss(__A, __B);
1937 return __builtin_ia32_selectss_128(__U, __A, __W);
1938}
1939
1940static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1941_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1942 __A = _mm_sub_ss(__A, __B);
1943 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1944}
1945#define _mm_sub_round_ss(A, B, R) \
1946 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1947 (__v4sf)(__m128)(B), \
1948 (__v4sf)_mm_setzero_ps(), \
1949 (__mmask8)-1, (int)(R)))
1950
1951#define _mm_mask_sub_round_ss(W, U, A, B, R) \
1952 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1953 (__v4sf)(__m128)(B), \
1954 (__v4sf)(__m128)(W), (__mmask8)(U), \
1955 (int)(R)))
1956
1957#define _mm_maskz_sub_round_ss(U, A, B, R) \
1958 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1959 (__v4sf)(__m128)(B), \
1960 (__v4sf)_mm_setzero_ps(), \
1961 (__mmask8)(U), (int)(R)))
1962
1963static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1964_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1965 __A = _mm_sub_sd(__A, __B);
1966 return __builtin_ia32_selectsd_128(__U, __A, __W);
1967}
1968
1969static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1970_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1971 __A = _mm_sub_sd(__A, __B);
1972 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1973}
1974
1975#define _mm_sub_round_sd(A, B, R) \
1976 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1977 (__v2df)(__m128d)(B), \
1978 (__v2df)_mm_setzero_pd(), \
1979 (__mmask8)-1, (int)(R)))
1980
1981#define _mm_mask_sub_round_sd(W, U, A, B, R) \
1982 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1983 (__v2df)(__m128d)(B), \
1984 (__v2df)(__m128d)(W), \
1985 (__mmask8)(U), (int)(R)))
1986
1987#define _mm_maskz_sub_round_sd(U, A, B, R) \
1988 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1989 (__v2df)(__m128d)(B), \
1990 (__v2df)_mm_setzero_pd(), \
1991 (__mmask8)(U), (int)(R)))
1992
1993static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1994_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1995 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1996 (__v8df)_mm512_sub_pd(__A, __B),
1997 (__v8df)__W);
1998}
1999
2000static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2001_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2002 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2003 (__v8df)_mm512_sub_pd(__A, __B),
2004 (__v8df)_mm512_setzero_pd());
2005}
2006
2007static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2008_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2009 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2010 (__v16sf)_mm512_sub_ps(__A, __B),
2011 (__v16sf)__W);
2012}
2013
2014static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2015_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2016 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2017 (__v16sf)_mm512_sub_ps(__A, __B),
2018 (__v16sf)_mm512_setzero_ps());
2019}
2020
2021#define _mm512_sub_round_pd(A, B, R) \
2022 ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
2023 (__v8df)(__m512d)(B), (int)(R)))
2024
2025#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
2026 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2027 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2028 (__v8df)(__m512d)(W)))
2029
2030#define _mm512_maskz_sub_round_pd(U, A, B, R) \
2031 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2032 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2033 (__v8df)_mm512_setzero_pd()))
2034
2035#define _mm512_sub_round_ps(A, B, R) \
2036 ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
2037 (__v16sf)(__m512)(B), (int)(R)))
2038
2039#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
2040 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2041 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2042 (__v16sf)(__m512)(W)))
2043
2044#define _mm512_maskz_sub_round_ps(U, A, B, R) \
2045 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2046 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2047 (__v16sf)_mm512_setzero_ps()))
2048
2049static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2050_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2051 __A = _mm_mul_ss(__A, __B);
2052 return __builtin_ia32_selectss_128(__U, __A, __W);
2053}
2054
2055static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2056_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2057 __A = _mm_mul_ss(__A, __B);
2058 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2059}
2060#define _mm_mul_round_ss(A, B, R) \
2061 ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2062 (__v4sf)(__m128)(B), \
2063 (__v4sf)_mm_setzero_ps(), \
2064 (__mmask8)-1, (int)(R)))
2065
2066#define _mm_mask_mul_round_ss(W, U, A, B, R) \
2067 ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2068 (__v4sf)(__m128)(B), \
2069 (__v4sf)(__m128)(W), (__mmask8)(U), \
2070 (int)(R)))
2071
2072#define _mm_maskz_mul_round_ss(U, A, B, R) \
2073 ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2074 (__v4sf)(__m128)(B), \
2075 (__v4sf)_mm_setzero_ps(), \
2076 (__mmask8)(U), (int)(R)))
2077
2078static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2079_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2080 __A = _mm_mul_sd(__A, __B);
2081 return __builtin_ia32_selectsd_128(__U, __A, __W);
2082}
2083
2084static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2085_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2086 __A = _mm_mul_sd(__A, __B);
2087 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2088}
2089
2090#define _mm_mul_round_sd(A, B, R) \
2091 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2092 (__v2df)(__m128d)(B), \
2093 (__v2df)_mm_setzero_pd(), \
2094 (__mmask8)-1, (int)(R)))
2095
2096#define _mm_mask_mul_round_sd(W, U, A, B, R) \
2097 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2098 (__v2df)(__m128d)(B), \
2099 (__v2df)(__m128d)(W), \
2100 (__mmask8)(U), (int)(R)))
2101
2102#define _mm_maskz_mul_round_sd(U, A, B, R) \
2103 ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2104 (__v2df)(__m128d)(B), \
2105 (__v2df)_mm_setzero_pd(), \
2106 (__mmask8)(U), (int)(R)))
2107
2108static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2109_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2110 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2111 (__v8df)_mm512_mul_pd(__A, __B),
2112 (__v8df)__W);
2113}
2114
2115static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2116_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2117 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2118 (__v8df)_mm512_mul_pd(__A, __B),
2119 (__v8df)_mm512_setzero_pd());
2120}
2121
2122static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2123_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2124 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2125 (__v16sf)_mm512_mul_ps(__A, __B),
2126 (__v16sf)__W);
2127}
2128
2129static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2130_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2131 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2132 (__v16sf)_mm512_mul_ps(__A, __B),
2133 (__v16sf)_mm512_setzero_ps());
2134}
2135
2136#define _mm512_mul_round_pd(A, B, R) \
2137 ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
2138 (__v8df)(__m512d)(B), (int)(R)))
2139
2140#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
2141 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2142 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2143 (__v8df)(__m512d)(W)))
2144
2145#define _mm512_maskz_mul_round_pd(U, A, B, R) \
2146 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2147 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2148 (__v8df)_mm512_setzero_pd()))
2149
2150#define _mm512_mul_round_ps(A, B, R) \
2151 ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
2152 (__v16sf)(__m512)(B), (int)(R)))
2153
2154#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
2155 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2156 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2157 (__v16sf)(__m512)(W)))
2158
2159#define _mm512_maskz_mul_round_ps(U, A, B, R) \
2160 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2161 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2162 (__v16sf)_mm512_setzero_ps()))
2163
2164static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2165_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2166 __A = _mm_div_ss(__A, __B);
2167 return __builtin_ia32_selectss_128(__U, __A, __W);
2168}
2169
2170static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2171_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2172 __A = _mm_div_ss(__A, __B);
2173 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2174}
2175
2176#define _mm_div_round_ss(A, B, R) \
2177 ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2178 (__v4sf)(__m128)(B), \
2179 (__v4sf)_mm_setzero_ps(), \
2180 (__mmask8)-1, (int)(R)))
2181
2182#define _mm_mask_div_round_ss(W, U, A, B, R) \
2183 ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2184 (__v4sf)(__m128)(B), \
2185 (__v4sf)(__m128)(W), (__mmask8)(U), \
2186 (int)(R)))
2187
2188#define _mm_maskz_div_round_ss(U, A, B, R) \
2189 ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2190 (__v4sf)(__m128)(B), \
2191 (__v4sf)_mm_setzero_ps(), \
2192 (__mmask8)(U), (int)(R)))
2193
2194static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2195_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2196 __A = _mm_div_sd(__A, __B);
2197 return __builtin_ia32_selectsd_128(__U, __A, __W);
2198}
2199
2200static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2201_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2202 __A = _mm_div_sd(__A, __B);
2203 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2204}
2205
2206#define _mm_div_round_sd(A, B, R) \
2207 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2208 (__v2df)(__m128d)(B), \
2209 (__v2df)_mm_setzero_pd(), \
2210 (__mmask8)-1, (int)(R)))
2211
2212#define _mm_mask_div_round_sd(W, U, A, B, R) \
2213 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2214 (__v2df)(__m128d)(B), \
2215 (__v2df)(__m128d)(W), \
2216 (__mmask8)(U), (int)(R)))
2217
2218#define _mm_maskz_div_round_sd(U, A, B, R) \
2219 ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2220 (__v2df)(__m128d)(B), \
2221 (__v2df)_mm_setzero_pd(), \
2222 (__mmask8)(U), (int)(R)))
2223
2224static __inline __m512d
2226 return (__m512d)((__v8df)__a/(__v8df)__b);
2227}
2228
2229static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2230_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2231 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2232 (__v8df)_mm512_div_pd(__A, __B),
2233 (__v8df)__W);
2234}
2235
2236static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2237_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2238 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2239 (__v8df)_mm512_div_pd(__A, __B),
2240 (__v8df)_mm512_setzero_pd());
2241}
2242
2243static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2244_mm512_div_ps(__m512 __a, __m512 __b) {
2245 return (__m512)((__v16sf)__a/(__v16sf)__b);
2246}
2247
2248static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2249_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2250 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2251 (__v16sf)_mm512_div_ps(__A, __B),
2252 (__v16sf)__W);
2253}
2254
2255static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2256_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2257 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2258 (__v16sf)_mm512_div_ps(__A, __B),
2259 (__v16sf)_mm512_setzero_ps());
2260}
2261
2262#define _mm512_div_round_pd(A, B, R) \
2263 ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
2264 (__v8df)(__m512d)(B), (int)(R)))
2265
2266#define _mm512_mask_div_round_pd(W, U, A, B, R) \
2267 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2268 (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2269 (__v8df)(__m512d)(W)))
2270
2271#define _mm512_maskz_div_round_pd(U, A, B, R) \
2272 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2273 (__v8df)_mm512_div_round_pd((A), (B), (R)), \
2274 (__v8df)_mm512_setzero_pd()))
2275
2276#define _mm512_div_round_ps(A, B, R) \
2277 ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
2278 (__v16sf)(__m512)(B), (int)(R)))
2279
2280#define _mm512_mask_div_round_ps(W, U, A, B, R) \
2281 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2282 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2283 (__v16sf)(__m512)(W)))
2284
2285#define _mm512_maskz_div_round_ps(U, A, B, R) \
2286 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2287 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
2288 (__v16sf)_mm512_setzero_ps()))
2289
/* Round-to-scale (VRNDSCALE) macro variants for ps/pd, with and without an
 * explicit rounding mode. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))

#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))

/* FMA with explicit rounding mode, packed double.  Operand negation encodes
 * the fmsub/fnmadd/fnmsub variants on top of the vfmadd builtins. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

2437static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2438_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
2439 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2440 (__v8df)__C);
2441}
2442
2443static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2444_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2445 return (__m512d)__builtin_ia32_selectpd_512(
2446 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__A);
2447}
2448
2449static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2450_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2451 return (__m512d)__builtin_ia32_selectpd_512(
2452 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__C);
2453}
2454
2455static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2456_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2457 return (__m512d)__builtin_ia32_selectpd_512(
2458 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C),
2459 (__v8df)_mm512_setzero_pd());
2460}
2461
2462static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2463_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
2464 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2465 -(__v8df)__C);
2466}
2467
2468static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2469_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2470 return (__m512d)__builtin_ia32_selectpd_512(
2471 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__A);
2472}
2473
2474static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2475_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2476 return (__m512d)__builtin_ia32_selectpd_512(
2477 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__C);
2478}
2479
2480static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2481_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2482 return (__m512d)__builtin_ia32_selectpd_512(
2483 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C),
2484 (__v8df)_mm512_setzero_pd());
2485}
2486
2487static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2488_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
2489 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2490 (__v8df)__C);
2491}
2492
2493static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2494_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2495 return (__m512d)__builtin_ia32_selectpd_512(
2496 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__A);
2497}
2498
2499static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2500_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2501 return (__m512d)__builtin_ia32_selectpd_512(
2502 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__C);
2503}
2504
2505static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2506_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2507 return (__m512d)__builtin_ia32_selectpd_512(
2508 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C),
2509 (__v8df)_mm512_setzero_pd());
2510}
2511
2512static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2513_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
2514 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2515 -(__v8df)__C);
2516}
2517
2518static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2519_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2520 return (__m512d)__builtin_ia32_selectpd_512(
2521 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__A);
2522}
2523
2524static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2525_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2526 return (__m512d)__builtin_ia32_selectpd_512(
2527 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__C);
2528}
2529
2530static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2531_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2532 return (__m512d)__builtin_ia32_selectpd_512(
2533 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C),
2534 (__v8df)_mm512_setzero_pd());
2535}
2536
/* FMA with explicit rounding mode, packed single.  Operand negation encodes
 * the fmsub/fnmadd/fnmsub variants on top of the vfmadd builtins. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

2620static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2621_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
2622 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2623 (__v16sf)__C);
2624}
2625
2626static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2627_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2628 return (__m512)__builtin_ia32_selectps_512(
2629 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__A);
2630}
2631
2632static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2633_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2634 return (__m512)__builtin_ia32_selectps_512(
2635 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__C);
2636}
2637
2638static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2639_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2640 return (__m512)__builtin_ia32_selectps_512(
2641 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C),
2642 (__v16sf)_mm512_setzero_ps());
2643}
2644
2645static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2646_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
2647 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2648 -(__v16sf)__C);
2649}
2650
2651static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2652_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2653 return (__m512)__builtin_ia32_selectps_512(
2654 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__A);
2655}
2656
2657static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2658_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2659 return (__m512)__builtin_ia32_selectps_512(
2660 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__C);
2661}
2662
2663static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2664_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2665 return (__m512)__builtin_ia32_selectps_512(
2666 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C),
2667 (__v16sf)_mm512_setzero_ps());
2668}
2669
2670static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2671_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
2672 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2673 (__v16sf)__C);
2674}
2675
2676static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2677_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2678 return (__m512)__builtin_ia32_selectps_512(
2679 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__A);
2680}
2681
2682static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2683_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2684 return (__m512)__builtin_ia32_selectps_512(
2685 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__C);
2686}
2687
2688static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2689_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2690 return (__m512)__builtin_ia32_selectps_512(
2691 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C),
2692 (__v16sf)_mm512_setzero_ps());
2693}
2694
2695static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2696_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
2697 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2698 -(__v16sf)__C);
2699}
2700
2701static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2702_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2703 return (__m512)__builtin_ia32_selectps_512(
2704 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__A);
2705}
2706
2707static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2708_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2709 return (__m512)__builtin_ia32_selectps_512(
2710 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__C);
2711}
2712
2713static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2714_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2715 return (__m512)__builtin_ia32_selectps_512(
2716 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C),
2717 (__v16sf)_mm512_setzero_ps());
2718}
2719
/* Alternating add/sub FMA with explicit rounding mode, packed double. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

2769static __inline__ __m512d __DEFAULT_FN_ATTRS512
2770_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2771{
2772 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2773 (__v8df) __B,
2774 (__v8df) __C,
2775 (__mmask8) -1,
2777}
2778
2779static __inline__ __m512d __DEFAULT_FN_ATTRS512
2780_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2781{
2782 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2783 (__v8df) __B,
2784 (__v8df) __C,
2785 (__mmask8) __U,
2787}
2788
2789static __inline__ __m512d __DEFAULT_FN_ATTRS512
2790_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2791{
2792 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2793 (__v8df) __B,
2794 (__v8df) __C,
2795 (__mmask8) __U,
2797}
2798
2799static __inline__ __m512d __DEFAULT_FN_ATTRS512
2800_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2801{
2802 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2803 (__v8df) __B,
2804 (__v8df) __C,
2805 (__mmask8) __U,
2807}
2808
2809static __inline__ __m512d __DEFAULT_FN_ATTRS512
2810_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2811{
2812 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2813 (__v8df) __B,
2814 -(__v8df) __C,
2815 (__mmask8) -1,
2817}
2818
2819static __inline__ __m512d __DEFAULT_FN_ATTRS512
2820_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2821{
2822 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2823 (__v8df) __B,
2824 -(__v8df) __C,
2825 (__mmask8) __U,
2827}
2828
2829static __inline__ __m512d __DEFAULT_FN_ATTRS512
2830_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2831{
2832 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2833 (__v8df) __B,
2834 -(__v8df) __C,
2835 (__mmask8) __U,
2837}
2838
/* Alternating add/sub FMA with explicit rounding mode, packed single. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

2888static __inline__ __m512 __DEFAULT_FN_ATTRS512
2889_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2890{
2891 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2892 (__v16sf) __B,
2893 (__v16sf) __C,
2894 (__mmask16) -1,
2896}
2897
2898static __inline__ __m512 __DEFAULT_FN_ATTRS512
2899_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2900{
2901 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2902 (__v16sf) __B,
2903 (__v16sf) __C,
2904 (__mmask16) __U,
2906}
2907
2908static __inline__ __m512 __DEFAULT_FN_ATTRS512
2909_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2910{
2911 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2912 (__v16sf) __B,
2913 (__v16sf) __C,
2914 (__mmask16) __U,
2916}
2917
2918static __inline__ __m512 __DEFAULT_FN_ATTRS512
2919_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2920{
2921 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2922 (__v16sf) __B,
2923 (__v16sf) __C,
2924 (__mmask16) __U,
2926}
2927
2928static __inline__ __m512 __DEFAULT_FN_ATTRS512
2929_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2930{
2931 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2932 (__v16sf) __B,
2933 -(__v16sf) __C,
2934 (__mmask16) -1,
2936}
2937
2938static __inline__ __m512 __DEFAULT_FN_ATTRS512
2939_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2940{
2941 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2942 (__v16sf) __B,
2943 -(__v16sf) __C,
2944 (__mmask16) __U,
2946}
2947
2948static __inline__ __m512 __DEFAULT_FN_ATTRS512
2949_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2950{
2951 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2952 (__v16sf) __B,
2953 -(__v16sf) __C,
2954 (__mmask16) __U,
2956}
2957
2958#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
2959 ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
2960 (__v8df)(__m512d)(B), \
2961 (__v8df)(__m512d)(C), \
2962 (__mmask8)(U), (int)(R)))
2963
2964#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
2965 ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
2966 (__v16sf)(__m512)(B), \
2967 (__v16sf)(__m512)(C), \
2968 (__mmask16)(U), (int)(R)))
2969
2970#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
2971 ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
2972 (__v8df)(__m512d)(B), \
2973 (__v8df)(__m512d)(C), \
2974 (__mmask8)(U), (int)(R)))
2975
2976
2977static __inline__ __m512d __DEFAULT_FN_ATTRS512
2978_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2979{
2980 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2981 (__v8df) __B,
2982 (__v8df) __C,
2983 (__mmask8) __U,
2985}
2986
2987#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
2988 ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
2989 (__v16sf)(__m512)(B), \
2990 (__v16sf)(__m512)(C), \
2991 (__mmask16)(U), (int)(R)))
2992
2993
2994static __inline__ __m512 __DEFAULT_FN_ATTRS512
2995_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2996{
2997 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2998 (__v16sf) __B,
2999 (__v16sf) __C,
3000 (__mmask16) __U,
3002}
3003
/* fnmadd/fnmsub round forms: implemented by negating B (and C for fnmsub)
 * in the fmadd/fmsub builtins. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

/* Vector permutations */

3044static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3045_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
3046 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3047 (__v16si) __B);
3048}
3049
3050static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3051_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3052 __m512i __B) {
3053 return (__m512i)__builtin_ia32_selectd_512(__U,
3054 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3055 (__v16si)__A);
3056}
3057
3058static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3059_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3060 __m512i __B) {
3061 return (__m512i)__builtin_ia32_selectd_512(__U,
3062 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3063 (__v16si)__I);
3064}
3065
3066static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3067_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3068 __m512i __B) {
3069 return (__m512i)__builtin_ia32_selectd_512(__U,
3070 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3071 (__v16si)_mm512_setzero_si512());
3072}
3073
3074static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3075_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) {
3076 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3077 (__v8di) __B);
3078}
3079
3080static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3081_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3082 __m512i __B) {
3083 return (__m512i)__builtin_ia32_selectq_512(__U,
3084 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3085 (__v8di)__A);
3086}
3087
3088static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3089_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3090 __m512i __B) {
3091 return (__m512i)__builtin_ia32_selectq_512(__U,
3092 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3093 (__v8di)__I);
3094}
3095
3096static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3097_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3098 __m512i __B) {
3099 return (__m512i)__builtin_ia32_selectq_512(__U,
3100 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3101 (__v8di)_mm512_setzero_si512());
3102}
3103
/* Concatenate A:B and extract a 512-bit result shifted right by I elements. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
/* Vector Extract */

/* Extract the 256-bit lane (f64x4) or 128-bit lane (f32x4) selected by the
 * immediate; the unmasked forms pass an all-ones mask and a zero vector as
 * the pass-through. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))

/* Vector Blend */

3165static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3166_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
3167 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3168 (__v8df) __W,
3169 (__v8df) __A);
3170}
3171
3172static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3173_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
3174 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3175 (__v16sf) __W,
3176 (__v16sf) __A);
3177}
3178
3179static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3180_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
3181 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3182 (__v8di) __W,
3183 (__v8di) __A);
3184}
3185
3186static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3187_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
3188 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3189 (__v16si) __W,
3190 (__v16si) __A);
3191}
3192
/* Compare */

/* Packed single-precision compares producing a 16-bit mask.  P is one of
 * the _CMP_* predicates; the named helpers below fix the predicate and use
 * the current rounding/exception behavior. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

/* Packed double-precision compares producing an 8-bit mask; same predicate
 * scheme as the _ps variants above. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)

/* Conversion */

3307#define _mm512_cvtt_roundps_epu32(A, R) \
3308 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3309 (__v16si)_mm512_undefined_epi32(), \
3310 (__mmask16)-1, (int)(R)))
3311
3312#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3313 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3314 (__v16si)(__m512i)(W), \
3315 (__mmask16)(U), (int)(R)))
3316
3317#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3318 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3319 (__v16si)_mm512_setzero_si512(), \
3320 (__mmask16)(U), (int)(R)))
3321
3322
3323static __inline __m512i __DEFAULT_FN_ATTRS512
3325{
3326 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3327 (__v16si)
3329 (__mmask16) -1,
3331}
3332
3333static __inline__ __m512i __DEFAULT_FN_ATTRS512
3334_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3335{
3336 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3337 (__v16si) __W,
3338 (__mmask16) __U,
3340}
3341
3342static __inline__ __m512i __DEFAULT_FN_ATTRS512
3344{
3345 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3346 (__v16si) _mm512_setzero_si512 (),
3347 (__mmask16) __U,
3349}
3350
/* Signed/unsigned 32-bit integer -> float conversions with an explicit
 * rounding mode R. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

3381static __inline__ __m512
3383 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3384}
3385
3386static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3387_mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3388 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3389 (__v16sf)_mm512_cvtepu32_ps(__A),
3390 (__v16sf)__W);
3391}
3392
3393static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3395 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3396 (__v16sf)_mm512_cvtepu32_ps(__A),
3397 (__v16sf)_mm512_setzero_ps());
3398}
3399
3400static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3402 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3403}
3404
3405static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3406_mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3407 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3408 (__v8df)_mm512_cvtepi32_pd(__A),
3409 (__v8df)__W);
3410}
3411
3412static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3414 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3415 (__v8df)_mm512_cvtepi32_pd(__A),
3416 (__v8df)_mm512_setzero_pd());
3417}
3418
3419static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3421 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3422}
3423
3424static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3425_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3426 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3427}
3428
3429static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3431 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3432}
3433
3434static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3435_mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3436 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3437 (__v16sf)_mm512_cvtepi32_ps(__A),
3438 (__v16sf)__W);
3439}
3440
3441static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3443 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3444 (__v16sf)_mm512_cvtepi32_ps(__A),
3445 (__v16sf)_mm512_setzero_ps());
3446}
3447
3448static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3450 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3451}
3452
3453static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3454_mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3455 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3456 (__v8df)_mm512_cvtepu32_pd(__A),
3457 (__v8df)__W);
3458}
3459
3460static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3462 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3463 (__v8df)_mm512_cvtepu32_pd(__A),
3464 (__v8df)_mm512_setzero_pd());
3465}
3466
3467static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3469 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3470}
3471
3472static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3473_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3474 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3475}
3476
3477#define _mm512_cvt_roundpd_ps(A, R) \
3478 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3479 (__v8sf)_mm256_setzero_ps(), \
3480 (__mmask8)-1, (int)(R)))
3481
3482#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3483 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3484 (__v8sf)(__m256)(W), (__mmask8)(U), \
3485 (int)(R)))
3486
3487#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3488 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3489 (__v8sf)_mm256_setzero_ps(), \
3490 (__mmask8)(U), (int)(R)))
3491
3492static __inline__ __m256 __DEFAULT_FN_ATTRS512
3493_mm512_cvtpd_ps (__m512d __A)
3494{
3495 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3496 (__v8sf) _mm256_undefined_ps (),
3497 (__mmask8) -1,
3499}
3500
3501static __inline__ __m256 __DEFAULT_FN_ATTRS512
3502_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3503{
3504 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3505 (__v8sf) __W,
3506 (__mmask8) __U,
3508}
3509
3510static __inline__ __m256 __DEFAULT_FN_ATTRS512
3512{
3513 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3514 (__v8sf) _mm256_setzero_ps (),
3515 (__mmask8) __U,
3517}
3518
3519static __inline__ __m512 __DEFAULT_FN_ATTRS512
3521{
3522 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3523 (__v8sf) _mm256_setzero_ps (),
3524 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3525}
3526
3527static __inline__ __m512 __DEFAULT_FN_ATTRS512
3528_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3529{
3530 return (__m512) __builtin_shufflevector (
3532 __U, __A),
3533 (__v8sf) _mm256_setzero_ps (),
3534 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3535}
3536
3537#define _mm512_cvt_roundps_ph(A, I) \
3538 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3539 (__v16hi)_mm256_undefined_si256(), \
3540 (__mmask16)-1))
3541
3542#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
3543 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3544 (__v16hi)(__m256i)(U), \
3545 (__mmask16)(W)))
3546
3547#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
3548 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3549 (__v16hi)_mm256_setzero_si256(), \
3550 (__mmask16)(W)))
3551
3552#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
3553#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
3554#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3555
3556#define _mm512_cvt_roundph_ps(A, R) \
3557 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3558 (__v16sf)_mm512_undefined_ps(), \
3559 (__mmask16)-1, (int)(R)))
3560
3561#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
3562 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3563 (__v16sf)(__m512)(W), \
3564 (__mmask16)(U), (int)(R)))
3565
3566#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
3567 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3568 (__v16sf)_mm512_setzero_ps(), \
3569 (__mmask16)(U), (int)(R)))
3570
3571
3572static __inline __m512 __DEFAULT_FN_ATTRS512
3574{
3575 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3576 (__v16sf)
3578 (__mmask16) -1,
3580}
3581
3582static __inline__ __m512 __DEFAULT_FN_ATTRS512
3583_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3584{
3585 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3586 (__v16sf) __W,
3587 (__mmask16) __U,
3589}
3590
3591static __inline__ __m512 __DEFAULT_FN_ATTRS512
3593{
3594 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3595 (__v16sf) _mm512_setzero_ps (),
3596 (__mmask16) __U,
3598}
3599
3600#define _mm512_cvtt_roundpd_epi32(A, R) \
3601 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3602 (__v8si)_mm256_setzero_si256(), \
3603 (__mmask8)-1, (int)(R)))
3604
3605#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3606 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3607 (__v8si)(__m256i)(W), \
3608 (__mmask8)(U), (int)(R)))
3609
3610#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3611 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3612 (__v8si)_mm256_setzero_si256(), \
3613 (__mmask8)(U), (int)(R)))
3614
3615static __inline __m256i __DEFAULT_FN_ATTRS512
3617{
3618 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3619 (__v8si)_mm256_setzero_si256(),
3620 (__mmask8) -1,
3622}
3623
3624static __inline__ __m256i __DEFAULT_FN_ATTRS512
3625_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3626{
3627 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3628 (__v8si) __W,
3629 (__mmask8) __U,
3631}
3632
3633static __inline__ __m256i __DEFAULT_FN_ATTRS512
3635{
3636 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3637 (__v8si) _mm256_setzero_si256 (),
3638 (__mmask8) __U,
3640}
3641
3642#define _mm512_cvtt_roundps_epi32(A, R) \
3643 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3644 (__v16si)_mm512_setzero_si512(), \
3645 (__mmask16)-1, (int)(R)))
3646
3647#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3648 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3649 (__v16si)(__m512i)(W), \
3650 (__mmask16)(U), (int)(R)))
3651
3652#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3653 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3654 (__v16si)_mm512_setzero_si512(), \
3655 (__mmask16)(U), (int)(R)))
3656
3657static __inline __m512i __DEFAULT_FN_ATTRS512
3659{
3660 return (__m512i)
3661 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3662 (__v16si) _mm512_setzero_si512 (),
3664}
3665
3666static __inline__ __m512i __DEFAULT_FN_ATTRS512
3667_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3668{
3669 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3670 (__v16si) __W,
3671 (__mmask16) __U,
3673}
3674
3675static __inline__ __m512i __DEFAULT_FN_ATTRS512
3677{
3678 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3679 (__v16si) _mm512_setzero_si512 (),
3680 (__mmask16) __U,
3682}
3683
3684#define _mm512_cvt_roundps_epi32(A, R) \
3685 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3686 (__v16si)_mm512_setzero_si512(), \
3687 (__mmask16)-1, (int)(R)))
3688
3689#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
3690 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3691 (__v16si)(__m512i)(W), \
3692 (__mmask16)(U), (int)(R)))
3693
3694#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
3695 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3696 (__v16si)_mm512_setzero_si512(), \
3697 (__mmask16)(U), (int)(R)))
3698
3699static __inline__ __m512i __DEFAULT_FN_ATTRS512
3701{
3702 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3703 (__v16si) _mm512_undefined_epi32 (),
3704 (__mmask16) -1,
3706}
3707
3708static __inline__ __m512i __DEFAULT_FN_ATTRS512
3709_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3710{
3711 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3712 (__v16si) __W,
3713 (__mmask16) __U,
3715}
3716
3717static __inline__ __m512i __DEFAULT_FN_ATTRS512
3719{
3720 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3721 (__v16si)
3723 (__mmask16) __U,
3725}
3726
3727#define _mm512_cvt_roundpd_epi32(A, R) \
3728 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3729 (__v8si)_mm256_setzero_si256(), \
3730 (__mmask8)-1, (int)(R)))
3731
3732#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
3733 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3734 (__v8si)(__m256i)(W), \
3735 (__mmask8)(U), (int)(R)))
3736
3737#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
3738 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3739 (__v8si)_mm256_setzero_si256(), \
3740 (__mmask8)(U), (int)(R)))
3741
3742static __inline__ __m256i __DEFAULT_FN_ATTRS512
3744{
3745 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3746 (__v8si)
3748 (__mmask8) -1,
3750}
3751
3752static __inline__ __m256i __DEFAULT_FN_ATTRS512
3753_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3754{
3755 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3756 (__v8si) __W,
3757 (__mmask8) __U,
3759}
3760
3761static __inline__ __m256i __DEFAULT_FN_ATTRS512
3763{
3764 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3765 (__v8si)
3767 (__mmask8) __U,
3769}
3770
3771#define _mm512_cvt_roundps_epu32(A, R) \
3772 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
3773 (__v16si)_mm512_setzero_si512(), \
3774 (__mmask16)-1, (int)(R)))
3775
3776#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
3777 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
3778 (__v16si)(__m512i)(W), \
3779 (__mmask16)(U), (int)(R)))
3780
3781#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
3782 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
3783 (__v16si)_mm512_setzero_si512(), \
3784 (__mmask16)(U), (int)(R)))
3785
3786static __inline__ __m512i __DEFAULT_FN_ATTRS512
3788{
3789 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
3790 (__v16si)\
3792 (__mmask16) -1,\
3794}
3795
3796static __inline__ __m512i __DEFAULT_FN_ATTRS512
3797_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3798{
3799 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3800 (__v16si) __W,
3801 (__mmask16) __U,
3803}
3804
3805static __inline__ __m512i __DEFAULT_FN_ATTRS512
3807{
3808 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3809 (__v16si)
3811 (__mmask16) __U ,
3813}
3814
3815#define _mm512_cvt_roundpd_epu32(A, R) \
3816 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
3817 (__v8si)_mm256_setzero_si256(), \
3818 (__mmask8)-1, (int)(R)))
3819
3820#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
3821 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
3822 (__v8si)(__m256i)(W), \
3823 (__mmask8)(U), (int)(R)))
3824
3825#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
3826 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
3827 (__v8si)_mm256_setzero_si256(), \
3828 (__mmask8)(U), (int)(R)))
3829
3830static __inline__ __m256i __DEFAULT_FN_ATTRS512
3832{
3833 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3834 (__v8si)
3836 (__mmask8) -1,
3838}
3839
3840static __inline__ __m256i __DEFAULT_FN_ATTRS512
3841_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
3842{
3843 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3844 (__v8si) __W,
3845 (__mmask8) __U,
3847}
3848
3849static __inline__ __m256i __DEFAULT_FN_ATTRS512
3851{
3852 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3853 (__v8si)
3855 (__mmask8) __U,
3857}
3858
3859static __inline__ double __DEFAULT_FN_ATTRS512
3861{
3862 return __a[0];
3863}
3864
3865static __inline__ float __DEFAULT_FN_ATTRS512
3867{
3868 return __a[0];
3869}
3870
/* Unpack and Interleave */

/* Unpack and interleave the high (odd-indexed) double from each 128-bit
   lane of __a and __b. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Merge-masked form: result elements whose bit in __U is clear come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpackhi_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked form: result elements whose bit in __U is clear are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpackhi_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Unpack and interleave the low (even-indexed) double from each 128-bit
   lane of __a and __b. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Merge-masked form of _mm512_unpacklo_pd. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpacklo_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked form of _mm512_unpacklo_pd. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpacklo_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Unpack and interleave the two high floats from each 128-bit lane of
   __a and __b. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         2,    18,    3,    19,
                                         2+4,  18+4,  3+4,  19+4,
                                         2+8,  18+8,  3+8,  19+8,
                                         2+12, 18+12, 3+12, 19+12);
}

/* Merge-masked form of _mm512_unpackhi_ps. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked form of _mm512_unpackhi_ps. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Unpack and interleave the two low floats from each 128-bit lane of
   __a and __b. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         0,    16,    1,    17,
                                         0+4,  16+4,  1+4,  17+4,
                                         0+8,  16+8,  1+8,  17+8,
                                         0+12, 16+12, 1+12, 17+12);
}

/* Merge-masked form of _mm512_unpacklo_ps. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked form of _mm512_unpacklo_ps. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
3966
/* Unpack and interleave the two high 32-bit elements from each 128-bit
   lane of __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
                                          2+8,  18+8,  3+8,  19+8,
                                          2+12, 18+12, 3+12, 19+12);
}

/* Merge-masked form: result elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked form: result elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Unpack and interleave the two low 32-bit elements from each 128-bit
   lane of __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          0,    16,    1,    17,
                                          0+4,  16+4,  1+4,  17+4,
                                          0+8,  16+8,  1+8,  17+8,
                                          0+12, 16+12, 1+12, 17+12);
}

/* Merge-masked form of _mm512_unpacklo_epi32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked form of _mm512_unpacklo_epi32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Unpack and interleave the high 64-bit element from each 128-bit lane of
   __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Merge-masked form of _mm512_unpackhi_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked form of _mm512_unpackhi_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Unpack and interleave the low 64-bit element from each 128-bit lane of
   __A and __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Merge-masked form of _mm512_unpacklo_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked form of _mm512_unpacklo_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4060
4061
4062/* SIMD load ops */
4063
4064static __inline __m512i __DEFAULT_FN_ATTRS512
4066{
4067 struct __loadu_si512 {
4068 __m512i_u __v;
4069 } __attribute__((__packed__, __may_alias__));
4070 return ((const struct __loadu_si512*)__P)->__v;
4071}
4072
4073static __inline __m512i __DEFAULT_FN_ATTRS512
4075{
4076 struct __loadu_epi32 {
4077 __m512i_u __v;
4078 } __attribute__((__packed__, __may_alias__));
4079 return ((const struct __loadu_epi32*)__P)->__v;
4080}
4081
/* Masked unaligned load of 16 32-bit integers: elements whose bit in __U is
   clear are taken from __W instead of memory. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                                                     (__v16si) __W,
                                                     (__mmask16) __U);
}
4089
4090
4091static __inline __m512i __DEFAULT_FN_ATTRS512
4093{
4094 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4095 (__v16si)
4097 (__mmask16) __U);
4098}
4099
4100static __inline __m512i __DEFAULT_FN_ATTRS512
4102{
4103 struct __loadu_epi64 {
4104 __m512i_u __v;
4105 } __attribute__((__packed__, __may_alias__));
4106 return ((const struct __loadu_epi64*)__P)->__v;
4107}
4108
4109static __inline __m512i __DEFAULT_FN_ATTRS512
4110_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4111{
4112 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4113 (__v8di) __W,
4114 (__mmask8) __U);
4115}
4116
4117static __inline __m512i __DEFAULT_FN_ATTRS512
4119{
4120 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4121 (__v8di)
4123 (__mmask8) __U);
4124}
4125
4126static __inline __m512 __DEFAULT_FN_ATTRS512
4127_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4128{
4129 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4130 (__v16sf) __W,
4131 (__mmask16) __U);
4132}
4133
4134static __inline __m512 __DEFAULT_FN_ATTRS512
4136{
4137 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4138 (__v16sf)
4140 (__mmask16) __U);
4141}
4142
4143static __inline __m512d __DEFAULT_FN_ATTRS512
4144_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4145{
4146 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4147 (__v8df) __W,
4148 (__mmask8) __U);
4149}
4150
4151static __inline __m512d __DEFAULT_FN_ATTRS512
4153{
4154 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4155 (__v8df)
4157 (__mmask8) __U);
4158}
4159
4160static __inline __m512d __DEFAULT_FN_ATTRS512
4162{
4163 struct __loadu_pd {
4164 __m512d_u __v;
4165 } __attribute__((__packed__, __may_alias__));
4166 return ((const struct __loadu_pd*)__p)->__v;
4167}
4168
4169static __inline __m512 __DEFAULT_FN_ATTRS512
4171{
4172 struct __loadu_ps {
4173 __m512_u __v;
4174 } __attribute__((__packed__, __may_alias__));
4175 return ((const struct __loadu_ps*)__p)->__v;
4176}
4177
4178static __inline __m512 __DEFAULT_FN_ATTRS512
4180{
4181 return *(const __m512*)__p;
4182}
4183
4184static __inline __m512 __DEFAULT_FN_ATTRS512
4185_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4186{
4187 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4188 (__v16sf) __W,
4189 (__mmask16) __U);
4190}
4191
4192static __inline __m512 __DEFAULT_FN_ATTRS512
4194{
4195 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4196 (__v16sf)
4198 (__mmask16) __U);
4199}
4200
4201static __inline __m512d __DEFAULT_FN_ATTRS512
4203{
4204 return *(const __m512d*)__p;
4205}
4206
4207static __inline __m512d __DEFAULT_FN_ATTRS512
4208_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4209{
4210 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4211 (__v8df) __W,
4212 (__mmask8) __U);
4213}
4214
4215static __inline __m512d __DEFAULT_FN_ATTRS512
4217{
4218 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4219 (__v8df)
4221 (__mmask8) __U);
4222}
4223
4224static __inline __m512i __DEFAULT_FN_ATTRS512
4226{
4227 return *(const __m512i *) __P;
4228}
4229
4230static __inline __m512i __DEFAULT_FN_ATTRS512
4232{
4233 return *(const __m512i *) __P;
4234}
4235
4236static __inline __m512i __DEFAULT_FN_ATTRS512
4238{
4239 return *(const __m512i *) __P;
4240}
4241
4242/* SIMD store ops */
4243
/* Unaligned store of 8 64-bit integers.  The packed, may_alias wrapper
   struct lets the compiler emit an unaligned store without violating
   strict aliasing. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
  struct __storeu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi64*)__P)->__v = __A;
}

/* Masked unaligned store: only 64-bit elements whose bit in __U is set are
   written to memory. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

/* Unaligned 512-bit integer store via a packed may_alias wrapper. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  struct __storeu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si512*)__P)->__v = __A;
}

/* Unaligned store of 16 32-bit integers via a packed may_alias wrapper. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
  struct __storeu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi32*)__P)->__v = __A;
}
4277
4278static __inline void __DEFAULT_FN_ATTRS512
4280{
4281 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4282 (__mmask16) __U);
4283}
4284
/* Masked unaligned store of 8 doubles: only elements whose bit in __U is
   set are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Unaligned store of 8 doubles via a packed may_alias wrapper struct. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_pd(void *__P, __m512d __A)
{
  struct __storeu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pd*)__P)->__v = __A;
}

/* Masked unaligned store of 16 floats. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Unaligned store of 16 floats via a packed may_alias wrapper struct. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_ps(void *__P, __m512 __A)
{
  struct __storeu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ps*)__P)->__v = __A;
}

/* Masked aligned store of 8 doubles; __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Aligned store of 8 doubles; __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

/* Masked aligned store of 16 floats; __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Aligned store of 16 floats; __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}

/* Aligned 512-bit integer store; __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Aligned store of 16 32-bit integers; __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Aligned store of 8 64-bit integers; __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4358
4359/* Mask ops */
4360
4363 return __builtin_ia32_knothi(__M);
4364}
4365
4366/* Integer compare */
4367
/* Convenience relational compares: each wrapper expands to
   _mm512_[mask_]cmp_*_mask with the matching _MM_CMPINT_* predicate.
   Signed 32-bit element compares: */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 32-bit element compares: */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Signed 64-bit element compares: */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 64-bit element compares: */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4467
4468static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4470 /* This function always performs a signed extension, but __v16qi is a char
4471 which may be signed or unsigned, so use __v16qs. */
4472 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4473}
4474
4475static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4476_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4477 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4478 (__v16si)_mm512_cvtepi8_epi32(__A),
4479 (__v16si)__W);
4480}
4481
4482static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4484 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4485 (__v16si)_mm512_cvtepi8_epi32(__A),
4486 (__v16si)_mm512_setzero_si512());
4487}
4488
4489static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4491 /* This function always performs a signed extension, but __v16qi is a char
4492 which may be signed or unsigned, so use __v16qs. */
4493 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4494}
4495
4496static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4497_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4498 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4499 (__v8di)_mm512_cvtepi8_epi64(__A),
4500 (__v8di)__W);
4501}
4502
4503static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4505 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4506 (__v8di)_mm512_cvtepi8_epi64(__A),
4507 (__v8di)_mm512_setzero_si512 ());
4508}
4509
4510static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4512 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4513}
4514
4515static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4516_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4517 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4518 (__v8di)_mm512_cvtepi32_epi64(__X),
4519 (__v8di)__W);
4520}
4521
4522static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4524 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4525 (__v8di)_mm512_cvtepi32_epi64(__X),
4526 (__v8di)_mm512_setzero_si512());
4527}
4528
4529static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4531 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4532}
4533
4534static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4535_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4536 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4537 (__v16si)_mm512_cvtepi16_epi32(__A),
4538 (__v16si)__W);
4539}
4540
4541static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4543 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4544 (__v16si)_mm512_cvtepi16_epi32(__A),
4545 (__v16si)_mm512_setzero_si512 ());
4546}
4547
4548static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4550 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4551}
4552
4553static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4554_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4555 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4556 (__v8di)_mm512_cvtepi16_epi64(__A),
4557 (__v8di)__W);
4558}
4559
4560static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4562 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4563 (__v8di)_mm512_cvtepi16_epi64(__A),
4564 (__v8di)_mm512_setzero_si512());
4565}
4566
4567static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4569 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4570}
4571
4572static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4573_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4574 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4575 (__v16si)_mm512_cvtepu8_epi32(__A),
4576 (__v16si)__W);
4577}
4578
4579static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4581 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4582 (__v16si)_mm512_cvtepu8_epi32(__A),
4583 (__v16si)_mm512_setzero_si512());
4584}
4585
4586static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4588 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4589}
4590
4591static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4592_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4593 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4594 (__v8di)_mm512_cvtepu8_epi64(__A),
4595 (__v8di)__W);
4596}
4597
4598static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4600 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4601 (__v8di)_mm512_cvtepu8_epi64(__A),
4602 (__v8di)_mm512_setzero_si512());
4603}
4604
4605static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4607 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4608}
4609
4610static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4611_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4612 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4613 (__v8di)_mm512_cvtepu32_epi64(__X),
4614 (__v8di)__W);
4615}
4616
4617static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4619 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4620 (__v8di)_mm512_cvtepu32_epi64(__X),
4621 (__v8di)_mm512_setzero_si512());
4622}
4623
4624static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4626 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4627}
4628
4629static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4630_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4631 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4632 (__v16si)_mm512_cvtepu16_epi32(__A),
4633 (__v16si)__W);
4634}
4635
4636static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4638 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4639 (__v16si)_mm512_cvtepu16_epi32(__A),
4640 (__v16si)_mm512_setzero_si512());
4641}
4642
4643static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4645 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4646}
4647
4648static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4649_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4650 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4651 (__v8di)_mm512_cvtepu16_epi64(__A),
4652 (__v8di)__W);
4653}
4654
4655static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4657 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4658 (__v8di)_mm512_cvtepu16_epi64(__A),
4659 (__v8di)_mm512_setzero_si512());
4660}
4661
/* Rotate each 32-bit element of __A right by the count in the corresponding
   element of __B.  A funnel shift right with both data operands equal to
   __A is exactly a rotate right. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
}

/* Merge-masked form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rorv_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rorv_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Rotate each 64-bit element of __A right by the corresponding count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
}

/* Merge-masked form of _mm512_rorv_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rorv_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked form of _mm512_rorv_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rorv_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4705
4706
4707
/* General element compares: compare packed elements of (a) and (b) with
   predicate (p) and return one mask bit per element.  The _mask_ variants
   additionally AND the result with the incoming mask (m). */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))

/* Rotate each element of (a) left by the immediate count (b); macros so the
   count can be passed to the immediate-operand builtin. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
4773
/* Rotate each 32-bit element of __A left by the count in the corresponding
   element of __B.  A funnel shift left with both data operands equal to
   __A is exactly a rotate left. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
}

/* Merge-masked form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rolv_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rolv_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Rotate each 64-bit element of __A left by the corresponding count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
}

/* Merge-masked form of _mm512_rolv_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rolv_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked form of _mm512_rolv_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rolv_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4817
/* Rotate each element of (A) right by the immediate count (B); macros so
   the count can be passed to the immediate-operand builtin. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
4843
/* Shift each 32-bit element of __A left by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_slli_epi32(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
}

/* Merge-masked form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Shift each 64-bit element of __A left by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_slli_epi64(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
}

/* Merge-masked form of _mm512_slli_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_slli_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked form of _mm512_slli_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_slli_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Logically shift each 32-bit element of __A right by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srli_epi32(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
}

/* Merge-masked form of _mm512_srli_epi32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srli_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked form of _mm512_srli_epi32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srli_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Logically shift each 64-bit element of __A right by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srli_epi64(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
}
4908
4909static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4910_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
4911 unsigned int __B) {
4912 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4913 (__v8di)_mm512_srli_epi64(__A, __B),
4914 (__v8di)__W);
4915}
4916
4917static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4918_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4919 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4920 (__v8di)_mm512_srli_epi64(__A, __B),
4921 (__v8di)_mm512_setzero_si512());
4922}
4923
4924static __inline__ __m512i __DEFAULT_FN_ATTRS512
4925_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4926{
4927 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4928 (__v16si) __W,
4929 (__mmask16) __U);
4930}
4931
4932static __inline__ __m512i __DEFAULT_FN_ATTRS512
4934{
4935 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4936 (__v16si)
4938 (__mmask16) __U);
4939}
4940
4941static __inline__ void __DEFAULT_FN_ATTRS512
4942_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
4943{
4944 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
4945 (__mmask16) __U);
4946}
4947
4948static __inline__ __m512i __DEFAULT_FN_ATTRS512
4949_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
4950{
4951 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4952 (__v16si) __A,
4953 (__v16si) __W);
4954}
4955
4956static __inline__ __m512i __DEFAULT_FN_ATTRS512
4958{
4959 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4960 (__v16si) __A,
4961 (__v16si) _mm512_setzero_si512 ());
4962}
4963
4964static __inline__ __m512i __DEFAULT_FN_ATTRS512
4965_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
4966{
4967 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4968 (__v8di) __A,
4969 (__v8di) __W);
4970}
4971
4972static __inline__ __m512i __DEFAULT_FN_ATTRS512
4974{
4975 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4976 (__v8di) __A,
4977 (__v8di) _mm512_setzero_si512 ());
4978}
4979
4980static __inline__ __m512i __DEFAULT_FN_ATTRS512
4981_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4982{
4983 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4984 (__v8di) __W,
4985 (__mmask8) __U);
4986}
4987
4988static __inline__ __m512i __DEFAULT_FN_ATTRS512
4990{
4991 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4992 (__v8di)
4994 (__mmask8) __U);
4995}
4996
4997static __inline__ void __DEFAULT_FN_ATTRS512
4998_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
4999{
5000 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5001 (__mmask8) __U);
5002}
5003
5004static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5006{
5007 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5008 0, 0, 2, 2, 4, 4, 6, 6);
5009}
5010
5011static __inline__ __m512d __DEFAULT_FN_ATTRS512
5012_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5013{
5014 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5015 (__v8df)_mm512_movedup_pd(__A),
5016 (__v8df)__W);
5017}
5018
5019static __inline__ __m512d __DEFAULT_FN_ATTRS512
5021{
5022 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5023 (__v8df)_mm512_movedup_pd(__A),
5024 (__v8df)_mm512_setzero_pd());
5025}
5026
/* VFIXUPIMM: fix up special values (NaN, Inf, zero, denormal) of packed /
   scalar floats according to the table in the imm operand. Macros because
   imm (and R, the rounding/SAE control) must be compile-time constants. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
5184
5185#define _mm_getexp_round_sd(A, B, R) \
5186 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5187 (__v2df)(__m128d)(B), \
5188 (__v2df)_mm_setzero_pd(), \
5189 (__mmask8)-1, (int)(R)))
5190
5191
5192static __inline__ __m128d __DEFAULT_FN_ATTRS128
5193_mm_getexp_sd (__m128d __A, __m128d __B)
5194{
5195 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5196 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5197}
5198
5199static __inline__ __m128d __DEFAULT_FN_ATTRS128
5200_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5201{
5202 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5203 (__v2df) __B,
5204 (__v2df) __W,
5205 (__mmask8) __U,
5207}
5208
5209#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5210 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5211 (__v2df)(__m128d)(B), \
5212 (__v2df)(__m128d)(W), \
5213 (__mmask8)(U), (int)(R)))
5214
5215static __inline__ __m128d __DEFAULT_FN_ATTRS128
5216_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5217{
5218 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5219 (__v2df) __B,
5220 (__v2df) _mm_setzero_pd (),
5221 (__mmask8) __U,
5223}
5224
5225#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5226 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5227 (__v2df)(__m128d)(B), \
5228 (__v2df)_mm_setzero_pd(), \
5229 (__mmask8)(U), (int)(R)))
5230
5231#define _mm_getexp_round_ss(A, B, R) \
5232 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5233 (__v4sf)(__m128)(B), \
5234 (__v4sf)_mm_setzero_ps(), \
5235 (__mmask8)-1, (int)(R)))
5236
5237static __inline__ __m128 __DEFAULT_FN_ATTRS128
5238_mm_getexp_ss (__m128 __A, __m128 __B)
5239{
5240 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5241 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5242}
5243
5244static __inline__ __m128 __DEFAULT_FN_ATTRS128
5245_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5246{
5247 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5248 (__v4sf) __B,
5249 (__v4sf) __W,
5250 (__mmask8) __U,
5252}
5253
5254#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5255 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5256 (__v4sf)(__m128)(B), \
5257 (__v4sf)(__m128)(W), \
5258 (__mmask8)(U), (int)(R)))
5259
5260static __inline__ __m128 __DEFAULT_FN_ATTRS128
5261_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5262{
5263 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5264 (__v4sf) __B,
5265 (__v4sf) _mm_setzero_ps (),
5266 (__mmask8) __U,
5268}
5269
5270#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5271 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5272 (__v4sf)(__m128)(B), \
5273 (__v4sf)_mm_setzero_ps(), \
5274 (__mmask8)(U), (int)(R)))
5275
/* VGETMANT (scalar): extract the normalized mantissa of the low element.
   C selects the normalization interval, D the sign control; they are packed
   as ((D << 2) | C) into the immediate, so both must be constants. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5365
5366static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5368{
5369 return __A;
5370}
5371
/* Ordered compare of the low scalar elements under predicate P with
   explicit SAE control R; returns the comparison result as int. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5384
5385static __inline__ __m512i
5387 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5388}
5389
5390static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5391_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5392 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5393 (__v16si)_mm512_sll_epi32(__A, __B),
5394 (__v16si)__W);
5395}
5396
5397static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5398_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5399 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5400 (__v16si)_mm512_sll_epi32(__A, __B),
5401 (__v16si)_mm512_setzero_si512());
5402}
5403
5404static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5405_mm512_sll_epi64(__m512i __A, __m128i __B) {
5406 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5407}
5408
5409static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5410_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5411 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5412 (__v8di)_mm512_sll_epi64(__A, __B),
5413 (__v8di)__W);
5414}
5415
5416static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5417_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5418 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5419 (__v8di)_mm512_sll_epi64(__A, __B),
5420 (__v8di)_mm512_setzero_si512());
5421}
5422
5423static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5424_mm512_sllv_epi32(__m512i __X, __m512i __Y) {
5425 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5426}
5427
5428static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5429_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5430 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5431 (__v16si)_mm512_sllv_epi32(__X, __Y),
5432 (__v16si)__W);
5433}
5434
5435static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5436_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5437 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5438 (__v16si)_mm512_sllv_epi32(__X, __Y),
5439 (__v16si)_mm512_setzero_si512());
5440}
5441
5442static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5443_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5444{
5445 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5446}
5447
5448static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5449_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5450{
5451 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5452 (__v8di)_mm512_sllv_epi64(__X, __Y),
5453 (__v8di)__W);
5454}
5455
5456static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5457_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5458{
5459 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5460 (__v8di)_mm512_sllv_epi64(__X, __Y),
5461 (__v8di)_mm512_setzero_si512());
5462}
5463
5464static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5465_mm512_sra_epi32(__m512i __A, __m128i __B) {
5466 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5467}
5468
5469static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5470_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5471 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5472 (__v16si)_mm512_sra_epi32(__A, __B),
5473 (__v16si)__W);
5474}
5475
5476static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5477_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5478 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5479 (__v16si)_mm512_sra_epi32(__A, __B),
5480 (__v16si)_mm512_setzero_si512());
5481}
5482
5483static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5484_mm512_sra_epi64(__m512i __A, __m128i __B) {
5485 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5486}
5487
5488static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5489_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5490 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5491 (__v8di)_mm512_sra_epi64(__A, __B),
5492 (__v8di)__W);
5493}
5494
5495static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5496_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5497 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5498 (__v8di)_mm512_sra_epi64(__A, __B),
5499 (__v8di)_mm512_setzero_si512());
5500}
5501
5502static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5503_mm512_srav_epi32(__m512i __X, __m512i __Y) {
5504 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5505}
5506
5507static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5508_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5509 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5510 (__v16si)_mm512_srav_epi32(__X, __Y),
5511 (__v16si)__W);
5512}
5513
5514static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5515_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5516 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5517 (__v16si)_mm512_srav_epi32(__X, __Y),
5518 (__v16si)_mm512_setzero_si512());
5519}
5520
5521static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5522_mm512_srav_epi64(__m512i __X, __m512i __Y)
5523{
5524 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5525}
5526
5527static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5528_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5529{
5530 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5531 (__v8di)_mm512_srav_epi64(__X, __Y),
5532 (__v8di)__W);
5533}
5534
5535static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5536_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5537{
5538 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5539 (__v8di)_mm512_srav_epi64(__X, __Y),
5540 (__v8di)_mm512_setzero_si512());
5541}
5542
5543static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5544_mm512_srl_epi32(__m512i __A, __m128i __B) {
5545 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5546}
5547
5548static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5549_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5550 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5551 (__v16si)_mm512_srl_epi32(__A, __B),
5552 (__v16si)__W);
5553}
5554
5555static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5556_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5557 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5558 (__v16si)_mm512_srl_epi32(__A, __B),
5559 (__v16si)_mm512_setzero_si512());
5560}
5561
5562static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5563_mm512_srl_epi64(__m512i __A, __m128i __B) {
5564 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5565}
5566
5567static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5568_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5569 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5570 (__v8di)_mm512_srl_epi64(__A, __B),
5571 (__v8di)__W);
5572}
5573
5574static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5575_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5576 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5577 (__v8di)_mm512_srl_epi64(__A, __B),
5578 (__v8di)_mm512_setzero_si512());
5579}
5580
5581static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5582_mm512_srlv_epi32(__m512i __X, __m512i __Y) {
5583 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5584}
5585
5586static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5587_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5588 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5589 (__v16si)_mm512_srlv_epi32(__X, __Y),
5590 (__v16si)__W);
5591}
5592
5593static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5594_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5595 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5596 (__v16si)_mm512_srlv_epi32(__X, __Y),
5597 (__v16si)_mm512_setzero_si512());
5598}
5599
5600static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5601_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5602{
5603 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5604}
5605
5606static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5607_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5608{
5609 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5610 (__v8di)_mm512_srlv_epi64(__X, __Y),
5611 (__v8di)__W);
5612}
5613
5614static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5615_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5616{
5617 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5618 (__v8di)_mm512_srlv_epi64(__X, __Y),
5619 (__v8di)_mm512_setzero_si512());
5620}
5621
/// \enum _MM_TERNLOG_ENUM
/// A helper to represent the ternary logic operations among vector \a A,
/// \a B and \a C. The representation is passed to \a imm.
/// (Extraction had dropped the enum body; restored from upstream. The three
/// constants are the truth-table masks of each operand, so boolean
/// expressions over them — e.g. (_MM_TERNLOG_A & _MM_TERNLOG_B) — form a
/// valid imm for the ternarylogic intrinsics.)
typedef enum {
  _MM_TERNLOG_A = 0xF0,
  _MM_TERNLOG_B = 0xCC,
  _MM_TERNLOG_C = 0xAA
} _MM_TERNLOG_ENUM;

#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
5660
/* Scalar double -> integer conversions with explicit rounding control R. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif

#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5674
5675static __inline__ unsigned __DEFAULT_FN_ATTRS128
5676_mm_cvtsd_u32 (__m128d __A)
5677{
5678 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5680}
5681
5682#ifdef __x86_64__
5683#define _mm_cvt_roundsd_u64(A, R) \
5684 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5685 (int)(R)))
5686
5687static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5688_mm_cvtsd_u64 (__m128d __A)
5689{
5690 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5691 __A,
5693}
5694#endif
5695
/* Scalar float -> integer conversions with explicit rounding control R. */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
#endif

#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5712
5713static __inline__ unsigned __DEFAULT_FN_ATTRS128
5714_mm_cvtss_u32 (__m128 __A)
5715{
5716 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5718}
5719
5720#ifdef __x86_64__
5721#define _mm_cvt_roundss_u64(A, R) \
5722 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
5723 (int)(R)))
5724
5725static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5726_mm_cvtss_u64 (__m128 __A)
5727{
5728 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
5729 __A,
5731}
5732#endif
5733
5734#define _mm_cvtt_roundsd_i32(A, R) \
5735 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5736
5737#define _mm_cvtt_roundsd_si32(A, R) \
5738 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5739
5740static __inline__ int __DEFAULT_FN_ATTRS128
5741_mm_cvttsd_i32 (__m128d __A)
5742{
5743 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
5745}
5746
5747#ifdef __x86_64__
5748#define _mm_cvtt_roundsd_si64(A, R) \
5749 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5750
5751#define _mm_cvtt_roundsd_i64(A, R) \
5752 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5753
5754static __inline__ long long __DEFAULT_FN_ATTRS128
5755_mm_cvttsd_i64 (__m128d __A)
5756{
5757 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
5759}
5760#endif
5761
5762#define _mm_cvtt_roundsd_u32(A, R) \
5763 ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5764
5765static __inline__ unsigned __DEFAULT_FN_ATTRS128
5766_mm_cvttsd_u32 (__m128d __A)
5767{
5768 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
5770}
5771
5772#ifdef __x86_64__
5773#define _mm_cvtt_roundsd_u64(A, R) \
5774 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
5775 (int)(R)))
5776
5777static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5778_mm_cvttsd_u64 (__m128d __A)
5779{
5780 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
5781 __A,
5783}
5784#endif
5785
5786#define _mm_cvtt_roundss_i32(A, R) \
5787 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5788
5789#define _mm_cvtt_roundss_si32(A, R) \
5790 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5791
5792static __inline__ int __DEFAULT_FN_ATTRS128
5793_mm_cvttss_i32 (__m128 __A)
5794{
5795 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
5797}
5798
5799#ifdef __x86_64__
5800#define _mm_cvtt_roundss_i64(A, R) \
5801 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5802
5803#define _mm_cvtt_roundss_si64(A, R) \
5804 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5805
5806static __inline__ long long __DEFAULT_FN_ATTRS128
5807_mm_cvttss_i64 (__m128 __A)
5808{
5809 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
5811}
5812#endif
5813
5814#define _mm_cvtt_roundss_u32(A, R) \
5815 ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
5816
5817static __inline__ unsigned __DEFAULT_FN_ATTRS128
5818_mm_cvttss_u32 (__m128 __A)
5819{
5820 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
5822}
5823
5824#ifdef __x86_64__
5825#define _mm_cvtt_roundss_u64(A, R) \
5826 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
5827 (int)(R)))
5828
5829static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5830_mm_cvttss_u64 (__m128 __A)
5831{
5832 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
5833 __A,
5835}
5836#endif
5837
/* vpermilpd/vpermilps with an immediate control: shuffle elements within
   each 128-bit lane.  Masked forms blend the permuted result with W (mask)
   or zero (maskz) via a select. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
5863
5864static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5865_mm512_permutevar_pd(__m512d __A, __m512i __C) {
5866 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
5867}
5868
5869static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5870_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
5871 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5872 (__v8df)_mm512_permutevar_pd(__A, __C),
5873 (__v8df)__W);
5874}
5875
5876static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5877_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
5878 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5879 (__v8df)_mm512_permutevar_pd(__A, __C),
5880 (__v8df)_mm512_setzero_pd());
5881}
5882
5883static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5884_mm512_permutevar_ps(__m512 __A, __m512i __C) {
5885 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
5886}
5887
5888static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5889_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
5890 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
5891 (__v16sf)_mm512_permutevar_ps(__A, __C),
5892 (__v16sf)__W);
5893}
5894
5895static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5896_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
5897 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
5898 (__v16sf)_mm512_permutevar_ps(__A, __C),
5899 (__v16sf)_mm512_setzero_ps());
5900}
5901
5902static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5903_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) {
5904 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
5905 (__v8df)__B);
5906}
5907
5908static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5909_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I,
5910 __m512d __B) {
5911 return (__m512d)__builtin_ia32_selectpd_512(__U,
5912 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5913 (__v8df)__A);
5914}
5915
5916static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5917_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
5918 __m512d __B) {
5919 return (__m512d)__builtin_ia32_selectpd_512(__U,
5920 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5921 (__v8df)(__m512d)__I);
5922}
5923
5924static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5925_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
5926 __m512d __B) {
5927 return (__m512d)__builtin_ia32_selectpd_512(__U,
5928 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5929 (__v8df)_mm512_setzero_pd());
5930}
5931
5932static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5933_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) {
5934 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
5935 (__v16sf) __B);
5936}
5937
5938static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5939_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I,
5940 __m512 __B) {
5941 return (__m512)__builtin_ia32_selectps_512(__U,
5942 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
5943 (__v16sf)__A);
5944}
5945
5946static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5947_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U,
5948 __m512 __B) {
5949 return (__m512)__builtin_ia32_selectps_512(__U,
5950 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
5951 (__v16sf)(__m512)__I);
5952}
5953
5954static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5955_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I,
5956 __m512 __B) {
5957 return (__m512)__builtin_ia32_selectps_512(__U,
5958 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
5959 (__v16sf)_mm512_setzero_ps());
5960}
5961
/* Truncating pd -> epu32 conversion with explicit rounding-exception
   control R; merge-masked and zero-masked forms. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
5976
5977static __inline__ __m256i __DEFAULT_FN_ATTRS512
5979{
5980 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5981 (__v8si)
5983 (__mmask8) -1,
5985}
5986
5987static __inline__ __m256i __DEFAULT_FN_ATTRS512
5988_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
5989{
5990 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5991 (__v8si) __W,
5992 (__mmask8) __U,
5994}
5995
5996static __inline__ __m256i __DEFAULT_FN_ATTRS512
5998{
5999 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6000 (__v8si)
6002 (__mmask8) __U,
6004}
6005
/* Scalar roundscale (vrndscalesd/vrndscaless): round the low element of B
   to the precision given by imm, pass the upper elements of A through.
   *_round_* forms take an explicit SAE/rounding argument R; the others use
   _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6089
6090#define _mm512_scalef_round_pd(A, B, R) \
6091 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6092 (__v8df)(__m512d)(B), \
6093 (__v8df)_mm512_undefined_pd(), \
6094 (__mmask8)-1, (int)(R)))
6095
6096#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6097 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6098 (__v8df)(__m512d)(B), \
6099 (__v8df)(__m512d)(W), \
6100 (__mmask8)(U), (int)(R)))
6101
6102#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6103 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6104 (__v8df)(__m512d)(B), \
6105 (__v8df)_mm512_setzero_pd(), \
6106 (__mmask8)(U), (int)(R)))
6107
6108static __inline__ __m512d __DEFAULT_FN_ATTRS512
6109_mm512_scalef_pd (__m512d __A, __m512d __B)
6110{
6111 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6112 (__v8df) __B,
6113 (__v8df)
6115 (__mmask8) -1,
6117}
6118
6119static __inline__ __m512d __DEFAULT_FN_ATTRS512
6120_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6121{
6122 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6123 (__v8df) __B,
6124 (__v8df) __W,
6125 (__mmask8) __U,
6127}
6128
6129static __inline__ __m512d __DEFAULT_FN_ATTRS512
6130_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6131{
6132 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6133 (__v8df) __B,
6134 (__v8df)
6136 (__mmask8) __U,
6138}
6139
6140#define _mm512_scalef_round_ps(A, B, R) \
6141 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6142 (__v16sf)(__m512)(B), \
6143 (__v16sf)_mm512_undefined_ps(), \
6144 (__mmask16)-1, (int)(R)))
6145
6146#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6147 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6148 (__v16sf)(__m512)(B), \
6149 (__v16sf)(__m512)(W), \
6150 (__mmask16)(U), (int)(R)))
6151
6152#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6153 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6154 (__v16sf)(__m512)(B), \
6155 (__v16sf)_mm512_setzero_ps(), \
6156 (__mmask16)(U), (int)(R)))
6157
6158static __inline__ __m512 __DEFAULT_FN_ATTRS512
6159_mm512_scalef_ps (__m512 __A, __m512 __B)
6160{
6161 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6162 (__v16sf) __B,
6163 (__v16sf)
6165 (__mmask16) -1,
6167}
6168
6169static __inline__ __m512 __DEFAULT_FN_ATTRS512
6170_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6171{
6172 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6173 (__v16sf) __B,
6174 (__v16sf) __W,
6175 (__mmask16) __U,
6177}
6178
6179static __inline__ __m512 __DEFAULT_FN_ATTRS512
6180_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6181{
6182 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6183 (__v16sf) __B,
6184 (__v16sf)
6186 (__mmask16) __U,
6188}
6189
6190#define _mm_scalef_round_sd(A, B, R) \
6191 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6192 (__v2df)(__m128d)(B), \
6193 (__v2df)_mm_setzero_pd(), \
6194 (__mmask8)-1, (int)(R)))
6195
6196static __inline__ __m128d __DEFAULT_FN_ATTRS128
6197_mm_scalef_sd (__m128d __A, __m128d __B)
6198{
6199 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6200 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6201 (__mmask8) -1,
6203}
6204
6205static __inline__ __m128d __DEFAULT_FN_ATTRS128
6206_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6207{
6208 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6209 (__v2df) __B,
6210 (__v2df) __W,
6211 (__mmask8) __U,
6213}
6214
6215#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6216 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6217 (__v2df)(__m128d)(B), \
6218 (__v2df)(__m128d)(W), \
6219 (__mmask8)(U), (int)(R)))
6220
6221static __inline__ __m128d __DEFAULT_FN_ATTRS128
6222_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6223{
6224 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6225 (__v2df) __B,
6226 (__v2df) _mm_setzero_pd (),
6227 (__mmask8) __U,
6229}
6230
6231#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6232 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6233 (__v2df)(__m128d)(B), \
6234 (__v2df)_mm_setzero_pd(), \
6235 (__mmask8)(U), (int)(R)))
6236
6237#define _mm_scalef_round_ss(A, B, R) \
6238 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6239 (__v4sf)(__m128)(B), \
6240 (__v4sf)_mm_setzero_ps(), \
6241 (__mmask8)-1, (int)(R)))
6242
6243static __inline__ __m128 __DEFAULT_FN_ATTRS128
6244_mm_scalef_ss (__m128 __A, __m128 __B)
6245{
6246 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6247 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6248 (__mmask8) -1,
6250}
6251
6252static __inline__ __m128 __DEFAULT_FN_ATTRS128
6253_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6254{
6255 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6256 (__v4sf) __B,
6257 (__v4sf) __W,
6258 (__mmask8) __U,
6260}
6261
6262#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6263 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6264 (__v4sf)(__m128)(B), \
6265 (__v4sf)(__m128)(W), \
6266 (__mmask8)(U), (int)(R)))
6267
6268static __inline__ __m128 __DEFAULT_FN_ATTRS128
6269_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6270{
6271 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6272 (__v4sf) __B,
6273 (__v4sf) _mm_setzero_ps (),
6274 (__mmask8) __U,
6276}
6277
6278#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6279 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6280 (__v4sf)(__m128)(B), \
6281 (__v4sf)_mm_setzero_ps(), \
6282 (__mmask8)(U), \
6283 (int)(R)))
6284
6285static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6286_mm512_srai_epi32(__m512i __A, unsigned int __B) {
6287 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
6288}
6289
6290static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6291_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
6292 unsigned int __B) {
6293 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6294 (__v16si)_mm512_srai_epi32(__A, __B),
6295 (__v16si)__W);
6296}
6297
6298static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6299_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
6300 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6301 (__v16si)_mm512_srai_epi32(__A, __B),
6302 (__v16si)_mm512_setzero_si512());
6303}
6304
6305static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6306_mm512_srai_epi64(__m512i __A, unsigned int __B) {
6307 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
6308}
6309
6310static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6311_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A,
6312 unsigned int __B) {
6313 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6314 (__v8di)_mm512_srai_epi64(__A, __B),
6315 (__v8di)__W);
6316}
6317
6318static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6319_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
6320 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6321 (__v8di)_mm512_srai_epi64(__A, __B),
6322 (__v8di)_mm512_setzero_si512());
6323}
6324
/* 128-bit-lane shuffles (shuf_f32x4/f64x2/i32x4/i64x2) and classic
   element shuffles (shufpd/shufps), with mask/maskz select wrappers. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
6408
6409#define _mm_sqrt_round_sd(A, B, R) \
6410 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6411 (__v2df)(__m128d)(B), \
6412 (__v2df)_mm_setzero_pd(), \
6413 (__mmask8)-1, (int)(R)))
6414
6415static __inline__ __m128d __DEFAULT_FN_ATTRS128
6416_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6417{
6418 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6419 (__v2df) __B,
6420 (__v2df) __W,
6421 (__mmask8) __U,
6423}
6424
6425#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6426 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6427 (__v2df)(__m128d)(B), \
6428 (__v2df)(__m128d)(W), \
6429 (__mmask8)(U), (int)(R)))
6430
6431static __inline__ __m128d __DEFAULT_FN_ATTRS128
6432_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6433{
6434 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6435 (__v2df) __B,
6436 (__v2df) _mm_setzero_pd (),
6437 (__mmask8) __U,
6439}
6440
6441#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6442 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6443 (__v2df)(__m128d)(B), \
6444 (__v2df)_mm_setzero_pd(), \
6445 (__mmask8)(U), (int)(R)))
6446
6447#define _mm_sqrt_round_ss(A, B, R) \
6448 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6449 (__v4sf)(__m128)(B), \
6450 (__v4sf)_mm_setzero_ps(), \
6451 (__mmask8)-1, (int)(R)))
6452
6453static __inline__ __m128 __DEFAULT_FN_ATTRS128
6454_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6455{
6456 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6457 (__v4sf) __B,
6458 (__v4sf) __W,
6459 (__mmask8) __U,
6461}
6462
6463#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6464 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6465 (__v4sf)(__m128)(B), \
6466 (__v4sf)(__m128)(W), (__mmask8)(U), \
6467 (int)(R)))
6468
6469static __inline__ __m128 __DEFAULT_FN_ATTRS128
6470_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6471{
6472 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6473 (__v4sf) __B,
6474 (__v4sf) _mm_setzero_ps (),
6475 (__mmask8) __U,
6477}
6478
6479#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6480 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6481 (__v4sf)(__m128)(B), \
6482 (__v4sf)_mm_setzero_ps(), \
6483 (__mmask8)(U), (int)(R)))
6484
6485static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6487 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6488 0, 1, 2, 3, 0, 1, 2, 3,
6489 0, 1, 2, 3, 0, 1, 2, 3);
6490}
6491
6492static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6493_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
6494 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6495 (__v16sf)_mm512_broadcast_f32x4(__A),
6496 (__v16sf)__O);
6497}
6498
6499static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6501 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6502 (__v16sf)_mm512_broadcast_f32x4(__A),
6503 (__v16sf)_mm512_setzero_ps());
6504}
6505
6506static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6508 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6509 0, 1, 2, 3, 0, 1, 2, 3);
6510}
6511
6512static __inline__ __m512d __DEFAULT_FN_ATTRS512
6513_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6514{
6515 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6516 (__v8df)_mm512_broadcast_f64x4(__A),
6517 (__v8df)__O);
6518}
6519
6520static __inline__ __m512d __DEFAULT_FN_ATTRS512
6522{
6523 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6524 (__v8df)_mm512_broadcast_f64x4(__A),
6525 (__v8df)_mm512_setzero_pd());
6526}
6527
6528static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6530 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6531 0, 1, 2, 3, 0, 1, 2, 3,
6532 0, 1, 2, 3, 0, 1, 2, 3);
6533}
6534
6535static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6536_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
6537 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6538 (__v16si)_mm512_broadcast_i32x4(__A),
6539 (__v16si)__O);
6540}
6541
6542static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6544 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6545 (__v16si)_mm512_broadcast_i32x4(__A),
6546 (__v16si)_mm512_setzero_si512());
6547}
6548
6549static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6551 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6552 0, 1, 2, 3, 0, 1, 2, 3);
6553}
6554
6555static __inline__ __m512i __DEFAULT_FN_ATTRS512
6556_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6557{
6558 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6559 (__v8di)_mm512_broadcast_i64x4(__A),
6560 (__v8di)__O);
6561}
6562
6563static __inline__ __m512i __DEFAULT_FN_ATTRS512
6565{
6566 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6567 (__v8di)_mm512_broadcast_i64x4(__A),
6568 (__v8di)_mm512_setzero_si512());
6569}
6570
6571static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6572_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) {
6573 return (__m512d)__builtin_ia32_selectpd_512(__M,
6574 (__v8df) _mm512_broadcastsd_pd(__A),
6575 (__v8df) __O);
6576}
6577
6578static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6580 return (__m512d)__builtin_ia32_selectpd_512(__M,
6581 (__v8df) _mm512_broadcastsd_pd(__A),
6582 (__v8df) _mm512_setzero_pd());
6583}
6584
6585static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6586_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) {
6587 return (__m512)__builtin_ia32_selectps_512(__M,
6588 (__v16sf) _mm512_broadcastss_ps(__A),
6589 (__v16sf) __O);
6590}
6591
6592static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6594 return (__m512)__builtin_ia32_selectps_512(__M,
6595 (__v16sf) _mm512_broadcastss_ps(__A),
6596 (__v16sf) _mm512_setzero_ps());
6597}
6598
6599static __inline__ __m128i __DEFAULT_FN_ATTRS512
6601{
6602 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6603 (__v16qi) _mm_undefined_si128 (),
6604 (__mmask16) -1);
6605}
6606
6607static __inline__ __m128i __DEFAULT_FN_ATTRS512
6608_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6609{
6610 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6611 (__v16qi) __O, __M);
6612}
6613
6614static __inline__ __m128i __DEFAULT_FN_ATTRS512
6616{
6617 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6618 (__v16qi) _mm_setzero_si128 (),
6619 __M);
6620}
6621
6622static __inline__ void __DEFAULT_FN_ATTRS512
6624{
6625 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6626}
6627
6628static __inline__ __m256i __DEFAULT_FN_ATTRS512
6630{
6631 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6632 (__v16hi) _mm256_undefined_si256 (),
6633 (__mmask16) -1);
6634}
6635
6636static __inline__ __m256i __DEFAULT_FN_ATTRS512
6637_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6638{
6639 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6640 (__v16hi) __O, __M);
6641}
6642
6643static __inline__ __m256i __DEFAULT_FN_ATTRS512
6645{
6646 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6647 (__v16hi) _mm256_setzero_si256 (),
6648 __M);
6649}
6650
6651static __inline__ void __DEFAULT_FN_ATTRS512
6653{
6654 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6655}
6656
6657static __inline__ __m128i __DEFAULT_FN_ATTRS512
6659{
6660 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6661 (__v16qi) _mm_undefined_si128 (),
6662 (__mmask8) -1);
6663}
6664
6665static __inline__ __m128i __DEFAULT_FN_ATTRS512
6666_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6667{
6668 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6669 (__v16qi) __O, __M);
6670}
6671
6672static __inline__ __m128i __DEFAULT_FN_ATTRS512
6674{
6675 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6676 (__v16qi) _mm_setzero_si128 (),
6677 __M);
6678}
6679
6680static __inline__ void __DEFAULT_FN_ATTRS512
6682{
6683 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6684}
6685
6686static __inline__ __m256i __DEFAULT_FN_ATTRS512
6688{
6689 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6690 (__v8si) _mm256_undefined_si256 (),
6691 (__mmask8) -1);
6692}
6693
6694static __inline__ __m256i __DEFAULT_FN_ATTRS512
6695_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6696{
6697 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6698 (__v8si) __O, __M);
6699}
6700
6701static __inline__ __m256i __DEFAULT_FN_ATTRS512
6703{
6704 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6705 (__v8si) _mm256_setzero_si256 (),
6706 __M);
6707}
6708
6709static __inline__ void __DEFAULT_FN_ATTRS512
6711{
6712 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6713}
6714
6715static __inline__ __m128i __DEFAULT_FN_ATTRS512
6717{
6718 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6719 (__v8hi) _mm_undefined_si128 (),
6720 (__mmask8) -1);
6721}
6722
6723static __inline__ __m128i __DEFAULT_FN_ATTRS512
6724_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6725{
6726 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6727 (__v8hi) __O, __M);
6728}
6729
6730static __inline__ __m128i __DEFAULT_FN_ATTRS512
6732{
6733 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6734 (__v8hi) _mm_setzero_si128 (),
6735 __M);
6736}
6737
6738static __inline__ void __DEFAULT_FN_ATTRS512
6740{
6741 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6742}
6743
6744static __inline__ __m128i __DEFAULT_FN_ATTRS512
6746{
6747 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6748 (__v16qi) _mm_undefined_si128 (),
6749 (__mmask16) -1);
6750}
6751
6752static __inline__ __m128i __DEFAULT_FN_ATTRS512
6753_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6754{
6755 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6756 (__v16qi) __O,
6757 __M);
6758}
6759
6760static __inline__ __m128i __DEFAULT_FN_ATTRS512
6762{
6763 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6764 (__v16qi) _mm_setzero_si128 (),
6765 __M);
6766}
6767
6768static __inline__ void __DEFAULT_FN_ATTRS512
6770{
6771 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6772}
6773
6774static __inline__ __m256i __DEFAULT_FN_ATTRS512
6776{
6777 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6778 (__v16hi) _mm256_undefined_si256 (),
6779 (__mmask16) -1);
6780}
6781
6782static __inline__ __m256i __DEFAULT_FN_ATTRS512
6783_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6784{
6785 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6786 (__v16hi) __O,
6787 __M);
6788}
6789
6790static __inline__ __m256i __DEFAULT_FN_ATTRS512
6792{
6793 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6794 (__v16hi) _mm256_setzero_si256 (),
6795 __M);
6796}
6797
6798static __inline__ void __DEFAULT_FN_ATTRS512
6800{
6801 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6802}
6803
6804static __inline__ __m128i __DEFAULT_FN_ATTRS512
6806{
6807 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6808 (__v16qi) _mm_undefined_si128 (),
6809 (__mmask8) -1);
6810}
6811
6812static __inline__ __m128i __DEFAULT_FN_ATTRS512
6813_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6814{
6815 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6816 (__v16qi) __O,
6817 __M);
6818}
6819
6820static __inline__ __m128i __DEFAULT_FN_ATTRS512
6822{
6823 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6824 (__v16qi) _mm_setzero_si128 (),
6825 __M);
6826}
6827
6828static __inline__ void __DEFAULT_FN_ATTRS512
6830{
6831 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6832}
6833
6834static __inline__ __m256i __DEFAULT_FN_ATTRS512
6836{
6837 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6838 (__v8si) _mm256_undefined_si256 (),
6839 (__mmask8) -1);
6840}
6841
6842static __inline__ __m256i __DEFAULT_FN_ATTRS512
6843_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6844{
6845 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6846 (__v8si) __O, __M);
6847}
6848
6849static __inline__ __m256i __DEFAULT_FN_ATTRS512
6851{
6852 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6853 (__v8si) _mm256_setzero_si256 (),
6854 __M);
6855}
6856
6857static __inline__ void __DEFAULT_FN_ATTRS512
6859{
6860 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
6861}
6862
6863static __inline__ __m128i __DEFAULT_FN_ATTRS512
6865{
6866 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6867 (__v8hi) _mm_undefined_si128 (),
6868 (__mmask8) -1);
6869}
6870
6871static __inline__ __m128i __DEFAULT_FN_ATTRS512
6872_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6873{
6874 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6875 (__v8hi) __O, __M);
6876}
6877
6878static __inline__ __m128i __DEFAULT_FN_ATTRS512
6880{
6881 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6882 (__v8hi) _mm_setzero_si128 (),
6883 __M);
6884}
6885
6886static __inline__ void __DEFAULT_FN_ATTRS512
6888{
6889 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
6890}
6891
6892static __inline__ __m128i __DEFAULT_FN_ATTRS512
6894{
6895 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6896 (__v16qi) _mm_undefined_si128 (),
6897 (__mmask16) -1);
6898}
6899
6900static __inline__ __m128i __DEFAULT_FN_ATTRS512
6901_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6902{
6903 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6904 (__v16qi) __O, __M);
6905}
6906
6907static __inline__ __m128i __DEFAULT_FN_ATTRS512
6909{
6910 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6911 (__v16qi) _mm_setzero_si128 (),
6912 __M);
6913}
6914
6915static __inline__ void __DEFAULT_FN_ATTRS512
6917{
6918 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6919}
6920
6921static __inline__ __m256i __DEFAULT_FN_ATTRS512
6923{
6924 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6925 (__v16hi) _mm256_undefined_si256 (),
6926 (__mmask16) -1);
6927}
6928
6929static __inline__ __m256i __DEFAULT_FN_ATTRS512
6930_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6931{
6932 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6933 (__v16hi) __O, __M);
6934}
6935
6936static __inline__ __m256i __DEFAULT_FN_ATTRS512
6938{
6939 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6940 (__v16hi) _mm256_setzero_si256 (),
6941 __M);
6942}
6943
6944static __inline__ void __DEFAULT_FN_ATTRS512
6946{
6947 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
6948}
6949
6950static __inline__ __m128i __DEFAULT_FN_ATTRS512
6952{
6953 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6954 (__v16qi) _mm_undefined_si128 (),
6955 (__mmask8) -1);
6956}
6957
6958static __inline__ __m128i __DEFAULT_FN_ATTRS512
6959_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6960{
6961 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6962 (__v16qi) __O, __M);
6963}
6964
6965static __inline__ __m128i __DEFAULT_FN_ATTRS512
6967{
6968 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6969 (__v16qi) _mm_setzero_si128 (),
6970 __M);
6971}
6972
6973static __inline__ void __DEFAULT_FN_ATTRS512
6975{
6976 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6977}
6978
6979static __inline__ __m256i __DEFAULT_FN_ATTRS512
6981{
6982 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6983 (__v8si) _mm256_undefined_si256 (),
6984 (__mmask8) -1);
6985}
6986
6987static __inline__ __m256i __DEFAULT_FN_ATTRS512
6988_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6989{
6990 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6991 (__v8si) __O, __M);
6992}
6993
6994static __inline__ __m256i __DEFAULT_FN_ATTRS512
6996{
6997 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6998 (__v8si) _mm256_setzero_si256 (),
6999 __M);
7000}
7001
7002static __inline__ void __DEFAULT_FN_ATTRS512
7004{
7005 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7006}
7007
7008static __inline__ __m128i __DEFAULT_FN_ATTRS512
7010{
7011 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7012 (__v8hi) _mm_undefined_si128 (),
7013 (__mmask8) -1);
7014}
7015
7016static __inline__ __m128i __DEFAULT_FN_ATTRS512
7017_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7018{
7019 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7020 (__v8hi) __O, __M);
7021}
7022
7023static __inline__ __m128i __DEFAULT_FN_ATTRS512
7025{
7026 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7027 (__v8hi) _mm_setzero_si128 (),
7028 __M);
7029}
7030
7031static __inline__ void __DEFAULT_FN_ATTRS512
7033{
7034 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7035}
7036
/* Extract one 128-bit lane of 32-bit ints (VEXTRACTI32X4) or one 256-bit
 * half of 64-bit ints (VEXTRACTI64X4).  imm selects the lane and must be a
 * compile-time constant. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
      (__mmask8) - 1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8) - 1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
7066
/* Insert a 256-bit (x4 of 64-bit) or 128-bit (x4 of 32-bit) vector into a
 * 512-bit vector at the lane selected by imm.  Masked forms blend the
 * insertion result with W (mask) or zero (maskz) via the select builtins. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                            (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                            (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                            (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                            (__v8df)_mm512_setzero_pd()))

#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                            (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                            (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                            (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                            (__v8di)_mm512_setzero_si512()))

#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                            (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                            (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                            (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                            (__v16sf)_mm512_setzero_ps()))

#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                            (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                            (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                            (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                            (__v16si)_mm512_setzero_si512()))
7122
/* VGETMANTPD/VGETMANTPS: extract the normalized mantissa of each element.
 * B selects the interval (_MM_MANT_NORM_*), C the sign control
 * (_MM_MANT_SIGN_*); they are packed into one immediate as (C<<2)|B.
 * The *_round_* forms additionally take an explicit rounding mode R. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7200
/* VGETEXPPD with explicit rounding mode R: extract the unbiased exponent of
 * each double element as a double. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7215
7216static __inline__ __m512d __DEFAULT_FN_ATTRS512
7217_mm512_getexp_pd (__m512d __A)
7218{
7219 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7220 (__v8df) _mm512_undefined_pd (),
7221 (__mmask8) -1,
7223}
7224
7225static __inline__ __m512d __DEFAULT_FN_ATTRS512
7226_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7227{
7228 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7229 (__v8df) __W,
7230 (__mmask8) __U,
7232}
7233
7234static __inline__ __m512d __DEFAULT_FN_ATTRS512
7236{
7237 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7238 (__v8df) _mm512_setzero_pd (),
7239 (__mmask8) __U,
7241}
7242
/* VGETEXPPS with explicit rounding mode R: extract the unbiased exponent of
 * each float element as a float. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7257
7258static __inline__ __m512 __DEFAULT_FN_ATTRS512
7260{
7261 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7262 (__v16sf) _mm512_undefined_ps (),
7263 (__mmask16) -1,
7265}
7266
7267static __inline__ __m512 __DEFAULT_FN_ATTRS512
7268_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7269{
7270 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7271 (__v16sf) __W,
7272 (__mmask16) __U,
7274}
7275
7276static __inline__ __m512 __DEFAULT_FN_ATTRS512
7278{
7279 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7280 (__v16sf) _mm512_setzero_ps (),
7281 (__mmask16) __U,
7283}
7284
/* Gather macros: load elements from addr + index[i]*scale.  i64gather uses
 * 64-bit indices (8 lanes); i32gather uses 32-bit indices.  scale must be a
 * compile-time constant (1, 2, 4 or 8).  Masked forms leave lanes whose mask
 * bit is clear equal to v1_old. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7380
/* Scatter macros: store elements of v1 to addr + index[i]*scale.  Masked
 * forms store only lanes whose mask bit is set.  scale must be a
 * compile-time constant (1, 2, 4 or 8). */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7460
7461static __inline__ __m128 __DEFAULT_FN_ATTRS128
7462_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7463{
7464 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7465 (__v4sf)__A,
7466 (__v4sf)__B,
7467 (__mmask8)__U,
7469}
7470
7471#define _mm_fmadd_round_ss(A, B, C, R) \
7472 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7473 (__v4sf)(__m128)(B), \
7474 (__v4sf)(__m128)(C), (__mmask8)-1, \
7475 (int)(R)))
7476
7477#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7478 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7479 (__v4sf)(__m128)(A), \
7480 (__v4sf)(__m128)(B), (__mmask8)(U), \
7481 (int)(R)))
7482
7483static __inline__ __m128 __DEFAULT_FN_ATTRS128
7484_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7485{
7486 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7487 (__v4sf)__B,
7488 (__v4sf)__C,
7489 (__mmask8)__U,
7491}
7492
7493#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7494 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7495 (__v4sf)(__m128)(B), \
7496 (__v4sf)(__m128)(C), (__mmask8)(U), \
7497 (int)(R)))
7498
7499static __inline__ __m128 __DEFAULT_FN_ATTRS128
7500_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7501{
7502 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7503 (__v4sf)__X,
7504 (__v4sf)__Y,
7505 (__mmask8)__U,
7507}
7508
7509#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7510 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7511 (__v4sf)(__m128)(X), \
7512 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7513 (int)(R)))
7514
7515static __inline__ __m128 __DEFAULT_FN_ATTRS128
7516_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7517{
7518 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7519 (__v4sf)__A,
7520 -(__v4sf)__B,
7521 (__mmask8)__U,
7523}
7524
7525#define _mm_fmsub_round_ss(A, B, C, R) \
7526 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7527 (__v4sf)(__m128)(B), \
7528 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7529 (int)(R)))
7530
7531#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7532 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7533 (__v4sf)(__m128)(A), \
7534 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7535 (int)(R)))
7536
7537static __inline__ __m128 __DEFAULT_FN_ATTRS128
7538_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7539{
7540 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7541 (__v4sf)__B,
7542 -(__v4sf)__C,
7543 (__mmask8)__U,
7545}
7546
7547#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7548 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7549 (__v4sf)(__m128)(B), \
7550 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7551 (int)(R)))
7552
7553static __inline__ __m128 __DEFAULT_FN_ATTRS128
7554_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7555{
7556 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7557 (__v4sf)__X,
7558 (__v4sf)__Y,
7559 (__mmask8)__U,
7561}
7562
7563#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7564 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7565 (__v4sf)(__m128)(X), \
7566 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7567 (int)(R)))
7568
7569static __inline__ __m128 __DEFAULT_FN_ATTRS128
7570_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7571{
7572 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7573 -(__v4sf)__A,
7574 (__v4sf)__B,
7575 (__mmask8)__U,
7577}
7578
7579#define _mm_fnmadd_round_ss(A, B, C, R) \
7580 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7581 -(__v4sf)(__m128)(B), \
7582 (__v4sf)(__m128)(C), (__mmask8)-1, \
7583 (int)(R)))
7584
7585#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7586 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7587 -(__v4sf)(__m128)(A), \
7588 (__v4sf)(__m128)(B), (__mmask8)(U), \
7589 (int)(R)))
7590
7591static __inline__ __m128 __DEFAULT_FN_ATTRS128
7592_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7593{
7594 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7595 -(__v4sf)__B,
7596 (__v4sf)__C,
7597 (__mmask8)__U,
7599}
7600
7601#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7602 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7603 -(__v4sf)(__m128)(B), \
7604 (__v4sf)(__m128)(C), (__mmask8)(U), \
7605 (int)(R)))
7606
7607static __inline__ __m128 __DEFAULT_FN_ATTRS128
7608_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7609{
7610 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7611 -(__v4sf)__X,
7612 (__v4sf)__Y,
7613 (__mmask8)__U,
7615}
7616
7617#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7618 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7619 -(__v4sf)(__m128)(X), \
7620 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7621 (int)(R)))
7622
7623static __inline__ __m128 __DEFAULT_FN_ATTRS128
7624_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7625{
7626 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7627 -(__v4sf)__A,
7628 -(__v4sf)__B,
7629 (__mmask8)__U,
7631}
7632
7633#define _mm_fnmsub_round_ss(A, B, C, R) \
7634 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7635 -(__v4sf)(__m128)(B), \
7636 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7637 (int)(R)))
7638
7639#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7640 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7641 -(__v4sf)(__m128)(A), \
7642 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7643 (int)(R)))
7644
7645static __inline__ __m128 __DEFAULT_FN_ATTRS128
7646_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7647{
7648 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7649 -(__v4sf)__B,
7650 -(__v4sf)__C,
7651 (__mmask8)__U,
7653}
7654
7655#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7656 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7657 -(__v4sf)(__m128)(B), \
7658 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7659 (int)(R)))
7660
7661static __inline__ __m128 __DEFAULT_FN_ATTRS128
7662_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7663{
7664 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7665 -(__v4sf)__X,
7666 (__v4sf)__Y,
7667 (__mmask8)__U,
7669}
7670
7671#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7672 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7673 -(__v4sf)(__m128)(X), \
7674 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7675 (int)(R)))
7676
7677static __inline__ __m128d __DEFAULT_FN_ATTRS128
7678_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7679{
7680 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7681 (__v2df)__A,
7682 (__v2df)__B,
7683 (__mmask8)__U,
7685}
7686
7687#define _mm_fmadd_round_sd(A, B, C, R) \
7688 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7689 (__v2df)(__m128d)(B), \
7690 (__v2df)(__m128d)(C), (__mmask8)-1, \
7691 (int)(R)))
7692
7693#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
7694 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7695 (__v2df)(__m128d)(A), \
7696 (__v2df)(__m128d)(B), (__mmask8)(U), \
7697 (int)(R)))
7698
7699static __inline__ __m128d __DEFAULT_FN_ATTRS128
7700_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7701{
7702 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7703 (__v2df)__B,
7704 (__v2df)__C,
7705 (__mmask8)__U,
7707}
7708
7709#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
7710 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7711 (__v2df)(__m128d)(B), \
7712 (__v2df)(__m128d)(C), (__mmask8)(U), \
7713 (int)(R)))
7714
7715static __inline__ __m128d __DEFAULT_FN_ATTRS128
7716_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7717{
7718 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7719 (__v2df)__X,
7720 (__v2df)__Y,
7721 (__mmask8)__U,
7723}
7724
7725#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
7726 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7727 (__v2df)(__m128d)(X), \
7728 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7729 (int)(R)))
7730
7731static __inline__ __m128d __DEFAULT_FN_ATTRS128
7732_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7733{
7734 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7735 (__v2df)__A,
7736 -(__v2df)__B,
7737 (__mmask8)__U,
7739}
7740
7741#define _mm_fmsub_round_sd(A, B, C, R) \
7742 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7743 (__v2df)(__m128d)(B), \
7744 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7745 (int)(R)))
7746
7747#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
7748 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7749 (__v2df)(__m128d)(A), \
7750 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7751 (int)(R)))
7752
7753static __inline__ __m128d __DEFAULT_FN_ATTRS128
7754_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7755{
7756 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7757 (__v2df)__B,
7758 -(__v2df)__C,
7759 (__mmask8)__U,
7761}
7762
7763#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
7764 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7765 (__v2df)(__m128d)(B), \
7766 -(__v2df)(__m128d)(C), \
7767 (__mmask8)(U), (int)(R)))
7768
7769static __inline__ __m128d __DEFAULT_FN_ATTRS128
7770_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7771{
7772 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7773 (__v2df)__X,
7774 (__v2df)__Y,
7775 (__mmask8)__U,
7777}
7778
7779#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
7780 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7781 (__v2df)(__m128d)(X), \
7782 (__v2df)(__m128d)(Y), \
7783 (__mmask8)(U), (int)(R)))
7784
7785static __inline__ __m128d __DEFAULT_FN_ATTRS128
7786_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7787{
7788 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7789 -(__v2df)__A,
7790 (__v2df)__B,
7791 (__mmask8)__U,
7793}
7794
7795#define _mm_fnmadd_round_sd(A, B, C, R) \
7796 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7797 -(__v2df)(__m128d)(B), \
7798 (__v2df)(__m128d)(C), (__mmask8)-1, \
7799 (int)(R)))
7800
7801#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
7802 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7803 -(__v2df)(__m128d)(A), \
7804 (__v2df)(__m128d)(B), (__mmask8)(U), \
7805 (int)(R)))
7806
7807static __inline__ __m128d __DEFAULT_FN_ATTRS128
7808_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7809{
7810 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7811 -(__v2df)__B,
7812 (__v2df)__C,
7813 (__mmask8)__U,
7815}
7816
7817#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
7818 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7819 -(__v2df)(__m128d)(B), \
7820 (__v2df)(__m128d)(C), (__mmask8)(U), \
7821 (int)(R)))
7822
7823static __inline__ __m128d __DEFAULT_FN_ATTRS128
7824_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7825{
7826 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7827 -(__v2df)__X,
7828 (__v2df)__Y,
7829 (__mmask8)__U,
7831}
7832
7833#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
7834 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7835 -(__v2df)(__m128d)(X), \
7836 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7837 (int)(R)))
7838
7839static __inline__ __m128d __DEFAULT_FN_ATTRS128
7840_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7841{
7842 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7843 -(__v2df)__A,
7844 -(__v2df)__B,
7845 (__mmask8)__U,
7847}
7848
7849#define _mm_fnmsub_round_sd(A, B, C, R) \
7850 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7851 -(__v2df)(__m128d)(B), \
7852 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7853 (int)(R)))
7854
7855#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
7856 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7857 -(__v2df)(__m128d)(A), \
7858 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7859 (int)(R)))
7860
7861static __inline__ __m128d __DEFAULT_FN_ATTRS128
7862_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7863{
7864 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7865 -(__v2df)__B,
7866 -(__v2df)__C,
7867 (__mmask8)__U,
7869}
7870
7871#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
7872 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7873 -(__v2df)(__m128d)(B), \
7874 -(__v2df)(__m128d)(C), \
7875 (__mmask8)(U), \
7876 (int)(R)))
7877
7878static __inline__ __m128d __DEFAULT_FN_ATTRS128
7879_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7880{
7881 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7882 -(__v2df)__X,
7883 (__v2df)__Y,
7884 (__mmask8)__U,
7886}
7887
7888#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
7889 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7890 -(__v2df)(__m128d)(X), \
7891 (__v2df)(__m128d)(Y), \
7892 (__mmask8)(U), (int)(R)))
7893
/* Permute 128-bit lanes of doubles / 64-bit integers within the 512-bit
 * register by the immediate C; masked variants blend with W or zero. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
7919
7920static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7921_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
7922 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
7923}
7924
7925static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7926_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X,
7927 __m512d __Y) {
7928 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7929 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7930 (__v8df)__W);
7931}
7932
7933static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7934_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
7935 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7936 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7937 (__v8df)_mm512_setzero_pd());
7938}
7939
7940static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7941_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
7942 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
7943}
7944
7945static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7946_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
7947 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7948 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
7949 (__v8di)_mm512_setzero_si512());
7950}
7951
7952static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7953_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X,
7954 __m512i __Y) {
7955 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7956 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
7957 (__v8di)__W);
7958}
7959
7960static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7961_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
7962 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
7963}
7964
7965static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7966_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
7967 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
7968 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
7969 (__v16sf)__W);
7970}
7971
7972static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7973_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
7974 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
7975 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
7976 (__v16sf)_mm512_setzero_ps());
7977}
7978
7979static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7980_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
7981 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
7982}
7983
7984#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
7985
7986static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7988 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7989 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
7990 (__v16si)_mm512_setzero_si512());
7991}
7992
7993static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7994_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X,
7995 __m512i __Y) {
7996 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7997 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
7998 (__v16si)__W);
7999}
8000
8001#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8002
8003static __inline__ __mmask16
8005 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8006}
8007
8010 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8011}
8012
8015 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8016}
8017
8018static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
8020 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8021}
8022
8023static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
8025 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8026}
8027
8028static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8030 return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8031}
8032
8033static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8035 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8036}
8037
8038static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8039_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8040 *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8041 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8042}
8043
8046 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8047}
8048
8051 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8052}
8053
8056 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8057}
8058
8059#define _kand_mask16 _mm512_kand
8060#define _kandn_mask16 _mm512_kandn
8061#define _knot_mask16 _mm512_knot
8062#define _kor_mask16 _mm512_kor
8063#define _kxnor_mask16 _mm512_kxnor
8064#define _kxor_mask16 _mm512_kxor
8065
8066#define _kshiftli_mask16(A, I) \
8067 ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))
8068
8069#define _kshiftri_mask16(A, I) \
8070 ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8071
8072static __inline__ unsigned int __DEFAULT_FN_ATTRS
8074 return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8075}
8076
8077static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8078_cvtu32_mask16(unsigned int __A) {
8079 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8080}
8081
8082static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8084 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8085}
8086
8087static __inline__ void __DEFAULT_FN_ATTRS
8089 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8090}
8091
8092static __inline__ void __DEFAULT_FN_ATTRS512
8093_mm512_stream_si512 (void * __P, __m512i __A)
8094{
8095 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8096 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8097}
8098
8099static __inline__ __m512i __DEFAULT_FN_ATTRS512
8101{
8102 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8103 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8104}
8105
8106static __inline__ void __DEFAULT_FN_ATTRS512
8107_mm512_stream_pd (void *__P, __m512d __A)
8108{
8109 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8110 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8111}
8112
8113static __inline__ void __DEFAULT_FN_ATTRS512
8114_mm512_stream_ps (void *__P, __m512 __A)
8115{
8116 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8117 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8118}
8119
8120static __inline__ __m512d __DEFAULT_FN_ATTRS512
8121_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8122{
8123 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8124 (__v8df) __W,
8125 (__mmask8) __U);
8126}
8127
8128static __inline__ __m512d __DEFAULT_FN_ATTRS512
8130{
8131 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8132 (__v8df)
8134 (__mmask8) __U);
8135}
8136
8137static __inline__ __m512i __DEFAULT_FN_ATTRS512
8138_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8139{
8140 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8141 (__v8di) __W,
8142 (__mmask8) __U);
8143}
8144
8145static __inline__ __m512i __DEFAULT_FN_ATTRS512
8147{
8148 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8149 (__v8di)
8151 (__mmask8) __U);
8152}
8153
8154static __inline__ __m512 __DEFAULT_FN_ATTRS512
8155_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8156{
8157 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8158 (__v16sf) __W,
8159 (__mmask16) __U);
8160}
8161
8162static __inline__ __m512 __DEFAULT_FN_ATTRS512
8164{
8165 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8166 (__v16sf)
8168 (__mmask16) __U);
8169}
8170
8171static __inline__ __m512i __DEFAULT_FN_ATTRS512
8172_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8173{
8174 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8175 (__v16si) __W,
8176 (__mmask16) __U);
8177}
8178
8179static __inline__ __m512i __DEFAULT_FN_ATTRS512
8181{
8182 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8183 (__v16si)
8185 (__mmask16) __U);
8186}
8187
/* Scalar compare-to-mask: compare the low ss/sd elements with predicate P,
 * producing a 1-bit result in a mask register.  "_round" forms take an
 * explicit rounding/SAE immediate; plain forms use the current direction. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8231
8232/* Bit Test */
8233
8234static __inline __mmask16 __DEFAULT_FN_ATTRS512
8235_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8236{
8239}
8240
8241static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8242_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8243{
8244 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8246}
8247
8248static __inline __mmask8 __DEFAULT_FN_ATTRS512
8249_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8250{
8251 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8253}
8254
8255static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8256_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8257{
8258 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8260}
8261
8262static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8263_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8264{
8265 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8267}
8268
8269static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8270_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8271{
8272 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8274}
8275
8276static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8277_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8278{
8279 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8281}
8282
8283static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8284_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8285{
8286 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8288}
8289
8290static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8292{
8293 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8294 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8295}
8296
8297static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8298_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8299 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8300 (__v16sf)_mm512_movehdup_ps(__A),
8301 (__v16sf)__W);
8302}
8303
8304static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8306 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8307 (__v16sf)_mm512_movehdup_ps(__A),
8308 (__v16sf)_mm512_setzero_ps());
8309}
8310
8311static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8313{
8314 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8315 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8316}
8317
8318static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8319_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8320 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8321 (__v16sf)_mm512_moveldup_ps(__A),
8322 (__v16sf)__W);
8323}
8324
8325static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8327 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8328 (__v16sf)_mm512_moveldup_ps(__A),
8329 (__v16sf)_mm512_setzero_ps());
8330}
8331
8332static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8333_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
8334 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8335}
8336
8337static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8338_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
8339 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8340 _mm_setzero_ps());
8341}
8342
8343static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8344_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
8345 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8346}
8347
8348static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8349_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
8350 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8351 _mm_setzero_pd());
8352}
8353
8354static __inline__ void __DEFAULT_FN_ATTRS128
8355_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8356{
8357 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8358}
8359
8360static __inline__ void __DEFAULT_FN_ATTRS128
8361_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8362{
8363 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8364}
8365
8366static __inline__ __m128 __DEFAULT_FN_ATTRS128
8367_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8368{
8369 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8370 (__v4sf)_mm_setzero_ps(),
8371 0, 4, 4, 4);
8372
8373 return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
8374}
8375
8376static __inline__ __m128 __DEFAULT_FN_ATTRS128
8377_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8378{
8379 return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
8380 (__v4sf) _mm_setzero_ps(),
8381 __U & 1);
8382}
8383
8384static __inline__ __m128d __DEFAULT_FN_ATTRS128
8385_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8386{
8387 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8388 (__v2df)_mm_setzero_pd(),
8389 0, 2);
8390
8391 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
8392}
8393
8394static __inline__ __m128d __DEFAULT_FN_ATTRS128
8395_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8396{
8397 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
8398 (__v2df) _mm_setzero_pd(),
8399 __U & 1);
8400}
8401
8402#define _mm512_shuffle_epi32(A, I) \
8403 ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))
8404
8405#define _mm512_mask_shuffle_epi32(W, U, A, I) \
8406 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
8407 (__v16si)_mm512_shuffle_epi32((A), (I)), \
8408 (__v16si)(__m512i)(W)))
8409
8410#define _mm512_maskz_shuffle_epi32(U, A, I) \
8411 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
8412 (__v16si)_mm512_shuffle_epi32((A), (I)), \
8413 (__v16si)_mm512_setzero_si512()))
8414
8415static __inline__ __m512d __DEFAULT_FN_ATTRS512
8416_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8417{
8418 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8419 (__v8df) __W,
8420 (__mmask8) __U);
8421}
8422
8423static __inline__ __m512d __DEFAULT_FN_ATTRS512
8425{
8426 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8427 (__v8df) _mm512_setzero_pd (),
8428 (__mmask8) __U);
8429}
8430
8431static __inline__ __m512i __DEFAULT_FN_ATTRS512
8432_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8433{
8434 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8435 (__v8di) __W,
8436 (__mmask8) __U);
8437}
8438
8439static __inline__ __m512i __DEFAULT_FN_ATTRS512
8441{
8442 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8443 (__v8di) _mm512_setzero_si512 (),
8444 (__mmask8) __U);
8445}
8446
8447static __inline__ __m512d __DEFAULT_FN_ATTRS512
8448_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8449{
8450 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8451 (__v8df) __W,
8452 (__mmask8) __U);
8453}
8454
8455static __inline__ __m512d __DEFAULT_FN_ATTRS512
8457{
8458 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8459 (__v8df) _mm512_setzero_pd(),
8460 (__mmask8) __U);
8461}
8462
8463static __inline__ __m512i __DEFAULT_FN_ATTRS512
8464_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8465{
8466 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8467 (__v8di) __W,
8468 (__mmask8) __U);
8469}
8470
8471static __inline__ __m512i __DEFAULT_FN_ATTRS512
8473{
8474 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8475 (__v8di) _mm512_setzero_si512(),
8476 (__mmask8) __U);
8477}
8478
8479static __inline__ __m512 __DEFAULT_FN_ATTRS512
8480_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8481{
8482 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8483 (__v16sf) __W,
8484 (__mmask16) __U);
8485}
8486
8487static __inline__ __m512 __DEFAULT_FN_ATTRS512
8489{
8490 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8491 (__v16sf) _mm512_setzero_ps(),
8492 (__mmask16) __U);
8493}
8494
8495static __inline__ __m512i __DEFAULT_FN_ATTRS512
8496_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8497{
8498 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8499 (__v16si) __W,
8500 (__mmask16) __U);
8501}
8502
8503static __inline__ __m512i __DEFAULT_FN_ATTRS512
8505{
8506 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8507 (__v16si) _mm512_setzero_si512(),
8508 (__mmask16) __U);
8509}
8510
8511static __inline__ __m512 __DEFAULT_FN_ATTRS512
8512_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8513{
8514 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8515 (__v16sf) __W,
8516 (__mmask16) __U);
8517}
8518
8519static __inline__ __m512 __DEFAULT_FN_ATTRS512
8521{
8522 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8523 (__v16sf) _mm512_setzero_ps(),
8524 (__mmask16) __U);
8525}
8526
8527static __inline__ __m512i __DEFAULT_FN_ATTRS512
8528_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8529{
8530 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8531 (__v16si) __W,
8532 (__mmask16) __U);
8533}
8534
8535static __inline__ __m512i __DEFAULT_FN_ATTRS512
8537{
8538 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8539 (__v16si) _mm512_setzero_si512(),
8540 (__mmask16) __U);
8541}
8542
8543#define _mm512_cvt_roundps_pd(A, R) \
8544 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8545 (__v8df)_mm512_undefined_pd(), \
8546 (__mmask8)-1, (int)(R)))
8547
8548#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
8549 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8550 (__v8df)(__m512d)(W), \
8551 (__mmask8)(U), (int)(R)))
8552
8553#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
8554 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8555 (__v8df)_mm512_setzero_pd(), \
8556 (__mmask8)(U), (int)(R)))
8557
8558static __inline__ __m512d
8560 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8561}
8562
8563static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8564_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) {
8565 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8566 (__v8df)_mm512_cvtps_pd(__A),
8567 (__v8df)__W);
8568}
8569
8570static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8572 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8573 (__v8df)_mm512_cvtps_pd(__A),
8574 (__v8df)_mm512_setzero_pd());
8575}
8576
8577static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8579 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8580}
8581
8582static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8583_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) {
8584 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8585}
8586
8587static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8588_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {
8589 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8590 (__v8df)__W);
8591}
8592
8593static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8595 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8596 (__v8df)_mm512_setzero_pd());
8597}
8598
8599static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8600_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8601 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8602 (__v16sf)__W);
8603}
8604
8605static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8607 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8608 (__v16sf)_mm512_setzero_ps());
8609}
8610
8611static __inline__ void __DEFAULT_FN_ATTRS512
8613{
8614 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8615 (__mmask8) __U);
8616}
8617
8618static __inline__ void __DEFAULT_FN_ATTRS512
8620{
8621 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8622 (__mmask8) __U);
8623}
8624
8625static __inline__ void __DEFAULT_FN_ATTRS512
8627{
8628 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8629 (__mmask16) __U);
8630}
8631
8632static __inline__ void __DEFAULT_FN_ATTRS512
8634{
8635 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8636 (__mmask16) __U);
8637}
8638
8639#define _mm_cvt_roundsd_ss(A, B, R) \
8640 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8641 (__v2df)(__m128d)(B), \
8642 (__v4sf)_mm_undefined_ps(), \
8643 (__mmask8)-1, (int)(R)))
8644
8645#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
8646 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8647 (__v2df)(__m128d)(B), \
8648 (__v4sf)(__m128)(W), \
8649 (__mmask8)(U), (int)(R)))
8650
8651#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
8652 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8653 (__v2df)(__m128d)(B), \
8654 (__v4sf)_mm_setzero_ps(), \
8655 (__mmask8)(U), (int)(R)))
8656
8657static __inline__ __m128 __DEFAULT_FN_ATTRS128
8658_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
8659{
8660 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8661 (__v2df)__B,
8662 (__v4sf)__W,
8664}
8665
8666static __inline__ __m128 __DEFAULT_FN_ATTRS128
8667_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
8668{
8669 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8670 (__v2df)__B,
8671 (__v4sf)_mm_setzero_ps(),
8673}
8674
/* Signed-integer naming aliases for the scalar conversion intrinsics, plus
 * rounding-controlled int->float/double conversions (64-bit forms only on
 * x86-64). */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
8711
8712#define _mm_cvt_roundss_sd(A, B, R) \
8713 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8714 (__v4sf)(__m128)(B), \
8715 (__v2df)_mm_undefined_pd(), \
8716 (__mmask8)-1, (int)(R)))
8717
8718#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
8719 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8720 (__v4sf)(__m128)(B), \
8721 (__v2df)(__m128d)(W), \
8722 (__mmask8)(U), (int)(R)))
8723
8724#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
8725 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8726 (__v4sf)(__m128)(B), \
8727 (__v2df)_mm_setzero_pd(), \
8728 (__mmask8)(U), (int)(R)))
8729
8730static __inline__ __m128d __DEFAULT_FN_ATTRS128
8731_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
8732{
8733 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8734 (__v4sf)__B,
8735 (__v2df)__W,
8737}
8738
8739static __inline__ __m128d __DEFAULT_FN_ATTRS128
8740_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
8741{
8742 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8743 (__v4sf)__B,
8744 (__v2df)_mm_setzero_pd(),
8746}
8747
8748static __inline__ __m128d __DEFAULT_FN_ATTRS128
8749_mm_cvtu32_sd (__m128d __A, unsigned __B)
8750{
8751 __A[0] = __B;
8752 return __A;
8753}
8754
8755#ifdef __x86_64__
8756#define _mm_cvt_roundu64_sd(A, B, R) \
8757 ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
8758 (unsigned long long)(B), (int)(R)))
8759
8760static __inline__ __m128d __DEFAULT_FN_ATTRS128
8761_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
8762{
8763 __A[0] = __B;
8764 return __A;
8765}
8766#endif
8767
/// Converts the unsigned 32-bit integer \a B to single precision and inserts
/// it into the lowest element of \a A, using rounding control \a R.
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))
8771
/// Converts the unsigned 32-bit integer \a __B to single precision and
/// inserts it into the lowest element of \a __A; upper elements pass through.
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu32_ss (__m128 __A, unsigned __B)
{
  __A[0] = __B;
  return __A;
}
8778
8779#ifdef __x86_64__
/// Converts the unsigned 64-bit integer \a B to single precision and inserts
/// it into the lowest element of \a A, using rounding control \a R
/// (x86-64 only).
#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))
8783
/// Converts the unsigned 64-bit integer \a __B to single precision and
/// inserts it into the lowest element of \a __A (x86-64 only).
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  __A[0] = __B;
  return __A;
}
8790#endif
8791
/// Broadcasts \a __A into every 32-bit element selected by mask \a __M;
/// elements whose mask bit is clear are copied from \a __O.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) {
  return (__m512i) __builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_set1_epi32(__A),
                                              (__v16si) __O);
}
8798
/// Broadcasts \a __A into every 64-bit element selected by mask \a __M;
/// elements whose mask bit is clear are copied from \a __O.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) {
  return (__m512i) __builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_set1_epi64(__A),
                                              (__v8di) __O);
}
8805
8807 char __e63, char __e62, char __e61, char __e60, char __e59, char __e58,
8808 char __e57, char __e56, char __e55, char __e54, char __e53, char __e52,
8809 char __e51, char __e50, char __e49, char __e48, char __e47, char __e46,
8810 char __e45, char __e44, char __e43, char __e42, char __e41, char __e40,
8811 char __e39, char __e38, char __e37, char __e36, char __e35, char __e34,
8812 char __e33, char __e32, char __e31, char __e30, char __e29, char __e28,
8813 char __e27, char __e26, char __e25, char __e24, char __e23, char __e22,
8814 char __e21, char __e20, char __e19, char __e18, char __e17, char __e16,
8815 char __e15, char __e14, char __e13, char __e12, char __e11, char __e10,
8816 char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3,
8817 char __e2, char __e1, char __e0) {
8818
8819 return __extension__ (__m512i)(__v64qi)
8820 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8821 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8822 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8823 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
8824 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
8825 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
8826 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
8827 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
8828}
8829
8831 short __e31, short __e30, short __e29, short __e28, short __e27,
8832 short __e26, short __e25, short __e24, short __e23, short __e22,
8833 short __e21, short __e20, short __e19, short __e18, short __e17,
8834 short __e16, short __e15, short __e14, short __e13, short __e12,
8835 short __e11, short __e10, short __e9, short __e8, short __e7, short __e6,
8836 short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) {
8837 return __extension__ (__m512i)(__v32hi)
8838 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8839 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8840 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8841 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
8842}
8843
8845 int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H,
8846 int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) {
8847 return __extension__ (__m512i)(__v16si)
8848 { __P, __O, __N, __M, __L, __K, __J, __I,
8849 __H, __G, __F, __E, __D, __C, __B, __A };
8850}
8851
8853 int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8,
8854 int e9, int e10, int e11, int e12, int e13, int e14, int e15) {
8855 return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,
8856 e3, e2, e1, e0);
8857}
8858
/// Constructs a vector of [8 x i64]; arguments are given from the
/// most-significant element (\a __A) down to the least-significant (\a __H).
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_epi64(long long __A, long long __B, long long __C, long long __D,
                 long long __E, long long __F, long long __G, long long __H) {
  // Vector initializers list element 0 first, hence the reversed order.
  return __extension__ (__m512i) (__v8di)
         { __H, __G, __F, __E, __D, __C, __B, __A };
}
8865
/// Constructs a vector of [8 x i64] with arguments in element order
/// (\a e0 is element 0); implemented by reversing into _mm512_set_epi64.
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3,
                  long long e4, long long e5, long long e6, long long e7) {
  return _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
}
8871
/// Constructs a vector of [8 x double]; arguments are given from the
/// most-significant element (\a __A) down to the least-significant (\a __H).
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_pd(double __A, double __B, double __C, double __D, double __E,
              double __F, double __G, double __H) {
  // Vector initializers list element 0 first, hence the reversed order.
  return __extension__ (__m512d)
         { __H, __G, __F, __E, __D, __C, __B, __A };
}
8878
/// Constructs a vector of [8 x double] with arguments in element order
/// (\a e0 is element 0); implemented by reversing into _mm512_set_pd.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5,
               double e6, double e7) {
  return _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0);
}
8884
/// Constructs a vector of [16 x float]; arguments are given from the
/// most-significant element (\a __A) down to the least-significant (\a __P).
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F,
              float __G, float __H, float __I, float __J, float __K, float __L,
              float __M, float __N, float __O, float __P) {
  // Vector initializers list element 0 first, hence the reversed order.
  return __extension__ (__m512)
         { __P, __O, __N, __M, __L, __K, __J, __I,
           __H, __G, __F, __E, __D, __C, __B, __A };
}
8893
/// Constructs a vector of [16 x float] with arguments in element order
/// (\a e0 is element 0); implemented by reversing into _mm512_set_ps.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5,
               float e6, float e7, float e8, float e9, float e10, float e11,
               float e12, float e13, float e14, float e15) {
  return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
                       e2, e1, e0);
}
8901
/// Computes the absolute value of each single-precision element of \a __A by
/// clearing the IEEE-754 sign bit (bit 31 of each 32-bit lane).
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_abs_ps(__m512 __A) {
  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
8906
/// Masked absolute value of [16 x float]: elements selected by \a __K get the
/// sign bit cleared; unselected elements are copied from \a __W.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) {
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
8911
/// Computes the absolute value of each double-precision element of \a __A by
/// clearing the IEEE-754 sign bit (bit 63 of each 64-bit lane).
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_abs_pd(__m512d __A) {
  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
}
8916
/// Masked absolute value of [8 x double]: elements selected by \a __K get the
/// sign bit cleared; unselected elements are copied from \a __W.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) {
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}
8921
8922/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
8923 * outputs. This class of vector operation forms the basis of many scientific
8924 * computations. In vector-reduction arithmetic, the evaluation order is
8925 * independent of the order of the input elements of V.
8926
8927 * For floating-point intrinsics:
8928 * 1. When using fadd/fmul intrinsics, the order of operations within the
8929 * vector is unspecified (associative math).
8930 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
8931 * produce unspecified results.
8932
 * We use a bisection method: at each step, the vector from the previous step
 * is partitioned in half, and the operation is performed on the two halves.
 * This takes log2(n) steps where n is the number of elements in the vector.
8936 */
8937
8938static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8940 return __builtin_reduce_add((__v8di)__W);
8941}
8942
8943static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8945 return __builtin_reduce_mul((__v8di)__W);
8946}
8947
8948static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8950 return __builtin_reduce_and((__v8di)__W);
8951}
8952
8953static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8955 return __builtin_reduce_or((__v8di)__W);
8956}
8957
8958static __inline__ long long __DEFAULT_FN_ATTRS512
8960 __W = _mm512_maskz_mov_epi64(__M, __W);
8961 return __builtin_reduce_add((__v8di)__W);
8962}
8963
8964static __inline__ long long __DEFAULT_FN_ATTRS512
8966 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
8967 return __builtin_reduce_mul((__v8di)__W);
8968}
8969
8970static __inline__ long long __DEFAULT_FN_ATTRS512
8972 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
8973 return __builtin_reduce_and((__v8di)__W);
8974}
8975
8976static __inline__ long long __DEFAULT_FN_ATTRS512
8978 __W = _mm512_maskz_mov_epi64(__M, __W);
8979 return __builtin_reduce_or((__v8di)__W);
8980}
8981
8982// -0.0 is used to ignore the start value since it is the neutral value of
8983// floating point addition. For more information, please refer to
8984// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
/// Sums all eight double-precision elements of \a __W; the association order
/// is unspecified. -0.0 is the neutral start value for the fadd reduction.
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
}
8988
/// Multiplies all eight double-precision elements of \a __W; the association
/// order is unspecified. 1.0 is the neutral start value for the fmul
/// reduction.
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
}
8992
8993static __inline__ double __DEFAULT_FN_ATTRS512
8995 __W = _mm512_maskz_mov_pd(__M, __W);
8996 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8997}
8998
8999static __inline__ double __DEFAULT_FN_ATTRS512
9001 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9002 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9003}
9004
9005static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9007 return __builtin_reduce_add((__v16si)__W);
9008}
9009
9010static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9012 return __builtin_reduce_mul((__v16si)__W);
9013}
9014
9015static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9017 return __builtin_reduce_and((__v16si)__W);
9018}
9019
9020static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9022 return __builtin_reduce_or((__v16si)__W);
9023}
9024
9025static __inline__ int __DEFAULT_FN_ATTRS512
9027 __W = _mm512_maskz_mov_epi32(__M, __W);
9028 return __builtin_reduce_add((__v16si)__W);
9029}
9030
9031static __inline__ int __DEFAULT_FN_ATTRS512
9033 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9034 return __builtin_reduce_mul((__v16si)__W);
9035}
9036
9037static __inline__ int __DEFAULT_FN_ATTRS512
9039 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
9040 return __builtin_reduce_and((__v16si)__W);
9041}
9042
9043static __inline__ int __DEFAULT_FN_ATTRS512
9045 __W = _mm512_maskz_mov_epi32(__M, __W);
9046 return __builtin_reduce_or((__v16si)__W);
9047}
9048
9049static __inline__ float __DEFAULT_FN_ATTRS512
9051 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9052}
9053
9054static __inline__ float __DEFAULT_FN_ATTRS512
9056 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9057}
9058
9059static __inline__ float __DEFAULT_FN_ATTRS512
9061 __W = _mm512_maskz_mov_ps(__M, __W);
9062 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9063}
9064
9065static __inline__ float __DEFAULT_FN_ATTRS512
9067 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9068 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9069}
9070
9071static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9073 return __builtin_reduce_max((__v8di)__V);
9074}
9075
9076static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9078 return __builtin_reduce_max((__v8du)__V);
9079}
9080
9081static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9083 return __builtin_reduce_min((__v8di)__V);
9084}
9085
9086static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9088 return __builtin_reduce_min((__v8du)__V);
9089}
9090
9091static __inline__ long long __DEFAULT_FN_ATTRS512
9093 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9094 return __builtin_reduce_max((__v8di)__V);
9095}
9096
9097static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9099 __V = _mm512_maskz_mov_epi64(__M, __V);
9100 return __builtin_reduce_max((__v8du)__V);
9101}
9102
9103static __inline__ long long __DEFAULT_FN_ATTRS512
9105 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9106 return __builtin_reduce_min((__v8di)__V);
9107}
9108
9109static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9111 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
9112 return __builtin_reduce_min((__v8du)__V);
9113}
9114static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9116 return __builtin_reduce_max((__v16si)__V);
9117}
9118
9119static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9121 return __builtin_reduce_max((__v16su)__V);
9122}
9123
9124static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9126 return __builtin_reduce_min((__v16si)__V);
9127}
9128
9129static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9131 return __builtin_reduce_min((__v16su)__V);
9132}
9133
9134static __inline__ int __DEFAULT_FN_ATTRS512
9136 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9137 return __builtin_reduce_max((__v16si)__V);
9138}
9139
9140static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9142 __V = _mm512_maskz_mov_epi32(__M, __V);
9143 return __builtin_reduce_max((__v16su)__V);
9144}
9145
9146static __inline__ int __DEFAULT_FN_ATTRS512
9148 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9149 return __builtin_reduce_min((__v16si)__V);
9150}
9151
9152static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9154 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
9155 return __builtin_reduce_min((__v16su)__V);
9156}
9157
9158static __inline__ double __DEFAULT_FN_ATTRS512
9160 return __builtin_ia32_reduce_fmax_pd512(__V);
9161}
9162
9163static __inline__ double __DEFAULT_FN_ATTRS512
9165 return __builtin_ia32_reduce_fmin_pd512(__V);
9166}
9167
9168static __inline__ double __DEFAULT_FN_ATTRS512
9170 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9171 return __builtin_ia32_reduce_fmax_pd512(__V);
9172}
9173
9174static __inline__ double __DEFAULT_FN_ATTRS512
9176 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9177 return __builtin_ia32_reduce_fmin_pd512(__V);
9178}
9179
9180static __inline__ float __DEFAULT_FN_ATTRS512
9182 return __builtin_ia32_reduce_fmax_ps512(__V);
9183}
9184
9185static __inline__ float __DEFAULT_FN_ATTRS512
9187 return __builtin_ia32_reduce_fmin_ps512(__V);
9188}
9189
9190static __inline__ float __DEFAULT_FN_ATTRS512
9192 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9193 return __builtin_ia32_reduce_fmax_ps512(__V);
9194}
9195
9196static __inline__ float __DEFAULT_FN_ATTRS512
9198 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9199 return __builtin_ia32_reduce_fmin_ps512(__V);
9200}
9201
9202/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9203/// 32-bit signed integer value.
9204///
9205/// \headerfile <x86intrin.h>
9206///
9207/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9208///
9209/// \param __A
9210/// A vector of [16 x i32]. The least significant 32 bits are moved to the
9211/// destination.
9212/// \returns A 32-bit signed integer containing the moved value.
9213static __inline__ int __DEFAULT_FN_ATTRS512
9215 __v16si __b = (__v16si)__A;
9216 return __b[0];
9217}
9218
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in the lower half of \a vindex, scaled by \a scale, and
/// stores them in dst. The upper 256 bits of \a vindex are ignored.
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_pd(vindex, base_addr, scale) \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9236
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// starting at location \a base_addr at packed 32-bit integer indices stored
/// in the lower half of \a vindex scaled by \a scale into dst using writemask
/// \a mask (elements are copied from \a src when the corresponding mask bit
/// is not set). The upper 256 bits of \a vindex are ignored.
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \
                           (base_addr), (scale))
9261
/// Loads 8 64-bit integer elements from memory starting at location
/// \a base_addr at packed 32-bit integer indices stored in the lower half of
/// \a vindex scaled by \a scale and stores them in dst. The upper 256 bits of
/// \a vindex are ignored.
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9279
/// Loads 8 64-bit integer elements from memory starting at location
/// \a base_addr at packed 32-bit integer indices stored in the lower half of
/// \a vindex scaled by \a scale and stores them in dst using writemask
/// \a mask (elements are copied from \a src when the corresponding mask bit
/// is not set). The upper 256 bits of \a vindex are ignored.
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \
                              (base_addr), (scale))
9303
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in the lower half of \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
9320
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in the lower half of \a vindex scaled by \a scale.
/// Only those elements whose corresponding mask bit is set in writemask
/// \a mask are written to memory.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_pd((base_addr), (mask), \
                            _mm512_castsi512_si256(vindex), (v1), (scale))
9342
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in the lower half of \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_epi64((base_addr), \
                          _mm512_castsi512_si256(vindex), (v1), (scale))
9360
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in the lower half of \a vindex scaled by \a scale, using
/// writemask \a mask (elements whose corresponding mask bit is not set are
/// not written to memory).
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_epi64((base_addr), (mask), \
                               _mm512_castsi512_si256(vindex), (v1), (scale))
9381
9382#undef __DEFAULT_FN_ATTRS512
9383#undef __DEFAULT_FN_ATTRS128
9384#undef __DEFAULT_FN_ATTRS
9385#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
9386#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
9387#undef __DEFAULT_FN_ATTRS_CONSTEXPR
9388
9389#endif /* __AVX512FINTRIN_H */
#define __L(__X)
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
Definition altivec.h:578
static __inline__ uint32_t volatile uint32_t * __p
Definition arm_acle.h:57
return __v
Definition arm_acle.h:88
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8, int e9, int e10, int e11, int e12, int e13, int e14, int e15)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si256(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7, float e8, float e9, float e10, float e11, float e12, float e13, float e14, float e15)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
_MM_MANTISSA_NORM_ENUM
@ _MM_MANT_NORM_p5_1
@ _MM_MANT_NORM_p5_2
@ _MM_MANT_NORM_1_2
@ _MM_MANT_NORM_p75_1p5
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5, double e6, double e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps256(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3, long long e4, long long e5, long long e6, long long e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_int2mask(int __a)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
_MM_CMPINT_ENUM
@ _MM_CMPINT_NE
@ _MM_CMPINT_NLT
@ _MM_CMPINT_LE
@ _MM_CMPINT_EQ
@ _MM_CMPINT_LT
@ _MM_CMPINT_UNUSED
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_knot(__mmask16 __M)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
unsigned char __mmask8
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
@ _MM_TERNLOG_A
@ _MM_TERNLOG_B
@ _MM_TERNLOG_C
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
_MM_MANTISSA_SIGN_ENUM
@ _MM_MANT_SIGN_zero
@ _MM_MANT_SIGN_src
@ _MM_MANT_SIGN_nan
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4 x double].
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi32(int e0, int e1, int e2, int e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32(__m512i __A, __m512i __B)
_MM_PERM_ENUM
@ _MM_PERM_BBCA
@ _MM_PERM_BCBD
@ _MM_PERM_DAAB
@ _MM_PERM_CBBD
@ _MM_PERM_DCCC
@ _MM_PERM_CDBB
@ _MM_PERM_DDDD
@ _MM_PERM_CCCC
@ _MM_PERM_CADA
@ _MM_PERM_BACD
@ _MM_PERM_CCAD
@ _MM_PERM_ABDB
@ _MM_PERM_BBBD
@ _MM_PERM_DCAB
@ _MM_PERM_BABC
@ _MM_PERM_AACD
@ _MM_PERM_BBAB
@ _MM_PERM_DCDB
@ _MM_PERM_BACC
@ _MM_PERM_ABDA
@ _MM_PERM_ACCC
@ _MM_PERM_ADAC
@ _MM_PERM_CCCD
@ _MM_PERM_CADD
@ _MM_PERM_ACCB
@ _MM_PERM_BBDB
@ _MM_PERM_ABBB
@ _MM_PERM_BACB
@ _MM_PERM_CDCA
@ _MM_PERM_ACBC
@ _MM_PERM_ADCB
@ _MM_PERM_BBBC
@ _MM_PERM_DBBA
@ _MM_PERM_BBBB
@ _MM_PERM_DDDB
@ _MM_PERM_CAAA
@ _MM_PERM_ADBB
@ _MM_PERM_ACDB
@ _MM_PERM_DCAD
@ _MM_PERM_DBBC
@ _MM_PERM_BAAB
@ _MM_PERM_BDDD
@ _MM_PERM_BBAD
@ _MM_PERM_DDBA
@ _MM_PERM_CDCD
@ _MM_PERM_CCCA
@ _MM_PERM_DBBB
@ _MM_PERM_DAAD
@ _MM_PERM_DCBA
@ _MM_PERM_CCBC
@ _MM_PERM_ADDD
@ _MM_PERM_DBAC
@ _MM_PERM_ABAB
@ _MM_PERM_CBDB
@ _MM_PERM_CDBC
@ _MM_PERM_AABC
@ _MM_PERM_DABD
@ _MM_PERM_CBBA
@ _MM_PERM_CBAA
@ _MM_PERM_BDDB
@ _MM_PERM_CABC
@ _MM_PERM_BDBD
@ _MM_PERM_BCAD
@ _MM_PERM_ACBA
@ _MM_PERM_ADBA
@ _MM_PERM_ADBC
@ _MM_PERM_DBCB
@ _MM_PERM_CBDC
@ _MM_PERM_CBAD
@ _MM_PERM_ABCC
@ _MM_PERM_AAAD
@ _MM_PERM_CBAC
@ _MM_PERM_CCDA
@ _MM_PERM_CDAC
@ _MM_PERM_BADD
@ _MM_PERM_DAAC
@ _MM_PERM_BCCC
@ _MM_PERM_DBBD
@ _MM_PERM_DDAC
@ _MM_PERM_DACD
@ _MM_PERM_BAAC
@ _MM_PERM_ACCA
@ _MM_PERM_ABDD
@ _MM_PERM_BBCC
@ _MM_PERM_DAAA
@ _MM_PERM_CAAB
@ _MM_PERM_BCDB
@ _MM_PERM_ACBB
@ _MM_PERM_CDAB
@ _MM_PERM_DBDB
@ _MM_PERM_AABB
@ _MM_PERM_DBDA
@ _MM_PERM_BCBA
@ _MM_PERM_CBAB
@ _MM_PERM_DCDC
@ _MM_PERM_BBCB
@ _MM_PERM_CDCB
@ _MM_PERM_AACA
@ _MM_PERM_ACBD
@ _MM_PERM_AAAB
@ _MM_PERM_DCCB
@ _MM_PERM_ADDB
@ _MM_PERM_AAAA
@ _MM_PERM_AACC
@ _MM_PERM_BDDC
@ _MM_PERM_CBBC
@ _MM_PERM_DDCC
@ _MM_PERM_CABD
@ _MM_PERM_AADC
@ _MM_PERM_BCBC
@ _MM_PERM_BCCA
@ _MM_PERM_CCBD
@ _MM_PERM_CBBB
@ _MM_PERM_CDBA
@ _MM_PERM_CACD
@ _MM_PERM_BDAD
@ _MM_PERM_ADCA
@ _MM_PERM_AAAC
@ _MM_PERM_BDDA
@ _MM_PERM_CCAC
@ _MM_PERM_ACDC
@ _MM_PERM_DBCA
@ _MM_PERM_DBAA
@ _MM_PERM_AABD
@ _MM_PERM_CDCC
@ _MM_PERM_DCAA
@ _MM_PERM_DDDC
@ _MM_PERM_CDDB
@ _MM_PERM_AABA
@ _MM_PERM_DDBB
@ _MM_PERM_CDDA
@ _MM_PERM_AADD
@ _MM_PERM_BADC
@ _MM_PERM_BDBA
@ _MM_PERM_DBDD
@ _MM_PERM_BDAC
@ _MM_PERM_DBDC
@ _MM_PERM_BBBA
@ _MM_PERM_DDBC
@ _MM_PERM_BAAA
@ _MM_PERM_BDCC
@ _MM_PERM_DDAB
@ _MM_PERM_BCCB
@ _MM_PERM_BCCD
@ _MM_PERM_ADBD
@ _MM_PERM_ADCC
@ _MM_PERM_CCBB
@ _MM_PERM_CDAA
@ _MM_PERM_BBDA
@ _MM_PERM_CACC
@ _MM_PERM_DCBB
@ _MM_PERM_DABA
@ _MM_PERM_BADB
@ _MM_PERM_ABCA
@ _MM_PERM_CBCC
@ _MM_PERM_ABAD
@ _MM_PERM_BDBC
@ _MM_PERM_DDDA
@ _MM_PERM_ADAB
@ _MM_PERM_CADB
@ _MM_PERM_ADAA
@ _MM_PERM_ACAC
@ _MM_PERM_DADD
@ _MM_PERM_BABD
@ _MM_PERM_ACCD
@ _MM_PERM_CCAA
@ _MM_PERM_AADA
@ _MM_PERM_BDCA
@ _MM_PERM_CDDD
@ _MM_PERM_ABBD
@ _MM_PERM_ACAA
@ _MM_PERM_ACDD
@ _MM_PERM_DABB
@ _MM_PERM_CCCB
@ _MM_PERM_AADB
@ _MM_PERM_DBAD
@ _MM_PERM_BBDD
@ _MM_PERM_BCDC
@ _MM_PERM_CABA
@ _MM_PERM_BBAA
@ _MM_PERM_ADAD
@ _MM_PERM_BADA
@ _MM_PERM_DCDA
@ _MM_PERM_ABBA
@ _MM_PERM_ACAB
@ _MM_PERM_CCDD
@ _MM_PERM_CADC
@ _MM_PERM_DDCB
@ _MM_PERM_BABB
@ _MM_PERM_CCDB
@ _MM_PERM_DDAD
@ _MM_PERM_DBCC
@ _MM_PERM_BCBB
@ _MM_PERM_ADDC
@ _MM_PERM_CCBA
@ _MM_PERM_ABCD
@ _MM_PERM_BCAB
@ _MM_PERM_DCBC
@ _MM_PERM_BCDD
@ _MM_PERM_CCDC
@ _MM_PERM_ABAC
@ _MM_PERM_CBCB
@ _MM_PERM_CCAB
@ _MM_PERM_DDCD
@ _MM_PERM_DACA
@ _MM_PERM_ACAD
@ _MM_PERM_BABA
@ _MM_PERM_CBCD
@ _MM_PERM_CAAD
@ _MM_PERM_DCDD
@ _MM_PERM_BDBB
@ _MM_PERM_BCAA
@ _MM_PERM_ABDC
@ _MM_PERM_BBCD
@ _MM_PERM_CAAC
@ _MM_PERM_BBAC
@ _MM_PERM_CBCA
@ _MM_PERM_DCAC
@ _MM_PERM_ABAA
@ _MM_PERM_CACB
@ _MM_PERM_BBDC
@ _MM_PERM_CDAD
@ _MM_PERM_ADCD
@ _MM_PERM_DADB
@ _MM_PERM_DBCD
@ _MM_PERM_DACC
@ _MM_PERM_DACB
@ _MM_PERM_DCBD
@ _MM_PERM_CACA
@ _MM_PERM_ABBC
@ _MM_PERM_DCCA
@ _MM_PERM_DABC
@ _MM_PERM_CBDD
@ _MM_PERM_DDBD
@ _MM_PERM_DDCA
@ _MM_PERM_BDCD
@ _MM_PERM_CDBD
@ _MM_PERM_ABCB
@ _MM_PERM_CDDC
@ _MM_PERM_AACB
@ _MM_PERM_DDAA
@ _MM_PERM_ADDA
@ _MM_PERM_DADA
@ _MM_PERM_BCDA
@ _MM_PERM_BDAB
@ _MM_PERM_BAAD
@ _MM_PERM_DBAB
@ _MM_PERM_DCCD
@ _MM_PERM_CABB
@ _MM_PERM_BDAA
@ _MM_PERM_BDCB
@ _MM_PERM_ACDA
@ _MM_PERM_DADC
@ _MM_PERM_CBDA
@ _MM_PERM_BCAC
@ _MM_PERM_BACA
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_pd(double e0, double e1, double e2, double e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_ps(float e0, float e1, float e2, float e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition avxintrin.h:3610
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3623
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4294
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4282
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4306
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition emmintrin.h:120
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition emmintrin.h:199
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition emmintrin.h:80
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3877
static __inline__ void int __a
Definition emmintrin.h:4076
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3492
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:1886
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1866
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition emmintrin.h:159
static __inline__ void short __D
Definition immintrin.h:342
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition bmi2intrin.h:19
#define _MM_FROUND_FLOOR
Definition smmintrin.h:41
#define _MM_FROUND_CEIL
Definition smmintrin.h:42
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition xmmintrin.h:2801
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:160
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
Definition xmmintrin.h:119
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:79
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2018
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
Definition xmmintrin.h:200