/* Scraped from the clang 22.0.0git generated documentation page for
 * avx512fintrin.h; site navigation text removed. */
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512FINTRIN_H
#define __AVX512FINTRIN_H
15
/* Internal 512-bit "element view" vector types used to pick the element
 * type/signedness of an operation. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v64qs __attribute__((__vector_size__(64)));

/* Public 512-bit vector types, 64-byte aligned. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Byte-aligned variants for unaligned loads/stores. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Write-mask types: one bit per vector lane. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* No corresponding instruction */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE /* Greater than */
} _MM_CMPINT_ENUM;

64typedef enum
65{
153
/* Normalization intervals for the getmant (extract mantissa) operations. */
typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Sign control for the getmant (extract mantissa) operations. */
typedef enum
{
  _MM_MANT_SIGN_src,      /* sign = sign(SRC)               */
  _MM_MANT_SIGN_zero,     /* sign = 0                       */
  _MM_MANT_SIGN_nan       /* DEST = NaN if sign(SRC) = 1    */
} _MM_MANTISSA_SIGN_ENUM;

/* Define the default attributes for the functions in this file.
 * The _CONSTEXPR variants add C++14 constexpr when compiling as C++11+. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif

189/* Create vectors with repeated elements */
190
191static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
193 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
194}
195
196#define _mm512_setzero_epi32 _mm512_setzero_si512
197
198static __inline__ __m512d __DEFAULT_FN_ATTRS512
200{
201 return (__m512d)__builtin_ia32_undef512();
202}
203
204static __inline__ __m512 __DEFAULT_FN_ATTRS512
206{
207 return (__m512)__builtin_ia32_undef512();
208}
209
210static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) {
211 return (__m512)__builtin_ia32_undef512();
212}
213
214static __inline__ __m512i __DEFAULT_FN_ATTRS512
216{
217 return (__m512i)__builtin_ia32_undef512();
218}
219
220static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
222 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
224}
225
226static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
227_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) {
228 return (__m512i)__builtin_ia32_selectd_512(__M,
229 (__v16si) _mm512_broadcastd_epi32(__A),
230 (__v16si) __O);
231}
232
233static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
235 return (__m512i)__builtin_ia32_selectd_512(__M,
236 (__v16si) _mm512_broadcastd_epi32(__A),
237 (__v16si) _mm512_setzero_si512());
238}
239
240static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
242 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
243 0, 0, 0, 0, 0, 0, 0, 0);
244}
245
246static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
247_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
248 return (__m512i)__builtin_ia32_selectq_512(
249 __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O);
250}
251
252static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
254 return (__m512i)__builtin_ia32_selectq_512(__M,
255 (__v8di) _mm512_broadcastq_epi64(__A),
256 (__v8di) _mm512_setzero_si512());
257}
258
260 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
261 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
262}
263
264#define _mm512_setzero _mm512_setzero_ps
265
266static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
268 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
269}
270
271static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
273{
274 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
275 __w, __w, __w, __w, __w, __w, __w, __w };
276}
277
278static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
279_mm512_set1_pd(double __w)
280{
281 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
282}
283
284static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
286{
287 return __extension__ (__m512i)(__v64qi){
288 __w, __w, __w, __w, __w, __w, __w, __w,
289 __w, __w, __w, __w, __w, __w, __w, __w,
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w,
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w };
296}
297
298static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
300{
301 return __extension__ (__m512i)(__v32hi){
302 __w, __w, __w, __w, __w, __w, __w, __w,
303 __w, __w, __w, __w, __w, __w, __w, __w,
304 __w, __w, __w, __w, __w, __w, __w, __w,
305 __w, __w, __w, __w, __w, __w, __w, __w };
306}
307
308static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
310{
311 return __extension__ (__m512i)(__v16si){
312 __s, __s, __s, __s, __s, __s, __s, __s,
313 __s, __s, __s, __s, __s, __s, __s, __s };
314}
315
316static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
318 return (__m512i)__builtin_ia32_selectd_512(__M,
319 (__v16si)_mm512_set1_epi32(__A),
320 (__v16si)_mm512_setzero_si512());
321}
322
323static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
324_mm512_set1_epi64(long long __d)
325{
326 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
327}
328
329static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
331 return (__m512i)__builtin_ia32_selectq_512(__M,
332 (__v8di)_mm512_set1_epi64(__A),
333 (__v8di)_mm512_setzero_si512());
334}
335
336static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
338 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
340}
341
342static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
343_mm512_set4_epi32(int __A, int __B, int __C, int __D) {
344 return __extension__ (__m512i)(__v16si)
345 { __D, __C, __B, __A, __D, __C, __B, __A,
346 __D, __C, __B, __A, __D, __C, __B, __A };
347}
348
349static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
350_mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D) {
351 return __extension__ (__m512i) (__v8di)
352 { __D, __C, __B, __A, __D, __C, __B, __A };
353}
354
355static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
356_mm512_set4_pd(double __A, double __B, double __C, double __D) {
357 return __extension__ (__m512d)
358 { __D, __C, __B, __A, __D, __C, __B, __A };
359}
360
361static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
362_mm512_set4_ps(float __A, float __B, float __C, float __D) {
363 return __extension__ (__m512)
364 { __D, __C, __B, __A, __D, __C, __B, __A,
365 __D, __C, __B, __A, __D, __C, __B, __A };
366}
367
368static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
369_mm512_setr4_epi32(int e0, int e1, int e2, int e3) {
370 return _mm512_set4_epi32(e3, e2, e1, e0);
371}
372
373static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
374_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) {
375 return _mm512_set4_epi64(e3, e2, e1, e0);
376}
377
378static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
379_mm512_setr4_pd(double e0, double e1, double e2, double e3) {
380 return _mm512_set4_pd(e3, e2, e1, e0);
381}
382
383static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
384_mm512_setr4_ps(float e0, float e1, float e2, float e3) {
385 return _mm512_set4_ps(e3, e2, e1, e0);
386}
387
388static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
390 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
391 0, 0, 0, 0, 0, 0, 0, 0);
392}
393
394/* Cast between vector types */
395
396static __inline __m512d __DEFAULT_FN_ATTRS512
398{
399 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
400 1, 2, 3, 4, 5, 6, 7);
401}
402
403static __inline __m512 __DEFAULT_FN_ATTRS512
405{
406 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
407 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
408}
409
410static __inline __m128d __DEFAULT_FN_ATTRS512
412{
413 return __builtin_shufflevector(__a, __a, 0, 1);
414}
415
416static __inline __m256d __DEFAULT_FN_ATTRS512
418{
419 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
420}
421
422static __inline __m128 __DEFAULT_FN_ATTRS512
424{
425 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
426}
427
428static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
430 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
431}
432
433static __inline __m512 __DEFAULT_FN_ATTRS512
434_mm512_castpd_ps (__m512d __A)
435{
436 return (__m512) (__A);
437}
438
439static __inline __m512i __DEFAULT_FN_ATTRS512
441{
442 return (__m512i) (__A);
443}
444
445static __inline__ __m512d __DEFAULT_FN_ATTRS512
447{
448 __m256d __B = __builtin_nondeterministic_value(__B);
449 return __builtin_shufflevector(
450 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
451 __B, 0, 1, 2, 3, 4, 5, 6, 7);
452}
453
454static __inline __m512d __DEFAULT_FN_ATTRS512
456{
457 return (__m512d) (__A);
458}
459
460static __inline __m512i __DEFAULT_FN_ATTRS512
462{
463 return (__m512i) (__A);
464}
465
466static __inline__ __m512 __DEFAULT_FN_ATTRS512
468{
469 __m256 __B = __builtin_nondeterministic_value(__B);
470 return __builtin_shufflevector(
471 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
472 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
473}
474
475static __inline__ __m512i __DEFAULT_FN_ATTRS512
477{
478 __m256i __B = __builtin_nondeterministic_value(__B);
479 return __builtin_shufflevector(
480 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
481 __B, 0, 1, 2, 3, 4, 5, 6, 7);
482}
483
484static __inline__ __m512i __DEFAULT_FN_ATTRS512
486{
487 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
488}
489
490static __inline __m512 __DEFAULT_FN_ATTRS512
492{
493 return (__m512) (__A);
494}
495
496static __inline __m512d __DEFAULT_FN_ATTRS512
498{
499 return (__m512d) (__A);
500}
501
502static __inline __m128i __DEFAULT_FN_ATTRS512
504{
505 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
506}
507
508static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR
510 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
511}
512
515 return (__mmask16)__a;
516}
517
518static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
520 return (int)__a;
521}
522
523/// Constructs a 512-bit floating-point vector of [8 x double] from a
524/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
525/// contain the value of the source vector. The upper 384 bits are set
526/// to zero.
527///
528/// \headerfile <x86intrin.h>
529///
530/// This intrinsic has no corresponding instruction.
531///
532/// \param __a
533/// A 128-bit vector of [2 x double].
534/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
535/// contain the value of the parameter. The upper 384 bits are set to zero.
536static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
538 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
539}
540
541/// Constructs a 512-bit floating-point vector of [8 x double] from a
542/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
543/// contain the value of the source vector. The upper 256 bits are set
544/// to zero.
545///
546/// \headerfile <x86intrin.h>
547///
548/// This intrinsic has no corresponding instruction.
549///
550/// \param __a
551/// A 256-bit vector of [4 x double].
552/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
553/// contain the value of the parameter. The upper 256 bits are set to zero.
554static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
556 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
557}
558
559/// Constructs a 512-bit floating-point vector of [16 x float] from a
560/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
561/// the value of the source vector. The upper 384 bits are set to zero.
562///
563/// \headerfile <x86intrin.h>
564///
565/// This intrinsic has no corresponding instruction.
566///
567/// \param __a
568/// A 128-bit vector of [4 x float].
569/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
570/// contain the value of the parameter. The upper 384 bits are set to zero.
571static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
573 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
574}
575
576/// Constructs a 512-bit floating-point vector of [16 x float] from a
577/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
578/// the value of the source vector. The upper 256 bits are set to zero.
579///
580/// \headerfile <x86intrin.h>
581///
582/// This intrinsic has no corresponding instruction.
583///
584/// \param __a
585/// A 256-bit vector of [8 x float].
586/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
587/// contain the value of the parameter. The upper 256 bits are set to zero.
588static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
590 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
591}
592
593/// Constructs a 512-bit integer vector from a 128-bit integer vector.
594/// The lower 128 bits contain the value of the source vector. The upper
595/// 384 bits are set to zero.
596///
597/// \headerfile <x86intrin.h>
598///
599/// This intrinsic has no corresponding instruction.
600///
601/// \param __a
602/// A 128-bit integer vector.
603/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
604/// the parameter. The upper 384 bits are set to zero.
605static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
607 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
608}
609
610/// Constructs a 512-bit integer vector from a 256-bit integer vector.
611/// The lower 256 bits contain the value of the source vector. The upper
612/// 256 bits are set to zero.
613///
614/// \headerfile <x86intrin.h>
615///
616/// This intrinsic has no corresponding instruction.
617///
618/// \param __a
619/// A 256-bit integer vector.
620/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
621/// the parameter. The upper 256 bits are set to zero.
622static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
624 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
625}
626
627/* Bitwise operators */
628static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
629_mm512_and_epi32(__m512i __a, __m512i __b)
630{
631 return (__m512i)((__v16su)__a & (__v16su)__b);
632}
633
634static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
635_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) {
636 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
637 (__v16si) _mm512_and_epi32(__a, __b),
638 (__v16si) __src);
639}
640
641static __inline__ __m512i __DEFAULT_FN_ATTRS512
643{
645 __k, __a, __b);
646}
647
648static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
649_mm512_and_epi64(__m512i __a, __m512i __b)
650{
651 return (__m512i)((__v8du)__a & (__v8du)__b);
652}
653
654static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
655_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) {
656 return (__m512i)__builtin_ia32_selectq_512(
657 (__mmask8)__k, (__v8di)_mm512_and_epi64(__a, __b), (__v8di)__src);
658}
659
660static __inline__ __m512i __DEFAULT_FN_ATTRS512
662{
664 __k, __a, __b);
665}
666
667static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
668_mm512_andnot_si512 (__m512i __A, __m512i __B)
669{
670 return (__m512i)(~(__v8du)__A & (__v8du)__B);
671}
672
673static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
674_mm512_andnot_epi32 (__m512i __A, __m512i __B)
675{
676 return (__m512i)(~(__v16su)__A & (__v16su)__B);
677}
678
679static __inline__ __m512i __DEFAULT_FN_ATTRS512
680_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
681{
682 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
683 (__v16si)_mm512_andnot_epi32(__A, __B),
684 (__v16si)__W);
685}
686
687static __inline__ __m512i __DEFAULT_FN_ATTRS512
688_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
689{
691 __U, __A, __B);
692}
693
694static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
695_mm512_andnot_epi64(__m512i __A, __m512i __B)
696{
697 return (__m512i)(~(__v8du)__A & (__v8du)__B);
698}
699
700static __inline__ __m512i __DEFAULT_FN_ATTRS512
701_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702{
703 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
704 (__v8di)_mm512_andnot_epi64(__A, __B),
705 (__v8di)__W);
706}
707
708static __inline__ __m512i __DEFAULT_FN_ATTRS512
709_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
710{
712 __U, __A, __B);
713}
714
715static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
716_mm512_or_epi32(__m512i __a, __m512i __b)
717{
718 return (__m512i)((__v16su)__a | (__v16su)__b);
719}
720
721static __inline__ __m512i __DEFAULT_FN_ATTRS512
722_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
723{
724 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
725 (__v16si)_mm512_or_epi32(__a, __b),
726 (__v16si)__src);
727}
728
729static __inline__ __m512i __DEFAULT_FN_ATTRS512
731{
732 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
733}
734
735static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
736_mm512_or_epi64(__m512i __a, __m512i __b)
737{
738 return (__m512i)((__v8du)__a | (__v8du)__b);
739}
740
741static __inline__ __m512i __DEFAULT_FN_ATTRS512
742_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
743{
744 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
745 (__v8di)_mm512_or_epi64(__a, __b),
746 (__v8di)__src);
747}
748
749static __inline__ __m512i __DEFAULT_FN_ATTRS512
750_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
751{
752 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
753}
754
755static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
756_mm512_xor_epi32(__m512i __a, __m512i __b)
757{
758 return (__m512i)((__v16su)__a ^ (__v16su)__b);
759}
760
761static __inline__ __m512i __DEFAULT_FN_ATTRS512
762_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
763{
764 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
765 (__v16si)_mm512_xor_epi32(__a, __b),
766 (__v16si)__src);
767}
768
769static __inline__ __m512i __DEFAULT_FN_ATTRS512
771{
772 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
773}
774
775static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
776_mm512_xor_epi64(__m512i __a, __m512i __b)
777{
778 return (__m512i)((__v8du)__a ^ (__v8du)__b);
779}
780
781static __inline__ __m512i __DEFAULT_FN_ATTRS512
782_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
783{
784 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
785 (__v8di)_mm512_xor_epi64(__a, __b),
786 (__v8di)__src);
787}
788
789static __inline__ __m512i __DEFAULT_FN_ATTRS512
791{
792 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
793}
794
795static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
796_mm512_and_si512(__m512i __a, __m512i __b)
797{
798 return (__m512i)((__v8du)__a & (__v8du)__b);
799}
800
801static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
802_mm512_or_si512(__m512i __a, __m512i __b)
803{
804 return (__m512i)((__v8du)__a | (__v8du)__b);
805}
806
807static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
808_mm512_xor_si512(__m512i __a, __m512i __b)
809{
810 return (__m512i)((__v8du)__a ^ (__v8du)__b);
811}
812
813/* Arithmetic */
814
815static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
816_mm512_add_pd(__m512d __a, __m512d __b) {
817 return (__m512d)((__v8df)__a + (__v8df)__b);
818}
819
820static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
821_mm512_add_ps(__m512 __a, __m512 __b) {
822 return (__m512)((__v16sf)__a + (__v16sf)__b);
823}
824
825static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
826_mm512_mul_pd(__m512d __a, __m512d __b) {
827 return (__m512d)((__v8df)__a * (__v8df)__b);
828}
829
830static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
831_mm512_mul_ps(__m512 __a, __m512 __b) {
832 return (__m512)((__v16sf)__a * (__v16sf)__b);
833}
834
835static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
836_mm512_sub_pd(__m512d __a, __m512d __b) {
837 return (__m512d)((__v8df)__a - (__v8df)__b);
838}
839
840static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
841_mm512_sub_ps(__m512 __a, __m512 __b) {
842 return (__m512)((__v16sf)__a - (__v16sf)__b);
843}
844
845static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
846_mm512_add_epi64(__m512i __A, __m512i __B) {
847 return (__m512i) ((__v8du) __A + (__v8du) __B);
848}
849
850static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
851_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
852 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
853 (__v8di)_mm512_add_epi64(__A, __B),
854 (__v8di)__W);
855}
856
857static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
858_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
859 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
860 (__v8di)_mm512_add_epi64(__A, __B),
861 (__v8di)_mm512_setzero_si512());
862}
863
864static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
865_mm512_sub_epi64(__m512i __A, __m512i __B) {
866 return (__m512i) ((__v8du) __A - (__v8du) __B);
867}
868
869static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
870_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
871 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
872 (__v8di)_mm512_sub_epi64(__A, __B),
873 (__v8di)__W);
874}
875
876static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
877_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
878 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
879 (__v8di)_mm512_sub_epi64(__A, __B),
880 (__v8di)_mm512_setzero_si512());
881}
882
883static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
884_mm512_add_epi32(__m512i __A, __m512i __B) {
885 return (__m512i) ((__v16su) __A + (__v16su) __B);
886}
887
888static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
889_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
890 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
891 (__v16si)_mm512_add_epi32(__A, __B),
892 (__v16si)__W);
893}
894
895static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
896_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
897 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
898 (__v16si)_mm512_add_epi32(__A, __B),
899 (__v16si)_mm512_setzero_si512());
900}
901
902static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
903_mm512_sub_epi32(__m512i __A, __m512i __B) {
904 return (__m512i) ((__v16su) __A - (__v16su) __B);
905}
906
907static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
908_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
909 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
910 (__v16si)_mm512_sub_epi32(__A, __B),
911 (__v16si)__W);
912}
913
914static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
915_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
916 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
917 (__v16si)_mm512_sub_epi32(__A, __B),
918 (__v16si)_mm512_setzero_si512());
919}
920
921#define _mm512_max_round_pd(A, B, R) \
922 ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
923 (__v8df)(__m512d)(B), (int)(R)))
924
925#define _mm512_mask_max_round_pd(W, U, A, B, R) \
926 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
927 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
928 (__v8df)(W)))
929
930#define _mm512_maskz_max_round_pd(U, A, B, R) \
931 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
932 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
933 (__v8df)_mm512_setzero_pd()))
934
935static __inline__ __m512d __DEFAULT_FN_ATTRS512
936_mm512_max_pd(__m512d __A, __m512d __B)
937{
938 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
940}
941
942static __inline__ __m512d __DEFAULT_FN_ATTRS512
943_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
944{
945 return (__m512d)__builtin_ia32_selectpd_512(__U,
946 (__v8df)_mm512_max_pd(__A, __B),
947 (__v8df)__W);
948}
949
950static __inline__ __m512d __DEFAULT_FN_ATTRS512
951_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
952{
953 return (__m512d)__builtin_ia32_selectpd_512(__U,
954 (__v8df)_mm512_max_pd(__A, __B),
955 (__v8df)_mm512_setzero_pd());
956}
957
958#define _mm512_max_round_ps(A, B, R) \
959 ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
960 (__v16sf)(__m512)(B), (int)(R)))
961
962#define _mm512_mask_max_round_ps(W, U, A, B, R) \
963 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
964 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
965 (__v16sf)(W)))
966
967#define _mm512_maskz_max_round_ps(U, A, B, R) \
968 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
969 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
970 (__v16sf)_mm512_setzero_ps()))
971
972static __inline__ __m512 __DEFAULT_FN_ATTRS512
973_mm512_max_ps(__m512 __A, __m512 __B)
974{
975 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
977}
978
979static __inline__ __m512 __DEFAULT_FN_ATTRS512
980_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
981{
982 return (__m512)__builtin_ia32_selectps_512(__U,
983 (__v16sf)_mm512_max_ps(__A, __B),
984 (__v16sf)__W);
985}
986
987static __inline__ __m512 __DEFAULT_FN_ATTRS512
988_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
989{
990 return (__m512)__builtin_ia32_selectps_512(__U,
991 (__v16sf)_mm512_max_ps(__A, __B),
992 (__v16sf)_mm512_setzero_ps());
993}
994
995static __inline__ __m128 __DEFAULT_FN_ATTRS128
996_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
997 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
998 (__v4sf) __B,
999 (__v4sf) __W,
1000 (__mmask8) __U,
1002}
1003
1004static __inline__ __m128 __DEFAULT_FN_ATTRS128
1005_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1006 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1007 (__v4sf) __B,
1008 (__v4sf) _mm_setzero_ps (),
1009 (__mmask8) __U,
1011}
1012
1013#define _mm_max_round_ss(A, B, R) \
1014 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1015 (__v4sf)(__m128)(B), \
1016 (__v4sf)_mm_setzero_ps(), \
1017 (__mmask8)-1, (int)(R)))
1018
1019#define _mm_mask_max_round_ss(W, U, A, B, R) \
1020 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1021 (__v4sf)(__m128)(B), \
1022 (__v4sf)(__m128)(W), (__mmask8)(U), \
1023 (int)(R)))
1024
1025#define _mm_maskz_max_round_ss(U, A, B, R) \
1026 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1027 (__v4sf)(__m128)(B), \
1028 (__v4sf)_mm_setzero_ps(), \
1029 (__mmask8)(U), (int)(R)))
1030
1031static __inline__ __m128d __DEFAULT_FN_ATTRS128
1032_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1033 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1034 (__v2df) __B,
1035 (__v2df) __W,
1036 (__mmask8) __U,
1038}
1039
1040static __inline__ __m128d __DEFAULT_FN_ATTRS128
1041_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1042 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1043 (__v2df) __B,
1044 (__v2df) _mm_setzero_pd (),
1045 (__mmask8) __U,
1047}
1048
1049#define _mm_max_round_sd(A, B, R) \
1050 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1051 (__v2df)(__m128d)(B), \
1052 (__v2df)_mm_setzero_pd(), \
1053 (__mmask8)-1, (int)(R)))
1054
1055#define _mm_mask_max_round_sd(W, U, A, B, R) \
1056 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1057 (__v2df)(__m128d)(B), \
1058 (__v2df)(__m128d)(W), \
1059 (__mmask8)(U), (int)(R)))
1060
1061#define _mm_maskz_max_round_sd(U, A, B, R) \
1062 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1063 (__v2df)(__m128d)(B), \
1064 (__v2df)_mm_setzero_pd(), \
1065 (__mmask8)(U), (int)(R)))
1066
1067static __inline __m512i
1069 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1070}
1071
/* Masked variants of the 512-bit integer max operations. The "mask" forms
 * blend the per-element result with __W under mask __M; the "maskz" forms
 * zero the elements whose mask bit is clear. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Per-element unsigned 32-bit maximum. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_max_epu32(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epu32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epu32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Per-element signed 64-bit maximum. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_max_epi64(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Per-element unsigned 64-bit maximum. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_max_epu64(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epu64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epu64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
1142
/* 512-bit double-precision minimum with explicit rounding/SAE control R;
 * mask/maskz forms blend the result with W or zero under mask U. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                        (__v8df)(W)))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
1156
1157static __inline__ __m512d __DEFAULT_FN_ATTRS512
1158_mm512_min_pd(__m512d __A, __m512d __B)
1159{
1160 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1162}
1163
/* Masked 512-bit double-precision minimum: blend with __W or zero under __U. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_min_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_min_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
1179
/* 512-bit single-precision minimum with explicit rounding/SAE control R;
 * mask/maskz forms blend the result with W or zero under mask U. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                       (__v16sf)(W)))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1193
1194static __inline__ __m512 __DEFAULT_FN_ATTRS512
1195_mm512_min_ps(__m512 __A, __m512 __B)
1196{
1197 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1199}
1200
/* Masked 512-bit single-precision minimum: blend with __W or zero under __U. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_min_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_min_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
1216
1217static __inline__ __m128 __DEFAULT_FN_ATTRS128
1218_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1219 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1220 (__v4sf) __B,
1221 (__v4sf) __W,
1222 (__mmask8) __U,
1224}
1225
1226static __inline__ __m128 __DEFAULT_FN_ATTRS128
1227_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1228 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1229 (__v4sf) __B,
1230 (__v4sf) _mm_setzero_ps (),
1231 (__mmask8) __U,
1233}
1234
/* Scalar single-precision min with explicit rounding/SAE control R;
 * unmasked, merge-masked (W), and zero-masked variants. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1252
1253static __inline__ __m128d __DEFAULT_FN_ATTRS128
1254_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1255 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1256 (__v2df) __B,
1257 (__v2df) __W,
1258 (__mmask8) __U,
1260}
1261
1262static __inline__ __m128d __DEFAULT_FN_ATTRS128
1263_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1264 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1265 (__v2df) __B,
1266 (__v2df) _mm_setzero_pd (),
1267 (__mmask8) __U,
1269}
1270
/* Scalar double-precision min with explicit rounding/SAE control R;
 * unmasked, merge-masked (W), and zero-masked variants. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1288
1289static __inline __m512i
1291 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1292}
1293
/* Masked variants of the 512-bit integer min operations: "mask" blends the
 * result with __W under __M, "maskz" zeroes unselected elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_min_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_min_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Per-element unsigned 32-bit minimum. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_min_epu32(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_min_epu32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_min_epu32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Per-element signed 64-bit minimum. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_min_epi64(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Per-element unsigned 64-bit minimum. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_min_epu64(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epu64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epu64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
1364
/* 32x32->64 widening multiplies (vpmuldq / vpmuludq builtins), low-32-bit
 * lane-wise multiply, and full 64-bit lane-wise multiply, with masked
 * variants blending against __W (mask) or zero (maskz). */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mul_epi32(__m512i __X, __m512i __Y) {
  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}

static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)__W);
}

static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}

static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mul_epu32(__m512i __X, __m512i __Y) {
  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}

static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)__W);
}

static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}

/* Low 32 bits of each 32x32 product; unsigned vector arithmetic is used so
 * signed overflow cannot invoke UB. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mullo_epi32(__m512i __A, __m512i __B) {
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Full 64-bit lane-wise product (no single AVX512F instruction; compiled
 * from the vector multiply). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mullox_epi64(__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullox_epi64(__A, __B),
                                             (__v8di)__W);
}
1433
/* 512-bit double-precision square root with explicit rounding control R;
 * mask/maskz forms blend with W or zero under U. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))

#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
1446
/* Per-element double-precision square root; masked form blends with __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) {
  return (__m512d)__builtin_elementwise_sqrt((__v8df)__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) {
  return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
                                              (__v8df)__W);
}
1456
1457static __inline__ __m512d __DEFAULT_FN_ATTRS512
1459 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
1460 (__v8df)_mm512_setzero_pd());
1461}
1462
/* 512-bit single-precision square root with explicit rounding control R;
 * mask/maskz forms blend with W or zero under U. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))

#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1475
/* Per-element single-precision square root; masked form blends with __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) {
  return (__m512)__builtin_elementwise_sqrt((__v16sf)__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) {
  return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
                                             (__v16sf)__W);
}
1485
1486static __inline__ __m512 __DEFAULT_FN_ATTRS512
1488 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
1489 (__v16sf)_mm512_setzero_ps());
1490}
1491
1492static __inline__ __m512d __DEFAULT_FN_ATTRS512
1494{
1495 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1496 (__v8df)
1498 (__mmask8) -1);}
1499
1500static __inline__ __m512d __DEFAULT_FN_ATTRS512
1501_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1502{
1503 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1504 (__v8df) __W,
1505 (__mmask8) __U);
1506}
1507
1508static __inline__ __m512d __DEFAULT_FN_ATTRS512
1510{
1511 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1512 (__v8df)
1514 (__mmask8) __U);
1515}
1516
1517static __inline__ __m512 __DEFAULT_FN_ATTRS512
1519{
1520 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1521 (__v16sf)
1523 (__mmask16) -1);
1524}
1525
1526static __inline__ __m512 __DEFAULT_FN_ATTRS512
1527_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1528{
1529 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1530 (__v16sf) __W,
1531 (__mmask16) __U);
1532}
1533
1534static __inline__ __m512 __DEFAULT_FN_ATTRS512
1536{
1537 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1538 (__v16sf)
1540 (__mmask16) __U);
1541}
1542
/* Scalar approximate (2^-14 relative error) reciprocal square roots of the
 * low element, with merge-masked and zero-masked variants. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
                                                 (__v4sf) __B,
                                                 (__v4sf)
                                                 _mm_setzero_ps (),
                                                 (__mmask8) -1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
                                                 (__v4sf) __B,
                                                 (__v4sf) __W,
                                                 (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
                                                 (__v4sf) __B,
                                                 (__v4sf) _mm_setzero_ps (),
                                                 (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
                                                  (__v2df) __B,
                                                  (__v2df)
                                                  _mm_setzero_pd (),
                                                  (__mmask8) -1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
                                                   (__v2df) __B,
                                                   (__v2df) __W,
                                                   (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
                                                   (__v2df) __B,
                                                   (__v2df) _mm_setzero_pd (),
                                                   (__mmask8) __U);
}
1598
1599static __inline__ __m512d __DEFAULT_FN_ATTRS512
1601{
1602 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1603 (__v8df)
1605 (__mmask8) -1);
1606}
1607
1608static __inline__ __m512d __DEFAULT_FN_ATTRS512
1609_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1610{
1611 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1612 (__v8df) __W,
1613 (__mmask8) __U);
1614}
1615
1616static __inline__ __m512d __DEFAULT_FN_ATTRS512
1618{
1619 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1620 (__v8df)
1622 (__mmask8) __U);
1623}
1624
1625static __inline__ __m512 __DEFAULT_FN_ATTRS512
1627{
1628 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1629 (__v16sf)
1631 (__mmask16) -1);
1632}
1633
1634static __inline__ __m512 __DEFAULT_FN_ATTRS512
1635_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1636{
1637 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1638 (__v16sf) __W,
1639 (__mmask16) __U);
1640}
1641
1642static __inline__ __m512 __DEFAULT_FN_ATTRS512
1644{
1645 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1646 (__v16sf)
1648 (__mmask16) __U);
1649}
1650
/* Scalar approximate (2^-14 relative error) reciprocals of the low
 * element, with merge-masked and zero-masked variants. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
                                               (__v4sf) __B,
                                               (__v4sf)
                                               _mm_setzero_ps (),
                                               (__mmask8) -1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
                                               (__v4sf) __B,
                                               (__v4sf) __W,
                                               (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
                                               (__v4sf) __B,
                                               (__v4sf) _mm_setzero_ps (),
                                               (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
                                                (__v2df) __B,
                                                (__v2df)
                                                _mm_setzero_pd (),
                                                (__mmask8) -1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
                                                 (__v2df) __B,
                                                 (__v2df) __W,
                                                 (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
                                                 (__v2df) __B,
                                                 (__v2df) _mm_setzero_pd (),
                                                 (__mmask8) __U);
}
1706
1707static __inline __m512 __DEFAULT_FN_ATTRS512
1709{
1710 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1712 (__v16sf) __A, (unsigned short)-1,
1714}
1715
1716static __inline__ __m512 __DEFAULT_FN_ATTRS512
1717_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1718{
1719 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1721 (__v16sf) __W, __U,
1723}
1724
1725static __inline __m512d __DEFAULT_FN_ATTRS512
1727{
1728 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1730 (__v8df) __A, (unsigned char)-1,
1732}
1733
1734static __inline__ __m512d __DEFAULT_FN_ATTRS512
1735_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1736{
1737 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1739 (__v8df) __W, __U,
1741}
1742
1743static __inline__ __m512 __DEFAULT_FN_ATTRS512
1744_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1745{
1746 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1748 (__v16sf) __W, __U,
1750}
1751
1752static __inline __m512 __DEFAULT_FN_ATTRS512
1754{
1755 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1757 (__v16sf) __A, (unsigned short)-1,
1759}
1760
1761static __inline __m512d __DEFAULT_FN_ATTRS512
1762_mm512_ceil_pd(__m512d __A)
1763{
1764 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1766 (__v8df) __A, (unsigned char)-1,
1768}
1769
1770static __inline__ __m512d __DEFAULT_FN_ATTRS512
1771_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1772{
1773 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1775 (__v8df) __W, __U,
1777}
1778
/* Per-element absolute value of signed 64-bit lanes; masked form blends
 * with __W under __U. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_abs_epi64(__m512i __A) {
  return (__m512i)__builtin_elementwise_abs((__v8di)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_abs_epi64(__A),
                                             (__v8di)__W);
}
1790
1791static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1793 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1794 (__v8di)_mm512_abs_epi64(__A),
1795 (__v8di)_mm512_setzero_si512());
1796}
1797
/* Per-element absolute value of signed 32-bit lanes; masked form blends
 * with __W under __U. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_abs_epi32(__m512i __A) {
  return (__m512i)__builtin_elementwise_abs((__v16si) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_abs_epi32(__A),
                                             (__v16si)__W);
}
1809
1810static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1812 return (__m512i)__builtin_ia32_selectd_512(__U,
1813 (__v16si)_mm512_abs_epi32(__A),
1814 (__v16si)_mm512_setzero_si512());
1815}
1816
/* Masked scalar single-precision add: compute _mm_add_ss(__A, __B), then
 * select the low result element against __W (merge) or zero (maskz) by the
 * low bit of __U. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  __A = _mm_add_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  __A = _mm_add_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
1828
/* Scalar single-precision add with explicit rounding control R; unmasked,
 * merge-masked (W), and zero-masked variants. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1846
/* Masked scalar double-precision add: compute _mm_add_sd(__A, __B), then
 * select the low result element against __W (merge) or zero (maskz). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  __A = _mm_add_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  __A = _mm_add_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
/* Scalar double-precision add with explicit rounding control R; unmasked,
 * merge-masked (W), and zero-masked variants. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1875
/* Masked 512-bit FP additions: blend the _mm512_add_* result with __W
 * (mask) or zero (maskz) under mask __U. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
1903
/* 512-bit FP add with explicit rounding control R, plus masked variants. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
1931
/* Masked scalar single-precision subtract: compute _mm_sub_ss(__A, __B),
 * then select the low result element against __W (merge) or zero (maskz). */
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  __A = _mm_sub_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  __A = _mm_sub_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
/* Scalar single-precision subtract with explicit rounding control R;
 * unmasked, merge-masked (W), and zero-masked variants. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1960
/* Masked scalar double-precision subtract: compute _mm_sub_sd(__A, __B),
 * then select the low result element against __W (merge) or zero (maskz). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  __A = _mm_sub_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  __A = _mm_sub_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
1972
/* Scalar double-precision subtract with explicit rounding control R;
 * unmasked, merge-masked (W), and zero-masked variants. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1990
/* Masked 512-bit FP subtractions: blend the _mm512_sub_* result with __W
 * (mask) or zero (maskz) under mask __U. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2018
/* 512-bit FP subtract with explicit rounding control R, plus masked
 * variants. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2046
2047static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2048_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2049 __A = _mm_mul_ss(__A, __B);
2050 return __builtin_ia32_selectss_128(__U, __A, __W);
2051}
2052
2053static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2054_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2055 __A = _mm_mul_ss(__A, __B);
2056 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2057}
/* Scalar float multiply with rounding control R (unmasked/merge/zero). */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2075
2076static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2077_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2078 __A = _mm_mul_sd(__A, __B);
2079 return __builtin_ia32_selectsd_128(__U, __A, __W);
2080}
2081
2082static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2083_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2084 __A = _mm_mul_sd(__A, __B);
2085 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2086}
2087
/* Scalar double multiply with rounding control R (unmasked/merge/zero). */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2105
2106static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2107_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2108 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2109 (__v8df)_mm512_mul_pd(__A, __B),
2110 (__v8df)__W);
2111}
2112
2113static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2114_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2115 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2116 (__v8df)_mm512_mul_pd(__A, __B),
2117 (__v8df)_mm512_setzero_pd());
2118}
2119
2120static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2121_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2122 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2123 (__v16sf)_mm512_mul_ps(__A, __B),
2124 (__v16sf)__W);
2125}
2126
2127static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2128_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2129 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2130 (__v16sf)_mm512_mul_ps(__A, __B),
2131 (__v16sf)_mm512_setzero_ps());
2132}
2133
/* 512-bit packed multiply with explicit rounding control R, plus masked
   variants built on the unmasked macro. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2161
2162static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2163_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2164 __A = _mm_div_ss(__A, __B);
2165 return __builtin_ia32_selectss_128(__U, __A, __W);
2166}
2167
2168static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2169_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2170 __A = _mm_div_ss(__A, __B);
2171 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2172}
2173
/* Scalar float divide with rounding control R (unmasked/merge/zero). */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2191
2192static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2193_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2194 __A = _mm_div_sd(__A, __B);
2195 return __builtin_ia32_selectsd_128(__U, __A, __W);
2196}
2197
2198static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2199_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2200 __A = _mm_div_sd(__A, __B);
2201 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2202}
2203
/* Scalar double divide with rounding control R (unmasked/merge/zero). */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2221
2222static __inline __m512d
2224 return (__m512d)((__v8df)__a/(__v8df)__b);
2225}
2226
2227static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2228_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2229 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2230 (__v8df)_mm512_div_pd(__A, __B),
2231 (__v8df)__W);
2232}
2233
2234static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2235_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2236 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2237 (__v8df)_mm512_div_pd(__A, __B),
2238 (__v8df)_mm512_setzero_pd());
2239}
2240
2241static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2242_mm512_div_ps(__m512 __a, __m512 __b) {
2243 return (__m512)((__v16sf)__a/(__v16sf)__b);
2244}
2245
2246static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2247_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2248 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2249 (__v16sf)_mm512_div_ps(__A, __B),
2250 (__v16sf)__W);
2251}
2252
2253static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2254_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2255 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2256 (__v16sf)_mm512_div_ps(__A, __B),
2257 (__v16sf)_mm512_setzero_ps());
2258}
2259
/* 512-bit packed divide with explicit rounding control R, plus masked
   variants built on the unmasked macro. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2287
/* Round packed elements to the number of fraction bits given by the
   immediate; ps then pd, each with mask/maskz and explicit-rounding forms. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))

#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2351
/* Packed double FMA family with rounding control R.  fmsub negates C,
   fnmadd negates A, fnmsub negates A and C; all lower onto vfmaddpd512. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2434
2435static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2436_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
2437 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2438 (__v8df)__C);
2439}
2440
2441static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2442_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2443 return (__m512d)__builtin_ia32_selectpd_512(
2444 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__A);
2445}
2446
2447static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2448_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2449 return (__m512d)__builtin_ia32_selectpd_512(
2450 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__C);
2451}
2452
2453static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2454_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2455 return (__m512d)__builtin_ia32_selectpd_512(
2456 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C),
2457 (__v8df)_mm512_setzero_pd());
2458}
2459
2460static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2461_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
2462 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2463 -(__v8df)__C);
2464}
2465
2466static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2467_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2468 return (__m512d)__builtin_ia32_selectpd_512(
2469 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__A);
2470}
2471
2472static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2473_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2474 return (__m512d)__builtin_ia32_selectpd_512(
2475 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__C);
2476}
2477
2478static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2479_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2480 return (__m512d)__builtin_ia32_selectpd_512(
2481 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C),
2482 (__v8df)_mm512_setzero_pd());
2483}
2484
2485static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2486_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
2487 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2488 (__v8df)__C);
2489}
2490
2491static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2492_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2493 return (__m512d)__builtin_ia32_selectpd_512(
2494 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__A);
2495}
2496
2497static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2498_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2499 return (__m512d)__builtin_ia32_selectpd_512(
2500 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__C);
2501}
2502
2503static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2504_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2505 return (__m512d)__builtin_ia32_selectpd_512(
2506 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C),
2507 (__v8df)_mm512_setzero_pd());
2508}
2509
2510static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2511_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
2512 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2513 -(__v8df)__C);
2514}
2515
2516static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2517_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2518 return (__m512d)__builtin_ia32_selectpd_512(
2519 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__A);
2520}
2521
2522static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2523_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2524 return (__m512d)__builtin_ia32_selectpd_512(
2525 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__C);
2526}
2527
2528static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2529_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2530 return (__m512d)__builtin_ia32_selectpd_512(
2531 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C),
2532 (__v8df)_mm512_setzero_pd());
2533}
2534
/* Packed float FMA family with rounding control R, lowering onto
   vfmaddps512 (mask / mask3 / maskz builtins). */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2617
2618static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2619_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
2620 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2621 (__v16sf)__C);
2622}
2623
2624static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2625_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2626 return (__m512)__builtin_ia32_selectps_512(
2627 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__A);
2628}
2629
2630static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2631_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2632 return (__m512)__builtin_ia32_selectps_512(
2633 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__C);
2634}
2635
2636static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2637_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2638 return (__m512)__builtin_ia32_selectps_512(
2639 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C),
2640 (__v16sf)_mm512_setzero_ps());
2641}
2642
2643static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2644_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
2645 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2646 -(__v16sf)__C);
2647}
2648
2649static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2650_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2651 return (__m512)__builtin_ia32_selectps_512(
2652 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__A);
2653}
2654
2655static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2656_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2657 return (__m512)__builtin_ia32_selectps_512(
2658 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__C);
2659}
2660
2661static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2662_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2663 return (__m512)__builtin_ia32_selectps_512(
2664 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C),
2665 (__v16sf)_mm512_setzero_ps());
2666}
2667
2668static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2669_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
2670 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2671 (__v16sf)__C);
2672}
2673
2674static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2675_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2676 return (__m512)__builtin_ia32_selectps_512(
2677 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__A);
2678}
2679
2680static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2681_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2682 return (__m512)__builtin_ia32_selectps_512(
2683 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__C);
2684}
2685
2686static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2687_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2688 return (__m512)__builtin_ia32_selectps_512(
2689 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C),
2690 (__v16sf)_mm512_setzero_ps());
2691}
2692
2693static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2694_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
2695 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2696 -(__v16sf)__C);
2697}
2698
2699static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2700_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2701 return (__m512)__builtin_ia32_selectps_512(
2702 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__A);
2703}
2704
2705static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2706_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2707 return (__m512)__builtin_ia32_selectps_512(
2708 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__C);
2709}
2710
2711static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2712_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2713 return (__m512)__builtin_ia32_selectps_512(
2714 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C),
2715 (__v16sf)_mm512_setzero_ps());
2716}
2717
/* Packed double fused multiply with alternating add/sub (fmaddsub) or
   sub/add (fmsubadd = fmaddsub with C negated), rounding control R. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2765
2766
2767static __inline__ __m512d __DEFAULT_FN_ATTRS512
2768_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2769{
2770 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2771 (__v8df) __B,
2772 (__v8df) __C,
2773 (__mmask8) -1,
2775}
2776
2777static __inline__ __m512d __DEFAULT_FN_ATTRS512
2778_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2779{
2780 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2781 (__v8df) __B,
2782 (__v8df) __C,
2783 (__mmask8) __U,
2785}
2786
2787static __inline__ __m512d __DEFAULT_FN_ATTRS512
2788_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2789{
2790 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2791 (__v8df) __B,
2792 (__v8df) __C,
2793 (__mmask8) __U,
2795}
2796
2797static __inline__ __m512d __DEFAULT_FN_ATTRS512
2798_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2799{
2800 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2801 (__v8df) __B,
2802 (__v8df) __C,
2803 (__mmask8) __U,
2805}
2806
2807static __inline__ __m512d __DEFAULT_FN_ATTRS512
2808_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2809{
2810 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2811 (__v8df) __B,
2812 -(__v8df) __C,
2813 (__mmask8) -1,
2815}
2816
2817static __inline__ __m512d __DEFAULT_FN_ATTRS512
2818_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2819{
2820 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2821 (__v8df) __B,
2822 -(__v8df) __C,
2823 (__mmask8) __U,
2825}
2826
2827static __inline__ __m512d __DEFAULT_FN_ATTRS512
2828_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2829{
2830 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2831 (__v8df) __B,
2832 -(__v8df) __C,
2833 (__mmask8) __U,
2835}
2836
/* Packed float fmaddsub/fmsubadd macro forms with rounding control R. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
2884
2885
2886static __inline__ __m512 __DEFAULT_FN_ATTRS512
2887_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2888{
2889 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2890 (__v16sf) __B,
2891 (__v16sf) __C,
2892 (__mmask16) -1,
2894}
2895
2896static __inline__ __m512 __DEFAULT_FN_ATTRS512
2897_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2898{
2899 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2900 (__v16sf) __B,
2901 (__v16sf) __C,
2902 (__mmask16) __U,
2904}
2905
2906static __inline__ __m512 __DEFAULT_FN_ATTRS512
2907_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2908{
2909 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2910 (__v16sf) __B,
2911 (__v16sf) __C,
2912 (__mmask16) __U,
2914}
2915
2916static __inline__ __m512 __DEFAULT_FN_ATTRS512
2917_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2918{
2919 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2920 (__v16sf) __B,
2921 (__v16sf) __C,
2922 (__mmask16) __U,
2924}
2925
2926static __inline__ __m512 __DEFAULT_FN_ATTRS512
2927_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2928{
2929 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2930 (__v16sf) __B,
2931 -(__v16sf) __C,
2932 (__mmask16) -1,
2934}
2935
2936static __inline__ __m512 __DEFAULT_FN_ATTRS512
2937_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2938{
2939 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2940 (__v16sf) __B,
2941 -(__v16sf) __C,
2942 (__mmask16) __U,
2944}
2945
2946static __inline__ __m512 __DEFAULT_FN_ATTRS512
2947_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2948{
2949 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2950 (__v16sf) __B,
2951 -(__v16sf) __C,
2952 (__mmask16) __U,
2954}
2955
/* mask3 (merge-into-C) fmsub / fmsubadd with explicit rounding. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2973
2974
2975static __inline__ __m512d __DEFAULT_FN_ATTRS512
2976_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2977{
2978 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2979 (__v8df) __B,
2980 (__v8df) __C,
2981 (__mmask8) __U,
2983}
2984
/* mask3 fmsubadd, single precision, explicit rounding. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
2990
2991
2992static __inline__ __m512 __DEFAULT_FN_ATTRS512
2993_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2994{
2995 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2996 (__v16sf) __B,
2997 (__v16sf) __C,
2998 (__mmask16) __U,
3000}
3001
/* Negated FMA round variants. fnmadd = -(A*B)+C and fnmsub = -(A*B)-C are
 * expressed through the fmadd/fmsub builtins by negating operands. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
3039
3040/* Vector permutations */
3041
3042static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3043_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
3044 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3045 (__v16si) __B);
3046}
3047
3048static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3049_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3050 __m512i __B) {
3051 return (__m512i)__builtin_ia32_selectd_512(__U,
3052 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3053 (__v16si)__A);
3054}
3055
3056static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3057_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3058 __m512i __B) {
3059 return (__m512i)__builtin_ia32_selectd_512(__U,
3060 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3061 (__v16si)__I);
3062}
3063
3064static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3065_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3066 __m512i __B) {
3067 return (__m512i)__builtin_ia32_selectd_512(__U,
3068 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3069 (__v16si)_mm512_setzero_si512());
3070}
3071
3072static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3073_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) {
3074 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3075 (__v8di) __B);
3076}
3077
3078static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3079_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3080 __m512i __B) {
3081 return (__m512i)__builtin_ia32_selectq_512(__U,
3082 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3083 (__v8di)__A);
3084}
3085
3086static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3087_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3088 __m512i __B) {
3089 return (__m512i)__builtin_ia32_selectq_512(__U,
3090 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3091 (__v8di)__I);
3092}
3093
3094static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3095_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3096 __m512i __B) {
3097 return (__m512i)__builtin_ia32_selectq_512(__U,
3098 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3099 (__v8di)_mm512_setzero_si512());
3100}
3101
/* Concatenate-and-shift-right by I elements across the A:B pair. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
3129/* Vector Extract */
3130
/* Vector extract: pull a 256-bit (4 x double) or 128-bit (4 x float)
 * sub-vector out of a 512-bit source at index I/imm. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8) - 1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8) - 1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3160
3161/* Vector Blend */
3162
3163static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3164_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
3165 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3166 (__v8df) __W,
3167 (__v8df) __A);
3168}
3169
3170static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3171_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
3172 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3173 (__v16sf) __W,
3174 (__v16sf) __A);
3175}
3176
3177static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3178_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
3179 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3180 (__v8di) __W,
3181 (__v8di) __A);
3182}
3183
3184static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3185_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
3186 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3187 (__v16si) __W,
3188 (__v16si) __A);
3189}
3190
3191/* Compare */
3192
/* Float comparisons producing a k-mask. The named forms (cmpeq, cmplt, ...)
 * forward to _mm512_cmp_*_mask with the matching _CMP_* predicate; the
 * non-round forms use the current rounding/exception direction. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

/* Double-precision comparisons, same structure as the float family above. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3302
3303/* Conversion */
3304
/* Truncating float -> unsigned 32-bit conversion with explicit rounding. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
3319
3320
3321static __inline __m512i __DEFAULT_FN_ATTRS512
3323{
3324 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3325 (__v16si)
3327 (__mmask16) -1,
3329}
3330
3331static __inline__ __m512i __DEFAULT_FN_ATTRS512
3332_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3333{
3334 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3335 (__v16si) __W,
3336 (__mmask16) __U,
3338}
3339
3340static __inline__ __m512i __DEFAULT_FN_ATTRS512
3342{
3343 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3344 (__v16si) _mm512_setzero_si512 (),
3345 (__mmask16) __U,
3347}
3348
/* Signed/unsigned 32-bit int -> float conversion with explicit rounding. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3378
3379static __inline__ __m512
3381 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3382}
3383
3384static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3385_mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3386 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3387 (__v16sf)_mm512_cvtepu32_ps(__A),
3388 (__v16sf)__W);
3389}
3390
3391static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3393 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3394 (__v16sf)_mm512_cvtepu32_ps(__A),
3395 (__v16sf)_mm512_setzero_ps());
3396}
3397
3398static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3400 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3401}
3402
3403static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3404_mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3405 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3406 (__v8df)_mm512_cvtepi32_pd(__A),
3407 (__v8df)__W);
3408}
3409
3410static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3412 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3413 (__v8df)_mm512_cvtepi32_pd(__A),
3414 (__v8df)_mm512_setzero_pd());
3415}
3416
3417static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3419 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3420}
3421
3422static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3423_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3424 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3425}
3426
3427static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3429 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3430}
3431
3432static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3433_mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3434 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3435 (__v16sf)_mm512_cvtepi32_ps(__A),
3436 (__v16sf)__W);
3437}
3438
3439static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3441 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3442 (__v16sf)_mm512_cvtepi32_ps(__A),
3443 (__v16sf)_mm512_setzero_ps());
3444}
3445
3446static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3448 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3449}
3450
3451static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3452_mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3453 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3454 (__v8df)_mm512_cvtepu32_pd(__A),
3455 (__v8df)__W);
3456}
3457
3458static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3460 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3461 (__v8df)_mm512_cvtepu32_pd(__A),
3462 (__v8df)_mm512_setzero_pd());
3463}
3464
3465static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3467 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3468}
3469
3470static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3471_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3472 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3473}
3474
/* double -> float narrowing conversion with explicit rounding. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
3489
3490static __inline__ __m256
3492 return (__m256)__builtin_ia32_cvtpd2ps512_mask(
3493 (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
3495}
3496
3497static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
3498_mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A) {
3499 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3500 (__v8sf) __W,
3501 (__mmask8) __U,
3503}
3504
3505static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
3507 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3508 (__v8sf) _mm256_setzero_ps (),
3509 (__mmask8) __U,
3511}
3512
3513static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3514_mm512_cvtpd_pslo(__m512d __A) {
3515 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3516 (__v8sf) _mm256_setzero_ps (),
3517 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3518}
3519
3520static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3521_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
3522 return (__m512) __builtin_shufflevector (
3524 __U, __A),
3525 (__v8sf) _mm256_setzero_ps (),
3526 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3527}
3528
/* float <-> half (F16C-style) conversions. ps->ph takes an immediate
 * rounding control I; ph->ps takes a rounding mode R. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(U), \
                                             (__mmask16)(W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)))

#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph

#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3562
3563
3564static __inline __m512 __DEFAULT_FN_ATTRS512
3566{
3567 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3568 (__v16sf)
3570 (__mmask16) -1,
3572}
3573
3574static __inline__ __m512 __DEFAULT_FN_ATTRS512
3575_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3576{
3577 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3578 (__v16sf) __W,
3579 (__mmask16) __U,
3581}
3582
3583static __inline__ __m512 __DEFAULT_FN_ATTRS512
3585{
3586 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3587 (__v16sf) _mm512_setzero_ps (),
3588 (__mmask16) __U,
3590}
3591
/* Truncating double -> signed 32-bit conversion with explicit rounding. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3606
3607static __inline __m256i __DEFAULT_FN_ATTRS512
3609{
3610 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3611 (__v8si)_mm256_setzero_si256(),
3612 (__mmask8) -1,
3614}
3615
3616static __inline__ __m256i __DEFAULT_FN_ATTRS512
3617_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3618{
3619 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3620 (__v8si) __W,
3621 (__mmask8) __U,
3623}
3624
3625static __inline__ __m256i __DEFAULT_FN_ATTRS512
3627{
3628 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3629 (__v8si) _mm256_setzero_si256 (),
3630 (__mmask8) __U,
3632}
3633
/* Truncating float -> signed 32-bit conversion with explicit rounding. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3648
3649static __inline __m512i __DEFAULT_FN_ATTRS512
3651{
3652 return (__m512i)
3653 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3654 (__v16si) _mm512_setzero_si512 (),
3656}
3657
3658static __inline__ __m512i __DEFAULT_FN_ATTRS512
3659_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3660{
3661 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3662 (__v16si) __W,
3663 (__mmask16) __U,
3665}
3666
3667static __inline__ __m512i __DEFAULT_FN_ATTRS512
3669{
3670 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3671 (__v16si) _mm512_setzero_si512 (),
3672 (__mmask16) __U,
3674}
3675
/* Rounding float -> signed 32-bit conversion with explicit rounding mode. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
3690
3691static __inline__ __m512i __DEFAULT_FN_ATTRS512
3693{
3694 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3695 (__v16si) _mm512_undefined_epi32 (),
3696 (__mmask16) -1,
3698}
3699
3700static __inline__ __m512i __DEFAULT_FN_ATTRS512
3701_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3702{
3703 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3704 (__v16si) __W,
3705 (__mmask16) __U,
3707}
3708
3709static __inline__ __m512i __DEFAULT_FN_ATTRS512
3711{
3712 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3713 (__v16si)
3715 (__mmask16) __U,
3717}
3718
/* Rounding double -> signed 32-bit conversion with explicit rounding mode. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
3733
3734static __inline__ __m256i __DEFAULT_FN_ATTRS512
3736{
3737 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3738 (__v8si)
3740 (__mmask8) -1,
3742}
3743
3744static __inline__ __m256i __DEFAULT_FN_ATTRS512
3745_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3746{
3747 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3748 (__v8si) __W,
3749 (__mmask8) __U,
3751}
3752
3753static __inline__ __m256i __DEFAULT_FN_ATTRS512
3755{
3756 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3757 (__v8si)
3759 (__mmask8) __U,
3761}
3762
/* Rounding float -> unsigned 32-bit conversion with explicit rounding mode. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3777
3778static __inline__ __m512i __DEFAULT_FN_ATTRS512
3780{
3781 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
3782 (__v16si)\
3784 (__mmask16) -1,\
3786}
3787
3788static __inline__ __m512i __DEFAULT_FN_ATTRS512
3789_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3790{
3791 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3792 (__v16si) __W,
3793 (__mmask16) __U,
3795}
3796
3797static __inline__ __m512i __DEFAULT_FN_ATTRS512
3799{
3800 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3801 (__v16si)
3803 (__mmask16) __U ,
3805}
3806
/* Rounding double -> unsigned 32-bit conversion with explicit rounding mode. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3821
3822static __inline__ __m256i __DEFAULT_FN_ATTRS512
3824{
3825 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3826 (__v8si)
3828 (__mmask8) -1,
3830}
3831
3832static __inline__ __m256i __DEFAULT_FN_ATTRS512
3833_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
3834{
3835 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3836 (__v8si) __W,
3837 (__mmask8) __U,
3839}
3840
3841static __inline__ __m256i __DEFAULT_FN_ATTRS512
3843{
3844 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3845 (__v8si)
3847 (__mmask8) __U,
3849}
3850
3851static __inline__ double __DEFAULT_FN_ATTRS512
3853{
3854 return __a[0];
3855}
3856
3857static __inline__ float __DEFAULT_FN_ATTRS512
3859{
3860 return __a[0];
3861}
3862
3863/* Unpack and Interleave */
3864
3865static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3866_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
3867 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
3868 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
3869}
3870
3871static __inline__ __m512d __DEFAULT_FN_ATTRS512
3872_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
3873{
3874 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3875 (__v8df)_mm512_unpackhi_pd(__A, __B),
3876 (__v8df)__W);
3877}
3878
3879static __inline__ __m512d __DEFAULT_FN_ATTRS512
3880_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
3881{
3882 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3883 (__v8df)_mm512_unpackhi_pd(__A, __B),
3884 (__v8df)_mm512_setzero_pd());
3885}
3886
3887static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3888_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
3889 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
3890 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
3891}
3892
3893static __inline__ __m512d __DEFAULT_FN_ATTRS512
3894_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
3895{
3896 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3897 (__v8df)_mm512_unpacklo_pd(__A, __B),
3898 (__v8df)__W);
3899}
3900
3901static __inline__ __m512d __DEFAULT_FN_ATTRS512
3902_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
3903{
3904 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3905 (__v8df)_mm512_unpacklo_pd(__A, __B),
3906 (__v8df)_mm512_setzero_pd());
3907}
3908
3909static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3910_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
3911 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
3912 2, 18, 3, 19,
3913 2+4, 18+4, 3+4, 19+4,
3914 2+8, 18+8, 3+8, 19+8,
3915 2+12, 18+12, 3+12, 19+12);
3916}
3917
3918static __inline__ __m512 __DEFAULT_FN_ATTRS512
3919_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
3920{
3921 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3922 (__v16sf)_mm512_unpackhi_ps(__A, __B),
3923 (__v16sf)__W);
3924}
3925
3926static __inline__ __m512 __DEFAULT_FN_ATTRS512
3927_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
3928{
3929 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3930 (__v16sf)_mm512_unpackhi_ps(__A, __B),
3931 (__v16sf)_mm512_setzero_ps());
3932}
3933
3934static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3935_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
3936 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
3937 0, 16, 1, 17,
3938 0+4, 16+4, 1+4, 17+4,
3939 0+8, 16+8, 1+8, 17+8,
3940 0+12, 16+12, 1+12, 17+12);
3941}
3942
3943static __inline__ __m512 __DEFAULT_FN_ATTRS512
3944_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
3945{
3946 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3947 (__v16sf)_mm512_unpacklo_ps(__A, __B),
3948 (__v16sf)__W);
3949}
3950
3951static __inline__ __m512 __DEFAULT_FN_ATTRS512
3952_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
3953{
3954 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3955 (__v16sf)_mm512_unpacklo_ps(__A, __B),
3956 (__v16sf)_mm512_setzero_ps());
3957}
3958
3959static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3960_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
3961 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3962 2, 18, 3, 19,
3963 2+4, 18+4, 3+4, 19+4,
3964 2+8, 18+8, 3+8, 19+8,
3965 2+12, 18+12, 3+12, 19+12);
3966}
3967
3968static __inline__ __m512i __DEFAULT_FN_ATTRS512
3969_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3970{
3971 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
3972 (__v16si)_mm512_unpackhi_epi32(__A, __B),
3973 (__v16si)__W);
3974}
3975
3976static __inline__ __m512i __DEFAULT_FN_ATTRS512
3977_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
3978{
3979 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
3980 (__v16si)_mm512_unpackhi_epi32(__A, __B),
3981 (__v16si)_mm512_setzero_si512());
3982}
3983
3984static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3985_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
3986 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3987 0, 16, 1, 17,
3988 0+4, 16+4, 1+4, 17+4,
3989 0+8, 16+8, 1+8, 17+8,
3990 0+12, 16+12, 1+12, 17+12);
3991}
3992
3993static __inline__ __m512i __DEFAULT_FN_ATTRS512
3994_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3995{
3996 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
3997 (__v16si)_mm512_unpacklo_epi32(__A, __B),
3998 (__v16si)__W);
3999}
4000
4001static __inline__ __m512i __DEFAULT_FN_ATTRS512
4002_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4003{
4004 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4005 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4006 (__v16si)_mm512_setzero_si512());
4007}
4008
4009static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4010_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
4011 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4012 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4013}
4014
4015static __inline__ __m512i __DEFAULT_FN_ATTRS512
4016_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4017{
4018 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4019 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4020 (__v8di)__W);
4021}
4022
4023static __inline__ __m512i __DEFAULT_FN_ATTRS512
4024_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4025{
4026 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4027 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4028 (__v8di)_mm512_setzero_si512());
4029}
4030
4031static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4032_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
4033 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4034 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4035}
4036
4037static __inline__ __m512i __DEFAULT_FN_ATTRS512
4038_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4039{
4040 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4041 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4042 (__v8di)__W);
4043}
4044
4045static __inline__ __m512i __DEFAULT_FN_ATTRS512
4046_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4047{
4048 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4049 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4050 (__v8di)_mm512_setzero_si512());
4051}
4052
4053
4054/* SIMD load ops */
4055
4056static __inline __m512i __DEFAULT_FN_ATTRS512
4058{
4059 struct __loadu_si512 {
4060 __m512i_u __v;
4061 } __attribute__((__packed__, __may_alias__));
4062 return ((const struct __loadu_si512*)__P)->__v;
4063}
4064
4065static __inline __m512i __DEFAULT_FN_ATTRS512
4067{
4068 struct __loadu_epi32 {
4069 __m512i_u __v;
4070 } __attribute__((__packed__, __may_alias__));
4071 return ((const struct __loadu_epi32*)__P)->__v;
4072}
4073
4074static __inline __m512i __DEFAULT_FN_ATTRS512
4075_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4076{
4077 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4078 (__v16si) __W,
4079 (__mmask16) __U);
4080}
4081
4082
4083static __inline __m512i __DEFAULT_FN_ATTRS512
4085{
4086 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4087 (__v16si)
4089 (__mmask16) __U);
4090}
4091
4092static __inline __m512i __DEFAULT_FN_ATTRS512
4094{
4095 struct __loadu_epi64 {
4096 __m512i_u __v;
4097 } __attribute__((__packed__, __may_alias__));
4098 return ((const struct __loadu_epi64*)__P)->__v;
4099}
4100
4101static __inline __m512i __DEFAULT_FN_ATTRS512
4102_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4103{
4104 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4105 (__v8di) __W,
4106 (__mmask8) __U);
4107}
4108
4109static __inline __m512i __DEFAULT_FN_ATTRS512
4111{
4112 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4113 (__v8di)
4115 (__mmask8) __U);
4116}
4117
4118static __inline __m512 __DEFAULT_FN_ATTRS512
4119_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4120{
4121 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4122 (__v16sf) __W,
4123 (__mmask16) __U);
4124}
4125
4126static __inline __m512 __DEFAULT_FN_ATTRS512
4128{
4129 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4130 (__v16sf)
4132 (__mmask16) __U);
4133}
4134
4135static __inline __m512d __DEFAULT_FN_ATTRS512
4136_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4137{
4138 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4139 (__v8df) __W,
4140 (__mmask8) __U);
4141}
4142
4143static __inline __m512d __DEFAULT_FN_ATTRS512
4145{
4146 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4147 (__v8df)
4149 (__mmask8) __U);
4150}
4151
4152static __inline __m512d __DEFAULT_FN_ATTRS512
4154{
4155 struct __loadu_pd {
4156 __m512d_u __v;
4157 } __attribute__((__packed__, __may_alias__));
4158 return ((const struct __loadu_pd*)__p)->__v;
4159}
4160
4161static __inline __m512 __DEFAULT_FN_ATTRS512
4163{
4164 struct __loadu_ps {
4165 __m512_u __v;
4166 } __attribute__((__packed__, __may_alias__));
4167 return ((const struct __loadu_ps*)__p)->__v;
4168}
4169
4170static __inline __m512 __DEFAULT_FN_ATTRS512
4172{
4173 return *(const __m512*)__p;
4174}
4175
4176static __inline __m512 __DEFAULT_FN_ATTRS512
4177_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4178{
4179 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4180 (__v16sf) __W,
4181 (__mmask16) __U);
4182}
4183
4184static __inline __m512 __DEFAULT_FN_ATTRS512
4186{
4187 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4188 (__v16sf)
4190 (__mmask16) __U);
4191}
4192
4193static __inline __m512d __DEFAULT_FN_ATTRS512
4195{
4196 return *(const __m512d*)__p;
4197}
4198
4199static __inline __m512d __DEFAULT_FN_ATTRS512
4200_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4201{
4202 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4203 (__v8df) __W,
4204 (__mmask8) __U);
4205}
4206
4207static __inline __m512d __DEFAULT_FN_ATTRS512
4209{
4210 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4211 (__v8df)
4213 (__mmask8) __U);
4214}
4215
4216static __inline __m512i __DEFAULT_FN_ATTRS512
4218{
4219 return *(const __m512i *) __P;
4220}
4221
4222static __inline __m512i __DEFAULT_FN_ATTRS512
4224{
4225 return *(const __m512i *) __P;
4226}
4227
4228static __inline __m512i __DEFAULT_FN_ATTRS512
4230{
4231 return *(const __m512i *) __P;
4232}
4233
4234/* SIMD store ops */
4235
4236static __inline void __DEFAULT_FN_ATTRS512
4237_mm512_storeu_epi64 (void *__P, __m512i __A)
4238{
4239 struct __storeu_epi64 {
4240 __m512i_u __v;
4241 } __attribute__((__packed__, __may_alias__));
4242 ((struct __storeu_epi64*)__P)->__v = __A;
4243}
4244
4245static __inline void __DEFAULT_FN_ATTRS512
4246_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4247{
4248 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4249 (__mmask8) __U);
4250}
4251
4252static __inline void __DEFAULT_FN_ATTRS512
4253_mm512_storeu_si512 (void *__P, __m512i __A)
4254{
4255 struct __storeu_si512 {
4256 __m512i_u __v;
4257 } __attribute__((__packed__, __may_alias__));
4258 ((struct __storeu_si512*)__P)->__v = __A;
4259}
4260
4261static __inline void __DEFAULT_FN_ATTRS512
4262_mm512_storeu_epi32 (void *__P, __m512i __A)
4263{
4264 struct __storeu_epi32 {
4265 __m512i_u __v;
4266 } __attribute__((__packed__, __may_alias__));
4267 ((struct __storeu_epi32*)__P)->__v = __A;
4268}
4269
4270static __inline void __DEFAULT_FN_ATTRS512
4272{
4273 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4274 (__mmask16) __U);
4275}
4276
4277static __inline void __DEFAULT_FN_ATTRS512
4278_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4279{
4280 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4281}
4282
4283static __inline void __DEFAULT_FN_ATTRS512
4284_mm512_storeu_pd(void *__P, __m512d __A)
4285{
4286 struct __storeu_pd {
4287 __m512d_u __v;
4288 } __attribute__((__packed__, __may_alias__));
4289 ((struct __storeu_pd*)__P)->__v = __A;
4290}
4291
4292static __inline void __DEFAULT_FN_ATTRS512
4293_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4294{
4295 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4296 (__mmask16) __U);
4297}
4298
4299static __inline void __DEFAULT_FN_ATTRS512
4300_mm512_storeu_ps(void *__P, __m512 __A)
4301{
4302 struct __storeu_ps {
4303 __m512_u __v;
4304 } __attribute__((__packed__, __may_alias__));
4305 ((struct __storeu_ps*)__P)->__v = __A;
4306}
4307
4308static __inline void __DEFAULT_FN_ATTRS512
4309_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4310{
4311 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4312}
4313
4314static __inline void __DEFAULT_FN_ATTRS512
4315_mm512_store_pd(void *__P, __m512d __A)
4316{
4317 *(__m512d*)__P = __A;
4318}
4319
4320static __inline void __DEFAULT_FN_ATTRS512
4321_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4322{
4323 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4324 (__mmask16) __U);
4325}
4326
4327static __inline void __DEFAULT_FN_ATTRS512
4328_mm512_store_ps(void *__P, __m512 __A)
4329{
4330 *(__m512*)__P = __A;
4331}
4332
4333static __inline void __DEFAULT_FN_ATTRS512
4334_mm512_store_si512 (void *__P, __m512i __A)
4335{
4336 *(__m512i *) __P = __A;
4337}
4338
4339static __inline void __DEFAULT_FN_ATTRS512
4340_mm512_store_epi32 (void *__P, __m512i __A)
4341{
4342 *(__m512i *) __P = __A;
4343}
4344
4345static __inline void __DEFAULT_FN_ATTRS512
4346_mm512_store_epi64 (void *__P, __m512i __A)
4347{
4348 *(__m512i *) __P = __A;
4349}
4350
4351/* Mask ops */
4352
4355 return __builtin_ia32_knothi(__M);
4356}
4357
4358/* Integer compare */
4359
/* Convenience aliases for 32-bit integer compares, expressed in terms of
   the generic _mm512_[mask_]cmp_ep[iu]32_mask with a fixed predicate. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
4409
/* Convenience aliases for 64-bit integer compares, expressed in terms of
   the generic _mm512_[mask_]cmp_ep[iu]64_mask with a fixed predicate. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4459
4460static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4462 /* This function always performs a signed extension, but __v16qi is a char
4463 which may be signed or unsigned, so use __v16qs. */
4464 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4465}
4466
4467static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4468_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4469 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4470 (__v16si)_mm512_cvtepi8_epi32(__A),
4471 (__v16si)__W);
4472}
4473
4474static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4476 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4477 (__v16si)_mm512_cvtepi8_epi32(__A),
4478 (__v16si)_mm512_setzero_si512());
4479}
4480
4481static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4483 /* This function always performs a signed extension, but __v16qi is a char
4484 which may be signed or unsigned, so use __v16qs. */
4485 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4486}
4487
4488static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4489_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4490 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4491 (__v8di)_mm512_cvtepi8_epi64(__A),
4492 (__v8di)__W);
4493}
4494
4495static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4497 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4498 (__v8di)_mm512_cvtepi8_epi64(__A),
4499 (__v8di)_mm512_setzero_si512 ());
4500}
4501
4502static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4504 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4505}
4506
4507static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4508_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4509 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4510 (__v8di)_mm512_cvtepi32_epi64(__X),
4511 (__v8di)__W);
4512}
4513
4514static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4516 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4517 (__v8di)_mm512_cvtepi32_epi64(__X),
4518 (__v8di)_mm512_setzero_si512());
4519}
4520
4521static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4523 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4524}
4525
4526static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4527_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4528 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4529 (__v16si)_mm512_cvtepi16_epi32(__A),
4530 (__v16si)__W);
4531}
4532
4533static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4535 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4536 (__v16si)_mm512_cvtepi16_epi32(__A),
4537 (__v16si)_mm512_setzero_si512 ());
4538}
4539
4540static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4542 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4543}
4544
4545static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4546_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4547 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4548 (__v8di)_mm512_cvtepi16_epi64(__A),
4549 (__v8di)__W);
4550}
4551
4552static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4554 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4555 (__v8di)_mm512_cvtepi16_epi64(__A),
4556 (__v8di)_mm512_setzero_si512());
4557}
4558
4559static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4561 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4562}
4563
4564static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4565_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4566 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4567 (__v16si)_mm512_cvtepu8_epi32(__A),
4568 (__v16si)__W);
4569}
4570
4571static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4573 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4574 (__v16si)_mm512_cvtepu8_epi32(__A),
4575 (__v16si)_mm512_setzero_si512());
4576}
4577
4578static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4580 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4581}
4582
4583static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4584_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4585 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4586 (__v8di)_mm512_cvtepu8_epi64(__A),
4587 (__v8di)__W);
4588}
4589
4590static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4592 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4593 (__v8di)_mm512_cvtepu8_epi64(__A),
4594 (__v8di)_mm512_setzero_si512());
4595}
4596
4597static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4599 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4600}
4601
4602static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4603_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4604 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4605 (__v8di)_mm512_cvtepu32_epi64(__X),
4606 (__v8di)__W);
4607}
4608
4609static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4611 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4612 (__v8di)_mm512_cvtepu32_epi64(__X),
4613 (__v8di)_mm512_setzero_si512());
4614}
4615
4616static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4618 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4619}
4620
4621static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4622_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4623 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4624 (__v16si)_mm512_cvtepu16_epi32(__A),
4625 (__v16si)__W);
4626}
4627
4628static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4630 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4631 (__v16si)_mm512_cvtepu16_epi32(__A),
4632 (__v16si)_mm512_setzero_si512());
4633}
4634
4635static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4637 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4638}
4639
4640static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4641_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4642 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4643 (__v8di)_mm512_cvtepu16_epi64(__A),
4644 (__v8di)__W);
4645}
4646
4647static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4649 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4650 (__v8di)_mm512_cvtepu16_epi64(__A),
4651 (__v8di)_mm512_setzero_si512());
4652}
4653
4654static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4655_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4656{
4657 return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
4658}
4659
4660static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4661_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4662{
4663 return (__m512i)__builtin_ia32_selectd_512(__U,
4664 (__v16si)_mm512_rorv_epi32(__A, __B),
4665 (__v16si)__W);
4666}
4667
4668static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4669_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4670{
4671 return (__m512i)__builtin_ia32_selectd_512(__U,
4672 (__v16si)_mm512_rorv_epi32(__A, __B),
4673 (__v16si)_mm512_setzero_si512());
4674}
4675
4676static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4677_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4678{
4679 return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
4680}
4681
4682static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4683_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4684{
4685 return (__m512i)__builtin_ia32_selectq_512(__U,
4686 (__v8di)_mm512_rorv_epi64(__A, __B),
4687 (__v8di)__W);
4688}
4689
4690static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4691_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4692{
4693 return (__m512i)__builtin_ia32_selectq_512(__U,
4694 (__v8di)_mm512_rorv_epi64(__A, __B),
4695 (__v8di)_mm512_setzero_si512());
4696}
4697
4698
4699
/* Generic signed/unsigned integer compares; the immediate predicate `p`
   selects the comparison, the `m` forms AND the result with a mask. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))
4739
/* Rotate-left by an immediate count, with mask/maskz wrappers built from
   the element-select builtins. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
4765
4766static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4767_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4768{
4769 return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
4770}
4771
4772static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4773_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4774{
4775 return (__m512i)__builtin_ia32_selectd_512(__U,
4776 (__v16si)_mm512_rolv_epi32(__A, __B),
4777 (__v16si)__W);
4778}
4779
4780static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4781_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4782{
4783 return (__m512i)__builtin_ia32_selectd_512(__U,
4784 (__v16si)_mm512_rolv_epi32(__A, __B),
4785 (__v16si)_mm512_setzero_si512());
4786}
4787
4788static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4789_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4790{
4791 return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
4792}
4793
4794static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4795_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4796{
4797 return (__m512i)__builtin_ia32_selectq_512(__U,
4798 (__v8di)_mm512_rolv_epi64(__A, __B),
4799 (__v8di)__W);
4800}
4801
4802static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4803_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4804{
4805 return (__m512i)__builtin_ia32_selectq_512(__U,
4806 (__v8di)_mm512_rolv_epi64(__A, __B),
4807 (__v8di)_mm512_setzero_si512());
4808}
4809
/* Rotate-right by an immediate count, with mask/maskz wrappers. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
4835
4836static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4837_mm512_slli_epi32(__m512i __A, unsigned int __B) {
4838 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
4839}
4840
4841static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4842_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
4843 unsigned int __B) {
4844 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4845 (__v16si)_mm512_slli_epi32(__A, __B),
4846 (__v16si)__W);
4847}
4848
4849static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4850_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
4851 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4852 (__v16si)_mm512_slli_epi32(__A, __B),
4853 (__v16si)_mm512_setzero_si512());
4854}
4855
4856static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4857_mm512_slli_epi64(__m512i __A, unsigned int __B) {
4858 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
4859}
4860
4861static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4862_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
4863 unsigned int __B) {
4864 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4865 (__v8di)_mm512_slli_epi64(__A, __B),
4866 (__v8di)__W);
4867}
4868
4869static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4870_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4871 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4872 (__v8di)_mm512_slli_epi64(__A, __B),
4873 (__v8di)_mm512_setzero_si512());
4874}
4875
4876static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4877_mm512_srli_epi32(__m512i __A, unsigned int __B) {
4878 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
4879}
4880
4881static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4882_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
4883 unsigned int __B) {
4884 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4885 (__v16si)_mm512_srli_epi32(__A, __B),
4886 (__v16si)__W);
4887}
4888
4889static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4890_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
4891 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4892 (__v16si)_mm512_srli_epi32(__A, __B),
4893 (__v16si)_mm512_setzero_si512());
4894}
4895
4896static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4897_mm512_srli_epi64(__m512i __A, unsigned int __B) {
4898 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
4899}
4900
4901static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4902_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
4903 unsigned int __B) {
4904 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4905 (__v8di)_mm512_srli_epi64(__A, __B),
4906 (__v8di)__W);
4907}
4908
4909static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4910_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4911 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4912 (__v8di)_mm512_srli_epi64(__A, __B),
4913 (__v8di)_mm512_setzero_si512());
4914}
4915
/// Aligned masked load of 16 32-bit integers from \a __P; elements whose
/// mask bit in \a __U is 0 keep the value from \a __W. \a __P must be
/// 64-byte aligned. Corresponds to the VMOVDQA32 instruction.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
                                                        (__v16si) __W,
                                                        (__mmask16) __U);
}
4923
4924static __inline__ __m512i __DEFAULT_FN_ATTRS512
4926{
4927 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4928 (__v16si)
4930 (__mmask16) __U);
4931}
4932
/// Aligned masked store of 16 32-bit integers to \a __P; only elements whose
/// mask bit in \a __U is 1 are written. \a __P must be 64-byte aligned.
/// Corresponds to the VMOVDQA32 instruction.
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
                                        (__mmask16) __U);
}
4939
4940static __inline__ __m512i __DEFAULT_FN_ATTRS512
4941_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
4942{
4943 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4944 (__v16si) __A,
4945 (__v16si) __W);
4946}
4947
4948static __inline__ __m512i __DEFAULT_FN_ATTRS512
4950{
4951 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4952 (__v16si) __A,
4953 (__v16si) _mm512_setzero_si512 ());
4954}
4955
4956static __inline__ __m512i __DEFAULT_FN_ATTRS512
4957_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
4958{
4959 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4960 (__v8di) __A,
4961 (__v8di) __W);
4962}
4963
4964static __inline__ __m512i __DEFAULT_FN_ATTRS512
4966{
4967 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4968 (__v8di) __A,
4969 (__v8di) _mm512_setzero_si512 ());
4970}
4971
4972static __inline__ __m512i __DEFAULT_FN_ATTRS512
4973_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4974{
4975 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4976 (__v8di) __W,
4977 (__mmask8) __U);
4978}
4979
4980static __inline__ __m512i __DEFAULT_FN_ATTRS512
4982{
4983 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4984 (__v8di)
4986 (__mmask8) __U);
4987}
4988
4989static __inline__ void __DEFAULT_FN_ATTRS512
4990_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
4991{
4992 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
4993 (__mmask8) __U);
4994}
4995
4996static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
4998{
4999 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5000 0, 0, 2, 2, 4, 4, 6, 6);
5001}
5002
5003static __inline__ __m512d __DEFAULT_FN_ATTRS512
5004_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5005{
5006 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5007 (__v8df)_mm512_movedup_pd(__A),
5008 (__v8df)__W);
5009}
5010
5011static __inline__ __m512d __DEFAULT_FN_ATTRS512
5013{
5014 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5015 (__v8df)_mm512_movedup_pd(__A),
5016 (__v8df)_mm512_setzero_pd());
5017}
5018
5019#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
5020 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5021 (__v8df)(__m512d)(B), \
5022 (__v8di)(__m512i)(C), (int)(imm), \
5023 (__mmask8)-1, (int)(R)))
5024
5025#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
5026 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5027 (__v8df)(__m512d)(B), \
5028 (__v8di)(__m512i)(C), (int)(imm), \
5029 (__mmask8)(U), (int)(R)))
5030
5031#define _mm512_fixupimm_pd(A, B, C, imm) \
5032 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5033 (__v8df)(__m512d)(B), \
5034 (__v8di)(__m512i)(C), (int)(imm), \
5035 (__mmask8)-1, \
5036 _MM_FROUND_CUR_DIRECTION))
5037
5038#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
5039 ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5040 (__v8df)(__m512d)(B), \
5041 (__v8di)(__m512i)(C), (int)(imm), \
5042 (__mmask8)(U), \
5043 _MM_FROUND_CUR_DIRECTION))
5044
5045#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
5046 ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5047 (__v8df)(__m512d)(B), \
5048 (__v8di)(__m512i)(C), \
5049 (int)(imm), (__mmask8)(U), \
5050 (int)(R)))
5051
5052#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
5053 ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5054 (__v8df)(__m512d)(B), \
5055 (__v8di)(__m512i)(C), \
5056 (int)(imm), (__mmask8)(U), \
5057 _MM_FROUND_CUR_DIRECTION))
5058
5059#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
5060 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5061 (__v16sf)(__m512)(B), \
5062 (__v16si)(__m512i)(C), (int)(imm), \
5063 (__mmask16)-1, (int)(R)))
5064
5065#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
5066 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5067 (__v16sf)(__m512)(B), \
5068 (__v16si)(__m512i)(C), (int)(imm), \
5069 (__mmask16)(U), (int)(R)))
5070
5071#define _mm512_fixupimm_ps(A, B, C, imm) \
5072 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5073 (__v16sf)(__m512)(B), \
5074 (__v16si)(__m512i)(C), (int)(imm), \
5075 (__mmask16)-1, \
5076 _MM_FROUND_CUR_DIRECTION))
5077
5078#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
5079 ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5080 (__v16sf)(__m512)(B), \
5081 (__v16si)(__m512i)(C), (int)(imm), \
5082 (__mmask16)(U), \
5083 _MM_FROUND_CUR_DIRECTION))
5084
5085#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
5086 ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5087 (__v16sf)(__m512)(B), \
5088 (__v16si)(__m512i)(C), \
5089 (int)(imm), (__mmask16)(U), \
5090 (int)(R)))
5091
5092#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
5093 ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5094 (__v16sf)(__m512)(B), \
5095 (__v16si)(__m512i)(C), \
5096 (int)(imm), (__mmask16)(U), \
5097 _MM_FROUND_CUR_DIRECTION))
5098
5099#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
5100 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5101 (__v2df)(__m128d)(B), \
5102 (__v2di)(__m128i)(C), (int)(imm), \
5103 (__mmask8)-1, (int)(R)))
5104
5105#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
5106 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5107 (__v2df)(__m128d)(B), \
5108 (__v2di)(__m128i)(C), (int)(imm), \
5109 (__mmask8)(U), (int)(R)))
5110
5111#define _mm_fixupimm_sd(A, B, C, imm) \
5112 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5113 (__v2df)(__m128d)(B), \
5114 (__v2di)(__m128i)(C), (int)(imm), \
5115 (__mmask8)-1, \
5116 _MM_FROUND_CUR_DIRECTION))
5117
5118#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
5119 ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5120 (__v2df)(__m128d)(B), \
5121 (__v2di)(__m128i)(C), (int)(imm), \
5122 (__mmask8)(U), \
5123 _MM_FROUND_CUR_DIRECTION))
5124
5125#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
5126 ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5127 (__v2df)(__m128d)(B), \
5128 (__v2di)(__m128i)(C), (int)(imm), \
5129 (__mmask8)(U), (int)(R)))
5130
5131#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
5132 ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5133 (__v2df)(__m128d)(B), \
5134 (__v2di)(__m128i)(C), (int)(imm), \
5135 (__mmask8)(U), \
5136 _MM_FROUND_CUR_DIRECTION))
5137
5138#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
5139 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5140 (__v4sf)(__m128)(B), \
5141 (__v4si)(__m128i)(C), (int)(imm), \
5142 (__mmask8)-1, (int)(R)))
5143
5144#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
5145 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5146 (__v4sf)(__m128)(B), \
5147 (__v4si)(__m128i)(C), (int)(imm), \
5148 (__mmask8)(U), (int)(R)))
5149
5150#define _mm_fixupimm_ss(A, B, C, imm) \
5151 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5152 (__v4sf)(__m128)(B), \
5153 (__v4si)(__m128i)(C), (int)(imm), \
5154 (__mmask8)-1, \
5155 _MM_FROUND_CUR_DIRECTION))
5156
5157#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
5158 ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5159 (__v4sf)(__m128)(B), \
5160 (__v4si)(__m128i)(C), (int)(imm), \
5161 (__mmask8)(U), \
5162 _MM_FROUND_CUR_DIRECTION))
5163
5164#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
5165 ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5166 (__v4sf)(__m128)(B), \
5167 (__v4si)(__m128i)(C), (int)(imm), \
5168 (__mmask8)(U), (int)(R)))
5169
5170#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
5171 ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5172 (__v4sf)(__m128)(B), \
5173 (__v4si)(__m128i)(C), (int)(imm), \
5174 (__mmask8)(U), \
5175 _MM_FROUND_CUR_DIRECTION))
5176
5177#define _mm_getexp_round_sd(A, B, R) \
5178 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5179 (__v2df)(__m128d)(B), \
5180 (__v2df)_mm_setzero_pd(), \
5181 (__mmask8)-1, (int)(R)))
5182
5183
5184static __inline__ __m128d __DEFAULT_FN_ATTRS128
5185_mm_getexp_sd (__m128d __A, __m128d __B)
5186{
5187 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5188 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5189}
5190
5191static __inline__ __m128d __DEFAULT_FN_ATTRS128
5192_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5193{
5194 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5195 (__v2df) __B,
5196 (__v2df) __W,
5197 (__mmask8) __U,
5199}
5200
5201#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5202 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5203 (__v2df)(__m128d)(B), \
5204 (__v2df)(__m128d)(W), \
5205 (__mmask8)(U), (int)(R)))
5206
5207static __inline__ __m128d __DEFAULT_FN_ATTRS128
5208_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5209{
5210 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5211 (__v2df) __B,
5212 (__v2df) _mm_setzero_pd (),
5213 (__mmask8) __U,
5215}
5216
5217#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5218 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5219 (__v2df)(__m128d)(B), \
5220 (__v2df)_mm_setzero_pd(), \
5221 (__mmask8)(U), (int)(R)))
5222
5223#define _mm_getexp_round_ss(A, B, R) \
5224 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5225 (__v4sf)(__m128)(B), \
5226 (__v4sf)_mm_setzero_ps(), \
5227 (__mmask8)-1, (int)(R)))
5228
5229static __inline__ __m128 __DEFAULT_FN_ATTRS128
5230_mm_getexp_ss (__m128 __A, __m128 __B)
5231{
5232 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5233 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5234}
5235
5236static __inline__ __m128 __DEFAULT_FN_ATTRS128
5237_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5238{
5239 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5240 (__v4sf) __B,
5241 (__v4sf) __W,
5242 (__mmask8) __U,
5244}
5245
5246#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5247 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5248 (__v4sf)(__m128)(B), \
5249 (__v4sf)(__m128)(W), \
5250 (__mmask8)(U), (int)(R)))
5251
5252static __inline__ __m128 __DEFAULT_FN_ATTRS128
5253_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5254{
5255 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5256 (__v4sf) __B,
5257 (__v4sf) _mm_setzero_ps (),
5258 (__mmask8) __U,
5260}
5261
5262#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5263 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5264 (__v4sf)(__m128)(B), \
5265 (__v4sf)_mm_setzero_ps(), \
5266 (__mmask8)(U), (int)(R)))
5267
5268#define _mm_getmant_round_sd(A, B, C, D, R) \
5269 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5270 (__v2df)(__m128d)(B), \
5271 (int)(((D)<<2) | (C)), \
5272 (__v2df)_mm_setzero_pd(), \
5273 (__mmask8)-1, (int)(R)))
5274
5275#define _mm_getmant_sd(A, B, C, D) \
5276 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5277 (__v2df)(__m128d)(B), \
5278 (int)(((D)<<2) | (C)), \
5279 (__v2df)_mm_setzero_pd(), \
5280 (__mmask8)-1, \
5281 _MM_FROUND_CUR_DIRECTION))
5282
5283#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
5284 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5285 (__v2df)(__m128d)(B), \
5286 (int)(((D)<<2) | (C)), \
5287 (__v2df)(__m128d)(W), \
5288 (__mmask8)(U), \
5289 _MM_FROUND_CUR_DIRECTION))
5290
5291#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
5292 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5293 (__v2df)(__m128d)(B), \
5294 (int)(((D)<<2) | (C)), \
5295 (__v2df)(__m128d)(W), \
5296 (__mmask8)(U), (int)(R)))
5297
5298#define _mm_maskz_getmant_sd(U, A, B, C, D) \
5299 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5300 (__v2df)(__m128d)(B), \
5301 (int)(((D)<<2) | (C)), \
5302 (__v2df)_mm_setzero_pd(), \
5303 (__mmask8)(U), \
5304 _MM_FROUND_CUR_DIRECTION))
5305
5306#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
5307 ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5308 (__v2df)(__m128d)(B), \
5309 (int)(((D)<<2) | (C)), \
5310 (__v2df)_mm_setzero_pd(), \
5311 (__mmask8)(U), (int)(R)))
5312
5313#define _mm_getmant_round_ss(A, B, C, D, R) \
5314 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5315 (__v4sf)(__m128)(B), \
5316 (int)(((D)<<2) | (C)), \
5317 (__v4sf)_mm_setzero_ps(), \
5318 (__mmask8)-1, (int)(R)))
5319
5320#define _mm_getmant_ss(A, B, C, D) \
5321 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5322 (__v4sf)(__m128)(B), \
5323 (int)(((D)<<2) | (C)), \
5324 (__v4sf)_mm_setzero_ps(), \
5325 (__mmask8)-1, \
5326 _MM_FROUND_CUR_DIRECTION))
5327
5328#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
5329 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5330 (__v4sf)(__m128)(B), \
5331 (int)(((D)<<2) | (C)), \
5332 (__v4sf)(__m128)(W), \
5333 (__mmask8)(U), \
5334 _MM_FROUND_CUR_DIRECTION))
5335
5336#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
5337 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5338 (__v4sf)(__m128)(B), \
5339 (int)(((D)<<2) | (C)), \
5340 (__v4sf)(__m128)(W), \
5341 (__mmask8)(U), (int)(R)))
5342
5343#define _mm_maskz_getmant_ss(U, A, B, C, D) \
5344 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5345 (__v4sf)(__m128)(B), \
5346 (int)(((D)<<2) | (C)), \
5347 (__v4sf)_mm_setzero_ps(), \
5348 (__mmask8)(U), \
5349 _MM_FROUND_CUR_DIRECTION))
5350
5351#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
5352 ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5353 (__v4sf)(__m128)(B), \
5354 (int)(((D)<<2) | (C)), \
5355 (__v4sf)_mm_setzero_ps(), \
5356 (__mmask8)(U), (int)(R)))
5357
5358static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5360{
5361 return __A;
5362}
5363
5364#define _mm_comi_round_sd(A, B, P, R) \
5365 ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
5366 (int)(P), (int)(R)))
5367
5368#define _mm_comi_round_ss(A, B, P, R) \
5369 ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
5370 (int)(P), (int)(R)))
5371
5372#ifdef __x86_64__
5373#define _mm_cvt_roundsd_si64(A, R) \
5374 ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5375#endif
5376
5377static __inline__ __m512i
5379 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5380}
5381
5382static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5383_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5384 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5385 (__v16si)_mm512_sll_epi32(__A, __B),
5386 (__v16si)__W);
5387}
5388
5389static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5390_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5391 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5392 (__v16si)_mm512_sll_epi32(__A, __B),
5393 (__v16si)_mm512_setzero_si512());
5394}
5395
5396static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5397_mm512_sll_epi64(__m512i __A, __m128i __B) {
5398 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5399}
5400
5401static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5402_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5403 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5404 (__v8di)_mm512_sll_epi64(__A, __B),
5405 (__v8di)__W);
5406}
5407
5408static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5409_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5410 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5411 (__v8di)_mm512_sll_epi64(__A, __B),
5412 (__v8di)_mm512_setzero_si512());
5413}
5414
5415static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5416_mm512_sllv_epi32(__m512i __X, __m512i __Y) {
5417 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5418}
5419
5420static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5421_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5422 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5423 (__v16si)_mm512_sllv_epi32(__X, __Y),
5424 (__v16si)__W);
5425}
5426
5427static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5428_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5429 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5430 (__v16si)_mm512_sllv_epi32(__X, __Y),
5431 (__v16si)_mm512_setzero_si512());
5432}
5433
5434static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5435_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5436{
5437 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5438}
5439
5440static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5441_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5442{
5443 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5444 (__v8di)_mm512_sllv_epi64(__X, __Y),
5445 (__v8di)__W);
5446}
5447
5448static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5449_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5450{
5451 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5452 (__v8di)_mm512_sllv_epi64(__X, __Y),
5453 (__v8di)_mm512_setzero_si512());
5454}
5455
5456static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5457_mm512_sra_epi32(__m512i __A, __m128i __B) {
5458 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5459}
5460
5461static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5462_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5463 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5464 (__v16si)_mm512_sra_epi32(__A, __B),
5465 (__v16si)__W);
5466}
5467
5468static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5469_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5470 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5471 (__v16si)_mm512_sra_epi32(__A, __B),
5472 (__v16si)_mm512_setzero_si512());
5473}
5474
5475static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5476_mm512_sra_epi64(__m512i __A, __m128i __B) {
5477 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5478}
5479
5480static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5481_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5482 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5483 (__v8di)_mm512_sra_epi64(__A, __B),
5484 (__v8di)__W);
5485}
5486
5487static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5488_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5489 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5490 (__v8di)_mm512_sra_epi64(__A, __B),
5491 (__v8di)_mm512_setzero_si512());
5492}
5493
5494static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5495_mm512_srav_epi32(__m512i __X, __m512i __Y) {
5496 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5497}
5498
5499static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5500_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5501 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5502 (__v16si)_mm512_srav_epi32(__X, __Y),
5503 (__v16si)__W);
5504}
5505
5506static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5507_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5508 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5509 (__v16si)_mm512_srav_epi32(__X, __Y),
5510 (__v16si)_mm512_setzero_si512());
5511}
5512
5513static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5514_mm512_srav_epi64(__m512i __X, __m512i __Y)
5515{
5516 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5517}
5518
5519static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5520_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5521{
5522 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5523 (__v8di)_mm512_srav_epi64(__X, __Y),
5524 (__v8di)__W);
5525}
5526
5527static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5528_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5529{
5530 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5531 (__v8di)_mm512_srav_epi64(__X, __Y),
5532 (__v8di)_mm512_setzero_si512());
5533}
5534
5535static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5536_mm512_srl_epi32(__m512i __A, __m128i __B) {
5537 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5538}
5539
5540static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5541_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5542 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5543 (__v16si)_mm512_srl_epi32(__A, __B),
5544 (__v16si)__W);
5545}
5546
5547static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5548_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5549 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5550 (__v16si)_mm512_srl_epi32(__A, __B),
5551 (__v16si)_mm512_setzero_si512());
5552}
5553
5554static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5555_mm512_srl_epi64(__m512i __A, __m128i __B) {
5556 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5557}
5558
5559static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5560_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5561 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5562 (__v8di)_mm512_srl_epi64(__A, __B),
5563 (__v8di)__W);
5564}
5565
5566static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5567_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5568 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5569 (__v8di)_mm512_srl_epi64(__A, __B),
5570 (__v8di)_mm512_setzero_si512());
5571}
5572
5573static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5574_mm512_srlv_epi32(__m512i __X, __m512i __Y) {
5575 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5576}
5577
5578static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5579_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5580 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5581 (__v16si)_mm512_srlv_epi32(__X, __Y),
5582 (__v16si)__W);
5583}
5584
5585static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5586_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5587 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5588 (__v16si)_mm512_srlv_epi32(__X, __Y),
5589 (__v16si)_mm512_setzero_si512());
5590}
5591
5592static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5593_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5594{
5595 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5596}
5597
5598static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5599_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5600{
5601 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5602 (__v8di)_mm512_srlv_epi64(__X, __Y),
5603 (__v8di)__W);
5604}
5605
5606static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5607_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5608{
5609 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5610 (__v8di)_mm512_srlv_epi64(__X, __Y),
5611 (__v8di)_mm512_setzero_si512());
5612}
5613
/// \enum _MM_TERNLOG_ENUM
/// A helper to represent the ternary logic operations among vector \a A,
/// \a B and \a C. The representation is passed to \a imm.
typedef enum {
  _MM_TERNLOG_A = 0xF0, ///< Truth-table column selecting operand \a A.
  _MM_TERNLOG_B = 0xCC, ///< Truth-table column selecting operand \a B.
  _MM_TERNLOG_C = 0xAA  ///< Truth-table column selecting operand \a C.
} _MM_TERNLOG_ENUM;
5623#define _mm512_ternarylogic_epi32(A, B, C, imm) \
5624 ((__m512i)__builtin_ia32_pternlogd512_mask( \
5625 (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
5626 (unsigned char)(imm), (__mmask16)-1))
5627
5628#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
5629 ((__m512i)__builtin_ia32_pternlogd512_mask( \
5630 (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
5631 (unsigned char)(imm), (__mmask16)(U)))
5632
5633#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
5634 ((__m512i)__builtin_ia32_pternlogd512_maskz( \
5635 (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
5636 (unsigned char)(imm), (__mmask16)(U)))
5637
5638#define _mm512_ternarylogic_epi64(A, B, C, imm) \
5639 ((__m512i)__builtin_ia32_pternlogq512_mask( \
5640 (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
5641 (unsigned char)(imm), (__mmask8)-1))
5642
5643#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
5644 ((__m512i)__builtin_ia32_pternlogq512_mask( \
5645 (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
5646 (unsigned char)(imm), (__mmask8)(U)))
5647
5648#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
5649 ((__m512i)__builtin_ia32_pternlogq512_maskz( \
5650 (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
5651 (unsigned char)(imm), (__mmask8)(U)))
5652
5653#ifdef __x86_64__
5654#define _mm_cvt_roundsd_i64(A, R) \
5655 ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5656#endif
5657
5658#define _mm_cvt_roundsd_si32(A, R) \
5659 ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5660
5661#define _mm_cvt_roundsd_i32(A, R) \
5662 ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5663
5664#define _mm_cvt_roundsd_u32(A, R) \
5665 ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5666
5667static __inline__ unsigned __DEFAULT_FN_ATTRS128
5668_mm_cvtsd_u32 (__m128d __A)
5669{
5670 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5672}
5673
5674#ifdef __x86_64__
5675#define _mm_cvt_roundsd_u64(A, R) \
5676 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5677 (int)(R)))
5678
5679static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5680_mm_cvtsd_u64 (__m128d __A)
5681{
5682 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5683 __A,
5685}
5686#endif
5687
5688#define _mm_cvt_roundss_si32(A, R) \
5689 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5690
5691#define _mm_cvt_roundss_i32(A, R) \
5692 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5693
5694#ifdef __x86_64__
5695#define _mm_cvt_roundss_si64(A, R) \
5696 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5697
5698#define _mm_cvt_roundss_i64(A, R) \
5699 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5700#endif
5701
5702#define _mm_cvt_roundss_u32(A, R) \
5703 ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5704
5705static __inline__ unsigned __DEFAULT_FN_ATTRS128
5706_mm_cvtss_u32 (__m128 __A)
5707{
5708 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5710}
5711
5712#ifdef __x86_64__
5713#define _mm_cvt_roundss_u64(A, R) \
5714 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
5715 (int)(R)))
5716
5717static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5718_mm_cvtss_u64 (__m128 __A)
5719{
5720 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
5721 __A,
5723}
5724#endif
5725
5726#define _mm_cvtt_roundsd_i32(A, R) \
5727 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5728
5729#define _mm_cvtt_roundsd_si32(A, R) \
5730 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5731
5732static __inline__ int __DEFAULT_FN_ATTRS128
5733_mm_cvttsd_i32 (__m128d __A)
5734{
5735 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
5737}
5738
5739#ifdef __x86_64__
5740#define _mm_cvtt_roundsd_si64(A, R) \
5741 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5742
5743#define _mm_cvtt_roundsd_i64(A, R) \
5744 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5745
5746static __inline__ long long __DEFAULT_FN_ATTRS128
5747_mm_cvttsd_i64 (__m128d __A)
5748{
5749 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
5751}
5752#endif
5753
5754#define _mm_cvtt_roundsd_u32(A, R) \
5755 ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5756
5757static __inline__ unsigned __DEFAULT_FN_ATTRS128
5758_mm_cvttsd_u32 (__m128d __A)
5759{
5760 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
5762}
5763
5764#ifdef __x86_64__
5765#define _mm_cvtt_roundsd_u64(A, R) \
5766 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
5767 (int)(R)))
5768
5769static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5770_mm_cvttsd_u64 (__m128d __A)
5771{
5772 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
5773 __A,
5775}
5776#endif
5777
/* Truncating conversions of the low float of A to int; _i32/_si32 alias. */
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5783
5784static __inline__ int __DEFAULT_FN_ATTRS128
5785_mm_cvttss_i32 (__m128 __A)
5786{
5787 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
5789}
5790
5791#ifdef __x86_64__
5792#define _mm_cvtt_roundss_i64(A, R) \
5793 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5794
5795#define _mm_cvtt_roundss_si64(A, R) \
5796 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5797
5798static __inline__ long long __DEFAULT_FN_ATTRS128
5799_mm_cvttss_i64 (__m128 __A)
5800{
5801 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
5803}
5804#endif
5805
/* Truncating convert of the low float of A to unsigned int with SAE R. */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
5808
5809static __inline__ unsigned __DEFAULT_FN_ATTRS128
5810_mm_cvttss_u32 (__m128 __A)
5811{
5812 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
5814}
5815
5816#ifdef __x86_64__
5817#define _mm_cvtt_roundss_u64(A, R) \
5818 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
5819 (int)(R)))
5820
5821static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5822_mm_cvttss_u64 (__m128 __A)
5823{
5824 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
5825 __A,
5827}
5828#endif
5829
/* In-lane permute of double elements of X selected by immediate C, with
   merge-masking (W kept where U bit is 0) and zero-masking variants. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

/* In-lane permute of float elements of X selected by immediate C. */
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
5855
/* Variable in-lane permute: element selectors come from vector __C
   instead of an immediate; plain, merge-mask and zero-mask forms. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutevar_pd(__m512d __A, __m512i __C) {
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                       (__v8df)_mm512_permutevar_pd(__A, __C),
                                       (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                       (__v8df)_mm512_permutevar_pd(__A, __C),
                                       (__v8df)_mm512_setzero_pd());
}

/* Float flavor of the variable in-lane permute. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutevar_ps(__m512 __A, __m512i __C) {
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                      (__v16sf)_mm512_permutevar_ps(__A, __C),
                                      (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                      (__v16sf)_mm512_permutevar_ps(__A, __C),
                                      (__v16sf)_mm512_setzero_ps());
}
5893
/* Two-source full permute: each index in __I selects an element from the
   concatenation of __A and __B. mask_ keeps __A where the mask bit is 0,
   mask2_ keeps the index vector __I (bit-cast), maskz_ zeroes. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) {
  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
                                                 (__v8df)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I,
                            __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                 (__v8df)__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
                             __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                 (__v8df)(__m512d)__I);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
                             __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                 (__v8df)_mm512_setzero_pd());
}

/* Float flavor of the two-source full permute. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) {
  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
                                                (__v16sf) __B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I,
                            __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                                (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                (__v16sf)__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U,
                             __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                                (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                (__v16sf)(__m512)__I);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I,
                             __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                                (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                (__v16sf)_mm512_setzero_ps());
}
5953
/* Truncating convert of 8 doubles to 8 unsigned i32 with SAE immediate R;
   plain, merge-mask and zero-mask forms. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))
5968
5969static __inline__ __m256i __DEFAULT_FN_ATTRS512
5971{
5972 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5973 (__v8si)
5975 (__mmask8) -1,
5977}
5978
5979static __inline__ __m256i __DEFAULT_FN_ATTRS512
5980_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
5981{
5982 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5983 (__v8si) __W,
5984 (__mmask8) __U,
5986}
5987
5988static __inline__ __m256i __DEFAULT_FN_ATTRS512
5990{
5991 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5992 (__v8si)
5994 (__mmask8) __U,
5996}
5997
/* Round the low element of B to a given number of fraction bits (imm),
   passing the upper element of A through. *_round_* variants take an
   explicit rounding/SAE immediate R; the others use the current mode. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

/* Float (ss) flavor of the scalar roundscale family. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6081
/* scalef: A * 2^floor(B), per double element, with rounding immediate R. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
6099
6100static __inline__ __m512d __DEFAULT_FN_ATTRS512
6101_mm512_scalef_pd (__m512d __A, __m512d __B)
6102{
6103 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6104 (__v8df) __B,
6105 (__v8df)
6107 (__mmask8) -1,
6109}
6110
6111static __inline__ __m512d __DEFAULT_FN_ATTRS512
6112_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6113{
6114 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6115 (__v8df) __B,
6116 (__v8df) __W,
6117 (__mmask8) __U,
6119}
6120
6121static __inline__ __m512d __DEFAULT_FN_ATTRS512
6122_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6123{
6124 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6125 (__v8df) __B,
6126 (__v8df)
6128 (__mmask8) __U,
6130}
6131
/* Float flavor of the packed scalef family with rounding immediate R. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
6149
6150static __inline__ __m512 __DEFAULT_FN_ATTRS512
6151_mm512_scalef_ps (__m512 __A, __m512 __B)
6152{
6153 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6154 (__v16sf) __B,
6155 (__v16sf)
6157 (__mmask16) -1,
6159}
6160
6161static __inline__ __m512 __DEFAULT_FN_ATTRS512
6162_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6163{
6164 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6165 (__v16sf) __B,
6166 (__v16sf) __W,
6167 (__mmask16) __U,
6169}
6170
6171static __inline__ __m512 __DEFAULT_FN_ATTRS512
6172_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6173{
6174 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6175 (__v16sf) __B,
6176 (__v16sf)
6178 (__mmask16) __U,
6180}
6181
/* Scalar scalef of the low double with rounding immediate R. */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)))
6187
6188static __inline__ __m128d __DEFAULT_FN_ATTRS128
6189_mm_scalef_sd (__m128d __A, __m128d __B)
6190{
6191 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6192 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6193 (__mmask8) -1,
6195}
6196
6197static __inline__ __m128d __DEFAULT_FN_ATTRS128
6198_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6199{
6200 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6201 (__v2df) __B,
6202 (__v2df) __W,
6203 (__mmask8) __U,
6205}
6206
/* Merge-masking scalar scalef with rounding immediate R. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))
6212
6213static __inline__ __m128d __DEFAULT_FN_ATTRS128
6214_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6215{
6216 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6217 (__v2df) __B,
6218 (__v2df) _mm_setzero_pd (),
6219 (__mmask8) __U,
6221}
6222
/* Zero-masking scalar scalef (sd) and unmasked scalar scalef (ss) with
   explicit rounding immediate R. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)))
6234
6235static __inline__ __m128 __DEFAULT_FN_ATTRS128
6236_mm_scalef_ss (__m128 __A, __m128 __B)
6237{
6238 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6239 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6240 (__mmask8) -1,
6242}
6243
6244static __inline__ __m128 __DEFAULT_FN_ATTRS128
6245_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6246{
6247 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6248 (__v4sf) __B,
6249 (__v4sf) __W,
6250 (__mmask8) __U,
6252}
6253
/* Merge-masking scalar float scalef with rounding immediate R. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))
6259
6260static __inline__ __m128 __DEFAULT_FN_ATTRS128
6261_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6262{
6263 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6264 (__v4sf) __B,
6265 (__v4sf) _mm_setzero_ps (),
6266 (__mmask8) __U,
6268}
6269
/* Zero-masking scalar float scalef with rounding immediate R. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              (int)(R)))
6276
/* Arithmetic right shift of each i32/i64 element by immediate __B, with
   merge-mask and zero-mask variants built on the select builtins. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srai_epi32(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srai_epi32(__A, __B),
                                         (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srai_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srai_epi64(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srai_epi64(__A, __B),
                                          (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srai_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}
6316
/* 128-bit-lane shuffles (f32x4/f64x2/i32x4/i64x2) and classic element
   shuffles (pd/ps), each with merge-mask and zero-mask variants. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
6400
/* Square root of the low double of B with rounding immediate R; upper
   element passed through from A. */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))
6406
6407static __inline__ __m128d __DEFAULT_FN_ATTRS128
6408_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6409{
6410 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6411 (__v2df) __B,
6412 (__v2df) __W,
6413 (__mmask8) __U,
6415}
6416
/* Merge-masking scalar sqrt with rounding immediate R. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U), (int)(R)))
6422
6423static __inline__ __m128d __DEFAULT_FN_ATTRS128
6424_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6425{
6426 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6427 (__v2df) __B,
6428 (__v2df) _mm_setzero_pd (),
6429 (__mmask8) __U,
6431}
6432
/* Zero-masking scalar sqrt (sd) and unmasked scalar sqrt (ss) with
   explicit rounding immediate R. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))
6444
6445static __inline__ __m128 __DEFAULT_FN_ATTRS128
6446_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6447{
6448 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6449 (__v4sf) __B,
6450 (__v4sf) __W,
6451 (__mmask8) __U,
6453}
6454
/* Merge-masking scalar float sqrt with rounding immediate R. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(W), (__mmask8)(U), \
                                            (int)(R)))
6460
6461static __inline__ __m128 __DEFAULT_FN_ATTRS128
6462_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6463{
6464 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6465 (__v4sf) __B,
6466 (__v4sf) _mm_setzero_ps (),
6467 (__mmask8) __U,
6469}
6470
/* Zero-masking scalar float sqrt with rounding immediate R. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
6476
6477static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6479 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6480 0, 1, 2, 3, 0, 1, 2, 3,
6481 0, 1, 2, 3, 0, 1, 2, 3);
6482}
6483
/* Merge-masking broadcast; lanes with 0 bits in __M keep __O. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x4(__A),
                                             (__v16sf)__O);
}
6490
6491static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6493 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6494 (__v16sf)_mm512_broadcast_f32x4(__A),
6495 (__v16sf)_mm512_setzero_ps());
6496}
6497
6498static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6500 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6501 0, 1, 2, 3, 0, 1, 2, 3);
6502}
6503
/* Merge-masking broadcast; lanes with 0 bits in __M keep __O. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                              (__v8df)_mm512_broadcast_f64x4(__A),
                                              (__v8df)__O);
}
6511
6512static __inline__ __m512d __DEFAULT_FN_ATTRS512
6514{
6515 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6516 (__v8df)_mm512_broadcast_f64x4(__A),
6517 (__v8df)_mm512_setzero_pd());
6518}
6519
6520static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6522 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6523 0, 1, 2, 3, 0, 1, 2, 3,
6524 0, 1, 2, 3, 0, 1, 2, 3);
6525}
6526
/* Merge-masking broadcast; lanes with 0 bits in __M keep __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x4(__A),
                                             (__v16si)__O);
}
6533
6534static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6536 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6537 (__v16si)_mm512_broadcast_i32x4(__A),
6538 (__v16si)_mm512_setzero_si512());
6539}
6540
6541static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6543 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6544 0, 1, 2, 3, 0, 1, 2, 3);
6545}
6546
/* Merge-masking broadcast; lanes with 0 bits in __M keep __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x4(__A),
                                             (__v8di)__O);
}
6554
6555static __inline__ __m512i __DEFAULT_FN_ATTRS512
6557{
6558 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6559 (__v8di)_mm512_broadcast_i64x4(__A),
6560 (__v8di)_mm512_setzero_si512());
6561}
6562
/* Merge-masking broadcast of the low double of __A to all 8 lanes. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) {
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) __O);
}
6569
6570static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6572 return (__m512d)__builtin_ia32_selectpd_512(__M,
6573 (__v8df) _mm512_broadcastsd_pd(__A),
6574 (__v8df) _mm512_setzero_pd());
6575}
6576
/* Merge-masking broadcast of the low float of __A to all 16 lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) {
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) __O);
}
6583
6584static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6586 return (__m512)__builtin_ia32_selectps_512(__M,
6587 (__v16sf) _mm512_broadcastss_ps(__A),
6588 (__v16sf) _mm512_setzero_ps());
6589}
6590
6591static __inline__ __m128i __DEFAULT_FN_ATTRS512
6593{
6594 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6595 (__v16qi) _mm_undefined_si128 (),
6596 (__mmask16) -1);
6597}
6598
/* Merge-masking narrow; result lanes with 0 bits in __M keep __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) __O, __M);
}
6605
6606static __inline__ __m128i __DEFAULT_FN_ATTRS512
6608{
6609 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6610 (__v16qi) _mm_setzero_si128 (),
6611 __M);
6612}
6613
6614static __inline__ void __DEFAULT_FN_ATTRS512
6616{
6617 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6618}
6619
6620static __inline__ __m256i __DEFAULT_FN_ATTRS512
6622{
6623 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6624 (__v16hi) _mm256_undefined_si256 (),
6625 (__mmask16) -1);
6626}
6627
/* Merge-masking narrow; result lanes with 0 bits in __M keep __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) __O, __M);
}
6634
6635static __inline__ __m256i __DEFAULT_FN_ATTRS512
6637{
6638 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6639 (__v16hi) _mm256_setzero_si256 (),
6640 __M);
6641}
6642
6643static __inline__ void __DEFAULT_FN_ATTRS512
6645{
6646 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6647}
6648
6649static __inline__ __m128i __DEFAULT_FN_ATTRS512
6651{
6652 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6653 (__v16qi) _mm_undefined_si128 (),
6654 (__mmask8) -1);
6655}
6656
/* Merge-masking narrow; result lanes with 0 bits in __M keep __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) __O, __M);
}
6663
6664static __inline__ __m128i __DEFAULT_FN_ATTRS512
6666{
6667 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6668 (__v16qi) _mm_setzero_si128 (),
6669 __M);
6670}
6671
6672static __inline__ void __DEFAULT_FN_ATTRS512
6674{
6675 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6676}
6677
6678static __inline__ __m256i __DEFAULT_FN_ATTRS512
6680{
6681 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6682 (__v8si) _mm256_undefined_si256 (),
6683 (__mmask8) -1);
6684}
6685
/* Merge-masking narrow; result lanes with 0 bits in __M keep __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) __O, __M);
}
6692
6693static __inline__ __m256i __DEFAULT_FN_ATTRS512
6695{
6696 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6697 (__v8si) _mm256_setzero_si256 (),
6698 __M);
6699}
6700
6701static __inline__ void __DEFAULT_FN_ATTRS512
6703{
6704 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6705}
6706
6707static __inline__ __m128i __DEFAULT_FN_ATTRS512
6709{
6710 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6711 (__v8hi) _mm_undefined_si128 (),
6712 (__mmask8) -1);
6713}
6714
/* Merge-masking narrow; result lanes with 0 bits in __M keep __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) __O, __M);
}
6721
6722static __inline__ __m128i __DEFAULT_FN_ATTRS512
6724{
6725 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6726 (__v8hi) _mm_setzero_si128 (),
6727 __M);
6728}
6729
6730static __inline__ void __DEFAULT_FN_ATTRS512
6732{
6733 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6734}
6735
6736static __inline__ __m128i __DEFAULT_FN_ATTRS512
6738{
6739 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6740 (__v16qi) _mm_undefined_si128 (),
6741 (__mmask16) -1);
6742}
6743
/* Merge-masking narrow; result lanes with 0 bits in __M keep __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) __O,
                __M);
}
6751
6752static __inline__ __m128i __DEFAULT_FN_ATTRS512
6754{
6755 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6756 (__v16qi) _mm_setzero_si128 (),
6757 __M);
6758}
6759
6760static __inline__ void __DEFAULT_FN_ATTRS512
6762{
6763 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6764}
6765
6766static __inline__ __m256i __DEFAULT_FN_ATTRS512
6768{
6769 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6770 (__v16hi) _mm256_undefined_si256 (),
6771 (__mmask16) -1);
6772}
6773
6774static __inline__ __m256i __DEFAULT_FN_ATTRS512
6775_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6776{
6777 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6778 (__v16hi) __O,
6779 __M);
6780}
6781
6782static __inline__ __m256i __DEFAULT_FN_ATTRS512
6784{
6785 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6786 (__v16hi) _mm256_setzero_si256 (),
6787 __M);
6788}
6789
6790static __inline__ void __DEFAULT_FN_ATTRS512
6792{
6793 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6794}
6795
6796static __inline__ __m128i __DEFAULT_FN_ATTRS512
6798{
6799 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6800 (__v16qi) _mm_undefined_si128 (),
6801 (__mmask8) -1);
6802}
6803
6804static __inline__ __m128i __DEFAULT_FN_ATTRS512
6805_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6806{
6807 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6808 (__v16qi) __O,
6809 __M);
6810}
6811
6812static __inline__ __m128i __DEFAULT_FN_ATTRS512
6814{
6815 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6816 (__v16qi) _mm_setzero_si128 (),
6817 __M);
6818}
6819
6820static __inline__ void __DEFAULT_FN_ATTRS512
6822{
6823 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6824}
6825
6826static __inline__ __m256i __DEFAULT_FN_ATTRS512
6828{
6829 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6830 (__v8si) _mm256_undefined_si256 (),
6831 (__mmask8) -1);
6832}
6833
6834static __inline__ __m256i __DEFAULT_FN_ATTRS512
6835_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6836{
6837 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6838 (__v8si) __O, __M);
6839}
6840
6841static __inline__ __m256i __DEFAULT_FN_ATTRS512
6843{
6844 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6845 (__v8si) _mm256_setzero_si256 (),
6846 __M);
6847}
6848
6849static __inline__ void __DEFAULT_FN_ATTRS512
6851{
6852 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
6853}
6854
6855static __inline__ __m128i __DEFAULT_FN_ATTRS512
6857{
6858 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6859 (__v8hi) _mm_undefined_si128 (),
6860 (__mmask8) -1);
6861}
6862
6863static __inline__ __m128i __DEFAULT_FN_ATTRS512
6864_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6865{
6866 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6867 (__v8hi) __O, __M);
6868}
6869
6870static __inline__ __m128i __DEFAULT_FN_ATTRS512
6872{
6873 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6874 (__v8hi) _mm_setzero_si128 (),
6875 __M);
6876}
6877
6878static __inline__ void __DEFAULT_FN_ATTRS512
6880{
6881 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
6882}
6883
6884static __inline__ __m128i __DEFAULT_FN_ATTRS512
6886{
6887 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6888 (__v16qi) _mm_undefined_si128 (),
6889 (__mmask16) -1);
6890}
6891
6892static __inline__ __m128i __DEFAULT_FN_ATTRS512
6893_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6894{
6895 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6896 (__v16qi) __O, __M);
6897}
6898
6899static __inline__ __m128i __DEFAULT_FN_ATTRS512
6901{
6902 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6903 (__v16qi) _mm_setzero_si128 (),
6904 __M);
6905}
6906
6907static __inline__ void __DEFAULT_FN_ATTRS512
6909{
6910 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6911}
6912
6913static __inline__ __m256i __DEFAULT_FN_ATTRS512
6915{
6916 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6917 (__v16hi) _mm256_undefined_si256 (),
6918 (__mmask16) -1);
6919}
6920
6921static __inline__ __m256i __DEFAULT_FN_ATTRS512
6922_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6923{
6924 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6925 (__v16hi) __O, __M);
6926}
6927
6928static __inline__ __m256i __DEFAULT_FN_ATTRS512
6930{
6931 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6932 (__v16hi) _mm256_setzero_si256 (),
6933 __M);
6934}
6935
6936static __inline__ void __DEFAULT_FN_ATTRS512
6938{
6939 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
6940}
6941
6942static __inline__ __m128i __DEFAULT_FN_ATTRS512
6944{
6945 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6946 (__v16qi) _mm_undefined_si128 (),
6947 (__mmask8) -1);
6948}
6949
6950static __inline__ __m128i __DEFAULT_FN_ATTRS512
6951_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6952{
6953 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6954 (__v16qi) __O, __M);
6955}
6956
6957static __inline__ __m128i __DEFAULT_FN_ATTRS512
6959{
6960 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6961 (__v16qi) _mm_setzero_si128 (),
6962 __M);
6963}
6964
6965static __inline__ void __DEFAULT_FN_ATTRS512
6967{
6968 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6969}
6970
6971static __inline__ __m256i __DEFAULT_FN_ATTRS512
6973{
6974 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6975 (__v8si) _mm256_undefined_si256 (),
6976 (__mmask8) -1);
6977}
6978
6979static __inline__ __m256i __DEFAULT_FN_ATTRS512
6980_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6981{
6982 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6983 (__v8si) __O, __M);
6984}
6985
6986static __inline__ __m256i __DEFAULT_FN_ATTRS512
6988{
6989 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6990 (__v8si) _mm256_setzero_si256 (),
6991 __M);
6992}
6993
6994static __inline__ void __DEFAULT_FN_ATTRS512
6996{
6997 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6998}
6999
7000static __inline__ __m128i __DEFAULT_FN_ATTRS512
7002{
7003 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7004 (__v8hi) _mm_undefined_si128 (),
7005 (__mmask8) -1);
7006}
7007
7008static __inline__ __m128i __DEFAULT_FN_ATTRS512
7009_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7010{
7011 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7012 (__v8hi) __O, __M);
7013}
7014
7015static __inline__ __m128i __DEFAULT_FN_ATTRS512
7017{
7018 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7019 (__v8hi) _mm_setzero_si128 (),
7020 __M);
7021}
7022
7023static __inline__ void __DEFAULT_FN_ATTRS512
7025{
7026 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7027}
7028
/* Extract the 128-bit lane of 32-bit integers selected by imm from A
   (VEXTRACTI32X4), with merge- and zero-masked variants. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
      (__mmask8) - 1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

/* Extract the 256-bit lane of 64-bit integers selected by imm from A
   (VEXTRACTI64X4), with merge- and zero-masked variants. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8) - 1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
7058
/* Insert a 256-bit or 128-bit vector B into the lane of A selected by imm
   (VINSERTF64X4 / VINSERTI64X4 / VINSERTF32X4 / VINSERTI32X4).  The masked
   variants select per-element between the inserted result and W (mask) or
   zero (maskz) via the corresponding vector-select builtin. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)_mm512_setzero_pd()))

#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                (__v16sf)_mm512_setzero_ps()))

#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
7114
/* Extract the normalized mantissa of each element (VGETMANTPD/VGETMANTPS).
   B selects the interval, C the sign control; they are packed into the
   builtin's immediate as (C << 2) | B.  _round_ variants take an explicit
   rounding/SAE control R, the plain ones use _MM_FROUND_CUR_DIRECTION. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7192
/* Extract the biased exponent of each double element as a double
   (VGETEXPPD), with an explicit SAE/rounding control R. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7207
7208static __inline__ __m512d __DEFAULT_FN_ATTRS512
7209_mm512_getexp_pd (__m512d __A)
7210{
7211 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7212 (__v8df) _mm512_undefined_pd (),
7213 (__mmask8) -1,
7215}
7216
7217static __inline__ __m512d __DEFAULT_FN_ATTRS512
7218_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7219{
7220 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7221 (__v8df) __W,
7222 (__mmask8) __U,
7224}
7225
7226static __inline__ __m512d __DEFAULT_FN_ATTRS512
7228{
7229 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7230 (__v8df) _mm512_setzero_pd (),
7231 (__mmask8) __U,
7233}
7234
/* Extract the biased exponent of each float element as a float
   (VGETEXPPS), with an explicit SAE/rounding control R. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7249
7250static __inline__ __m512 __DEFAULT_FN_ATTRS512
7252{
7253 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7254 (__v16sf) _mm512_undefined_ps (),
7255 (__mmask16) -1,
7257}
7258
7259static __inline__ __m512 __DEFAULT_FN_ATTRS512
7260_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7261{
7262 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7263 (__v16sf) __W,
7264 (__mmask16) __U,
7266}
7267
7268static __inline__ __m512 __DEFAULT_FN_ATTRS512
7270{
7271 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7272 (__v16sf) _mm512_setzero_ps (),
7273 (__mmask16) __U,
7275}
7276
/* Gather elements from memory at addr + index*scale (VGATHERQPS/VPGATHERQD/
   VGATHERQPD/VPGATHERQQ for 64-bit indices; VGATHERDPS/VPGATHERDD/
   VGATHERDPD/VPGATHERDQ for 32-bit indices).  The masked variants load only
   lanes whose mask bit is set and keep v1_old elsewhere. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7372
/* Scatter elements of v1 to memory at addr + index*scale (VSCATTERQPS/
   VPSCATTERQD/VSCATTERQPD/VPSCATTERQQ for 64-bit indices; VSCATTERDPS/
   VPSCATTERDD/VSCATTERDPD/VPSCATTERDQ for 32-bit indices).  Masked variants
   store only lanes whose mask bit is set. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7452
7453static __inline__ __m128 __DEFAULT_FN_ATTRS128
7454_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7455{
7456 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7457 (__v4sf)__A,
7458 (__v4sf)__B,
7459 (__mmask8)__U,
7461}
7462
7463#define _mm_fmadd_round_ss(A, B, C, R) \
7464 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7465 (__v4sf)(__m128)(B), \
7466 (__v4sf)(__m128)(C), (__mmask8)-1, \
7467 (int)(R)))
7468
7469#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7470 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7471 (__v4sf)(__m128)(A), \
7472 (__v4sf)(__m128)(B), (__mmask8)(U), \
7473 (int)(R)))
7474
7475static __inline__ __m128 __DEFAULT_FN_ATTRS128
7476_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7477{
7478 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7479 (__v4sf)__B,
7480 (__v4sf)__C,
7481 (__mmask8)__U,
7483}
7484
7485#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7486 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7487 (__v4sf)(__m128)(B), \
7488 (__v4sf)(__m128)(C), (__mmask8)(U), \
7489 (int)(R)))
7490
7491static __inline__ __m128 __DEFAULT_FN_ATTRS128
7492_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7493{
7494 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7495 (__v4sf)__X,
7496 (__v4sf)__Y,
7497 (__mmask8)__U,
7499}
7500
7501#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7502 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7503 (__v4sf)(__m128)(X), \
7504 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7505 (int)(R)))
7506
7507static __inline__ __m128 __DEFAULT_FN_ATTRS128
7508_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7509{
7510 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7511 (__v4sf)__A,
7512 -(__v4sf)__B,
7513 (__mmask8)__U,
7515}
7516
7517#define _mm_fmsub_round_ss(A, B, C, R) \
7518 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7519 (__v4sf)(__m128)(B), \
7520 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7521 (int)(R)))
7522
7523#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7524 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7525 (__v4sf)(__m128)(A), \
7526 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7527 (int)(R)))
7528
7529static __inline__ __m128 __DEFAULT_FN_ATTRS128
7530_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7531{
7532 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7533 (__v4sf)__B,
7534 -(__v4sf)__C,
7535 (__mmask8)__U,
7537}
7538
7539#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7540 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7541 (__v4sf)(__m128)(B), \
7542 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7543 (int)(R)))
7544
7545static __inline__ __m128 __DEFAULT_FN_ATTRS128
7546_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7547{
7548 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7549 (__v4sf)__X,
7550 (__v4sf)__Y,
7551 (__mmask8)__U,
7553}
7554
7555#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7556 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7557 (__v4sf)(__m128)(X), \
7558 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7559 (int)(R)))
7560
7561static __inline__ __m128 __DEFAULT_FN_ATTRS128
7562_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7563{
7564 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7565 -(__v4sf)__A,
7566 (__v4sf)__B,
7567 (__mmask8)__U,
7569}
7570
7571#define _mm_fnmadd_round_ss(A, B, C, R) \
7572 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7573 -(__v4sf)(__m128)(B), \
7574 (__v4sf)(__m128)(C), (__mmask8)-1, \
7575 (int)(R)))
7576
7577#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7578 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7579 -(__v4sf)(__m128)(A), \
7580 (__v4sf)(__m128)(B), (__mmask8)(U), \
7581 (int)(R)))
7582
7583static __inline__ __m128 __DEFAULT_FN_ATTRS128
7584_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7585{
7586 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7587 -(__v4sf)__B,
7588 (__v4sf)__C,
7589 (__mmask8)__U,
7591}
7592
7593#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7594 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7595 -(__v4sf)(__m128)(B), \
7596 (__v4sf)(__m128)(C), (__mmask8)(U), \
7597 (int)(R)))
7598
7599static __inline__ __m128 __DEFAULT_FN_ATTRS128
7600_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7601{
7602 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7603 -(__v4sf)__X,
7604 (__v4sf)__Y,
7605 (__mmask8)__U,
7607}
7608
7609#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7610 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7611 -(__v4sf)(__m128)(X), \
7612 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7613 (int)(R)))
7614
7615static __inline__ __m128 __DEFAULT_FN_ATTRS128
7616_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7617{
7618 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7619 -(__v4sf)__A,
7620 -(__v4sf)__B,
7621 (__mmask8)__U,
7623}
7624
7625#define _mm_fnmsub_round_ss(A, B, C, R) \
7626 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7627 -(__v4sf)(__m128)(B), \
7628 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7629 (int)(R)))
7630
7631#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7632 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7633 -(__v4sf)(__m128)(A), \
7634 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7635 (int)(R)))
7636
7637static __inline__ __m128 __DEFAULT_FN_ATTRS128
7638_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7639{
7640 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7641 -(__v4sf)__B,
7642 -(__v4sf)__C,
7643 (__mmask8)__U,
7645}
7646
7647#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7648 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7649 -(__v4sf)(__m128)(B), \
7650 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7651 (int)(R)))
7652
7653static __inline__ __m128 __DEFAULT_FN_ATTRS128
7654_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7655{
7656 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7657 -(__v4sf)__X,
7658 (__v4sf)__Y,
7659 (__mmask8)__U,
7661}
7662
7663#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7664 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7665 -(__v4sf)(__m128)(X), \
7666 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7667 (int)(R)))
7668
7669static __inline__ __m128d __DEFAULT_FN_ATTRS128
7670_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7671{
7672 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7673 (__v2df)__A,
7674 (__v2df)__B,
7675 (__mmask8)__U,
7677}
7678
7679#define _mm_fmadd_round_sd(A, B, C, R) \
7680 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7681 (__v2df)(__m128d)(B), \
7682 (__v2df)(__m128d)(C), (__mmask8)-1, \
7683 (int)(R)))
7684
7685#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
7686 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7687 (__v2df)(__m128d)(A), \
7688 (__v2df)(__m128d)(B), (__mmask8)(U), \
7689 (int)(R)))
7690
7691static __inline__ __m128d __DEFAULT_FN_ATTRS128
7692_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7693{
7694 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7695 (__v2df)__B,
7696 (__v2df)__C,
7697 (__mmask8)__U,
7699}
7700
7701#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
7702 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7703 (__v2df)(__m128d)(B), \
7704 (__v2df)(__m128d)(C), (__mmask8)(U), \
7705 (int)(R)))
7706
7707static __inline__ __m128d __DEFAULT_FN_ATTRS128
7708_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7709{
7710 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7711 (__v2df)__X,
7712 (__v2df)__Y,
7713 (__mmask8)__U,
7715}
7716
7717#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
7718 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7719 (__v2df)(__m128d)(X), \
7720 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7721 (int)(R)))
7722
7723static __inline__ __m128d __DEFAULT_FN_ATTRS128
7724_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7725{
7726 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7727 (__v2df)__A,
7728 -(__v2df)__B,
7729 (__mmask8)__U,
7731}
7732
7733#define _mm_fmsub_round_sd(A, B, C, R) \
7734 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7735 (__v2df)(__m128d)(B), \
7736 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7737 (int)(R)))
7738
7739#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
7740 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7741 (__v2df)(__m128d)(A), \
7742 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7743 (int)(R)))
7744
7745static __inline__ __m128d __DEFAULT_FN_ATTRS128
7746_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7747{
7748 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7749 (__v2df)__B,
7750 -(__v2df)__C,
7751 (__mmask8)__U,
7753}
7754
7755#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
7756 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7757 (__v2df)(__m128d)(B), \
7758 -(__v2df)(__m128d)(C), \
7759 (__mmask8)(U), (int)(R)))
7760
7761static __inline__ __m128d __DEFAULT_FN_ATTRS128
7762_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7763{
7764 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7765 (__v2df)__X,
7766 (__v2df)__Y,
7767 (__mmask8)__U,
7769}
7770
7771#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
7772 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7773 (__v2df)(__m128d)(X), \
7774 (__v2df)(__m128d)(Y), \
7775 (__mmask8)(U), (int)(R)))
7776
7777static __inline__ __m128d __DEFAULT_FN_ATTRS128
7778_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7779{
7780 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7781 -(__v2df)__A,
7782 (__v2df)__B,
7783 (__mmask8)__U,
7785}
7786
7787#define _mm_fnmadd_round_sd(A, B, C, R) \
7788 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7789 -(__v2df)(__m128d)(B), \
7790 (__v2df)(__m128d)(C), (__mmask8)-1, \
7791 (int)(R)))
7792
7793#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
7794 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7795 -(__v2df)(__m128d)(A), \
7796 (__v2df)(__m128d)(B), (__mmask8)(U), \
7797 (int)(R)))
7798
7799static __inline__ __m128d __DEFAULT_FN_ATTRS128
7800_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7801{
7802 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7803 -(__v2df)__B,
7804 (__v2df)__C,
7805 (__mmask8)__U,
7807}
7808
7809#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
7810 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7811 -(__v2df)(__m128d)(B), \
7812 (__v2df)(__m128d)(C), (__mmask8)(U), \
7813 (int)(R)))
7814
7815static __inline__ __m128d __DEFAULT_FN_ATTRS128
7816_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7817{
7818 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7819 -(__v2df)__X,
7820 (__v2df)__Y,
7821 (__mmask8)__U,
7823}
7824
7825#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
7826 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7827 -(__v2df)(__m128d)(X), \
7828 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7829 (int)(R)))
7830
7831static __inline__ __m128d __DEFAULT_FN_ATTRS128
7832_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7833{
7834 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7835 -(__v2df)__A,
7836 -(__v2df)__B,
7837 (__mmask8)__U,
7839}
7840
7841#define _mm_fnmsub_round_sd(A, B, C, R) \
7842 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7843 -(__v2df)(__m128d)(B), \
7844 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7845 (int)(R)))
7846
7847#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
7848 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7849 -(__v2df)(__m128d)(A), \
7850 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7851 (int)(R)))
7852
7853static __inline__ __m128d __DEFAULT_FN_ATTRS128
7854_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7855{
7856 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7857 -(__v2df)__B,
7858 -(__v2df)__C,
7859 (__mmask8)__U,
7861}
7862
7863#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
7864 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7865 -(__v2df)(__m128d)(B), \
7866 -(__v2df)(__m128d)(C), \
7867 (__mmask8)(U), \
7868 (int)(R)))
7869
7870static __inline__ __m128d __DEFAULT_FN_ATTRS128
7871_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7872{
7873 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7874 -(__v2df)__X,
7875 (__v2df)__Y,
7876 (__mmask8)__U,
7878}
7879
7880#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
7881 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7882 -(__v2df)(__m128d)(X), \
7883 (__v2df)(__m128d)(Y), \
7884 (__mmask8)(U), (int)(R)))
7885
/* Permute 64-bit elements within each 256-bit half of X using the 2-bit
 * index fields of the immediate C.  The mask/maskz variants are built from
 * the unmasked macro plus a select against W / zero. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

/* Merge-masked variant: lanes with a 0 mask bit take the value from W. */
#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked variant: lanes with a 0 mask bit become 0.0. */
#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

/* Integer (epi64) counterpart of _mm512_permutex_pd. */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

/* Merge-masked variant: lanes with a 0 mask bit take the value from W. */
#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

/* Zero-masked variant: lanes with a 0 mask bit become 0. */
#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
7911
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
  /* Gather elements of __Y selected by the indices in __X.  Note the
     operand-order swap into the builtin: data first, indices second. */
  return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X,
                           __m512d __Y) {
  /* Merge-masked: lanes with a 0 bit in __U take the value from __W. */
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
                                        (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
  /* Zero-masked: lanes with a 0 bit in __U become 0.0. */
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
                                        (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
  /* 64-bit integer variable permute; same data/index operand swap. */
  return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutexvar_epi64(__mmask16 __M, __m512i __X, __m512i __Y) {
  /* Zero-masked: lanes with a 0 bit in __M become 0. */
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                      (__v8di)_mm512_permutexvar_epi64(__X, __Y),
                                      (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X,
                              __m512i __Y) {
  /* Merge-masked: lanes with a 0 bit in __M take the value from __W. */
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                      (__v8di)_mm512_permutexvar_epi64(__X, __Y),
                                      (__v8di)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
  /* 32-bit float variable permute; data first, indices second. */
  return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
  /* Merge-masked: lanes with a 0 bit in __U take the value from __W. */
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
                                       (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
  /* Zero-masked: lanes with a 0 bit in __U become 0.0f. */
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
                                       (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
  /* 32-bit integer variable permute; data first, indices second. */
  return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
}

/* Legacy spelling kept for compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
7977
7978static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7980 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7981 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
7982 (__v16si)_mm512_setzero_si512());
7983}
7984
7985static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7986_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X,
7987 __m512i __Y) {
7988 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7989 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
7990 (__v16si)__W);
7991}
7992
7993#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
7994
7995static __inline__ __mmask16
7997 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
7998}
7999
8002 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8003}
8004
8007 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8008}
8009
8010static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
8012 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8013}
8014
8015static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
8017 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8018}
8019
8020static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8022 return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8023}
8024
8025static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8027 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8028}
8029
8030static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8031_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8032 *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8033 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8034}
8035
8038 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8039}
8040
8043 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8044}
8045
8048 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8049}
8050
/* Newer _k*_mask16 names are aliases of the classic _mm512_k* operations. */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

/* Shift a 16-bit mask left by the immediate I (bits shifted out are lost). */
#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))

/* Shift a 16-bit mask right by the immediate I. */
#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8063
8064static __inline__ unsigned int
8066 return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8067}
8068
8070_cvtu32_mask16(unsigned int __A) {
8071 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8072}
8073
8074static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8076 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8077}
8078
8079static __inline__ void __DEFAULT_FN_ATTRS
8081 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8082}
8083
8084static __inline__ void __DEFAULT_FN_ATTRS512
8085_mm512_stream_si512 (void * __P, __m512i __A)
8086{
8087 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8088 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8089}
8090
8091static __inline__ __m512i __DEFAULT_FN_ATTRS512
8093{
8094 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8095 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8096}
8097
8098static __inline__ void __DEFAULT_FN_ATTRS512
8099_mm512_stream_pd (void *__P, __m512d __A)
8100{
8101 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8102 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8103}
8104
8105static __inline__ void __DEFAULT_FN_ATTRS512
8106_mm512_stream_ps (void *__P, __m512 __A)
8107{
8108 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8109 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8110}
8111
8112static __inline__ __m512d __DEFAULT_FN_ATTRS512
8113_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8114{
8115 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8116 (__v8df) __W,
8117 (__mmask8) __U);
8118}
8119
8120static __inline__ __m512d __DEFAULT_FN_ATTRS512
8122{
8123 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8124 (__v8df)
8126 (__mmask8) __U);
8127}
8128
8129static __inline__ __m512i __DEFAULT_FN_ATTRS512
8130_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8131{
8132 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8133 (__v8di) __W,
8134 (__mmask8) __U);
8135}
8136
8137static __inline__ __m512i __DEFAULT_FN_ATTRS512
8139{
8140 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8141 (__v8di)
8143 (__mmask8) __U);
8144}
8145
8146static __inline__ __m512 __DEFAULT_FN_ATTRS512
8147_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8148{
8149 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8150 (__v16sf) __W,
8151 (__mmask16) __U);
8152}
8153
8154static __inline__ __m512 __DEFAULT_FN_ATTRS512
8156{
8157 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8158 (__v16sf)
8160 (__mmask16) __U);
8161}
8162
8163static __inline__ __m512i __DEFAULT_FN_ATTRS512
8164_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8165{
8166 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8167 (__v16si) __W,
8168 (__mmask16) __U);
8169}
8170
8171static __inline__ __m512i __DEFAULT_FN_ATTRS512
8173{
8174 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8175 (__v16si)
8177 (__mmask16) __U);
8178}
8179
/* Scalar compare-to-mask: compare the low elements of X and Y with predicate
 * P and return the result in bit 0 of a mask.  The *_round_* forms take an
 * explicit rounding/SAE immediate R; the plain forms use the current MXCSR
 * direction.  The mask (M) forms AND the result with bit 0 of M. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

/* Double-precision counterparts of the ss compare-to-mask macros above. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8223
8224/* Bit Test */
8225
8226static __inline __mmask16 __DEFAULT_FN_ATTRS512
8227_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8228{
8231}
8232
8233static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8234_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8235{
8236 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8238}
8239
8240static __inline __mmask8 __DEFAULT_FN_ATTRS512
8241_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8242{
8243 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8245}
8246
8247static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8248_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8249{
8250 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8252}
8253
8254static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8255_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8256{
8257 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8259}
8260
8261static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8262_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8263{
8264 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8266}
8267
8268static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8269_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8270{
8271 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8273}
8274
8275static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8276_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8277{
8278 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8280}
8281
8282static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8284{
8285 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8286 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8287}
8288
8289static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8290_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8291 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8292 (__v16sf)_mm512_movehdup_ps(__A),
8293 (__v16sf)__W);
8294}
8295
8296static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8298 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8299 (__v16sf)_mm512_movehdup_ps(__A),
8300 (__v16sf)_mm512_setzero_ps());
8301}
8302
8303static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8305{
8306 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8307 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8308}
8309
8310static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8311_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8312 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8313 (__v16sf)_mm512_moveldup_ps(__A),
8314 (__v16sf)__W);
8315}
8316
8317static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8319 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8320 (__v16sf)_mm512_moveldup_ps(__A),
8321 (__v16sf)_mm512_setzero_ps());
8322}
8323
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  /* Masked scalar move: low element is __B[0] when bit 0 of __U is set,
     otherwise __W[0]; upper elements follow _mm_move_ss (taken from __A). */
  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  /* Zero-masked variant: low element becomes 0.0f when bit 0 of __U is 0. */
  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
                                     _mm_setzero_ps());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  /* Double-precision counterpart of _mm_mask_move_ss. */
  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  /* Zero-masked variant: low element becomes 0.0 when bit 0 of __U is 0. */
  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
                                     _mm_setzero_pd());
}
8345
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
{
  /* Store the low float of __A to *__W only when bit 0 of __U is set;
     only bit 0 is honored (__U & 1). */
  __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
}

static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
{
  /* Store the low double of __A to *__W only when bit 0 of __U is set. */
  __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
}
8357
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
{
  /* Build the merge source: element 0 from __W, upper three elements zeroed
     (shuffle index 4 selects from the zero vector). */
  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
                                                (__v4sf)_mm_setzero_ps(),
                                                0, 4, 4, 4);

  /* Load *__A into element 0 when bit 0 of __U is set, else keep src[0]. */
  return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_load_ss (__mmask8 __U, const float* __A)
{
  /* Zero-masked scalar load: element 0 is *__A or 0.0f; upper elements 0. */
  return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
                                                (__v4sf) _mm_setzero_ps(),
                                                __U & 1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
{
  /* Merge source: element 0 from __W, element 1 zeroed (index 2 selects
     from the zero vector). */
  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
                                                 (__v2df)_mm_setzero_pd(),
                                                 0, 2);

  /* Load *__A into element 0 when bit 0 of __U is set, else keep src[0]. */
  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_load_sd (__mmask8 __U, const double* __A)
{
  /* Zero-masked scalar load: element 0 is *__A or 0.0; element 1 is 0. */
  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
                                                  (__v2df) _mm_setzero_pd(),
                                                  __U & 1);
}
8393
/* Shuffle 32-bit elements within each 128-bit lane of A using the 2-bit
 * index fields of the immediate I (PSHUFD semantics per lane). */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

/* Merge-masked variant: lanes with a 0 mask bit take the value from W. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

/* Zero-masked variant: lanes with a 0 mask bit become 0. */
#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8406
8407static __inline__ __m512d __DEFAULT_FN_ATTRS512
8408_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8409{
8410 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8411 (__v8df) __W,
8412 (__mmask8) __U);
8413}
8414
8415static __inline__ __m512d __DEFAULT_FN_ATTRS512
8417{
8418 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8419 (__v8df) _mm512_setzero_pd (),
8420 (__mmask8) __U);
8421}
8422
8423static __inline__ __m512i __DEFAULT_FN_ATTRS512
8424_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8425{
8426 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8427 (__v8di) __W,
8428 (__mmask8) __U);
8429}
8430
8431static __inline__ __m512i __DEFAULT_FN_ATTRS512
8433{
8434 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8435 (__v8di) _mm512_setzero_si512 (),
8436 (__mmask8) __U);
8437}
8438
8439static __inline__ __m512d __DEFAULT_FN_ATTRS512
8440_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8441{
8442 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8443 (__v8df) __W,
8444 (__mmask8) __U);
8445}
8446
8447static __inline__ __m512d __DEFAULT_FN_ATTRS512
8449{
8450 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8451 (__v8df) _mm512_setzero_pd(),
8452 (__mmask8) __U);
8453}
8454
8455static __inline__ __m512i __DEFAULT_FN_ATTRS512
8456_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8457{
8458 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8459 (__v8di) __W,
8460 (__mmask8) __U);
8461}
8462
8463static __inline__ __m512i __DEFAULT_FN_ATTRS512
8465{
8466 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8467 (__v8di) _mm512_setzero_si512(),
8468 (__mmask8) __U);
8469}
8470
8471static __inline__ __m512 __DEFAULT_FN_ATTRS512
8472_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8473{
8474 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8475 (__v16sf) __W,
8476 (__mmask16) __U);
8477}
8478
8479static __inline__ __m512 __DEFAULT_FN_ATTRS512
8481{
8482 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8483 (__v16sf) _mm512_setzero_ps(),
8484 (__mmask16) __U);
8485}
8486
8487static __inline__ __m512i __DEFAULT_FN_ATTRS512
8488_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8489{
8490 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8491 (__v16si) __W,
8492 (__mmask16) __U);
8493}
8494
8495static __inline__ __m512i __DEFAULT_FN_ATTRS512
8497{
8498 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8499 (__v16si) _mm512_setzero_si512(),
8500 (__mmask16) __U);
8501}
8502
8503static __inline__ __m512 __DEFAULT_FN_ATTRS512
8504_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8505{
8506 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8507 (__v16sf) __W,
8508 (__mmask16) __U);
8509}
8510
8511static __inline__ __m512 __DEFAULT_FN_ATTRS512
8513{
8514 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8515 (__v16sf) _mm512_setzero_ps(),
8516 (__mmask16) __U);
8517}
8518
8519static __inline__ __m512i __DEFAULT_FN_ATTRS512
8520_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8521{
8522 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8523 (__v16si) __W,
8524 (__mmask16) __U);
8525}
8526
8527static __inline__ __m512i __DEFAULT_FN_ATTRS512
8529{
8530 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8531 (__v16si) _mm512_setzero_si512(),
8532 (__mmask16) __U);
8533}
8534
/* Convert 8 packed floats to 8 packed doubles with an explicit SAE/rounding
 * immediate R.  Unmasked form leaves undefined lanes from _mm512_undefined_pd;
 * the mask/maskz forms merge with W / zero respectively. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
8549
8550static __inline__ __m512d
8552 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8553}
8554
8555static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8556_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) {
8557 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8558 (__v8df)_mm512_cvtps_pd(__A),
8559 (__v8df)__W);
8560}
8561
8562static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8564 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8565 (__v8df)_mm512_cvtps_pd(__A),
8566 (__v8df)_mm512_setzero_pd());
8567}
8568
8569static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8571 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8572}
8573
8574static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8575_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) {
8576 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8577}
8578
8579static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8580_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {
8581 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8582 (__v8df)__W);
8583}
8584
8585static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8587 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8588 (__v8df)_mm512_setzero_pd());
8589}
8590
8591static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8592_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8593 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8594 (__v16sf)__W);
8595}
8596
8597static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8599 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8600 (__v16sf)_mm512_setzero_ps());
8601}
8602
8603static __inline__ void __DEFAULT_FN_ATTRS512
8605{
8606 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8607 (__mmask8) __U);
8608}
8609
8610static __inline__ void __DEFAULT_FN_ATTRS512
8612{
8613 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8614 (__mmask8) __U);
8615}
8616
8617static __inline__ void __DEFAULT_FN_ATTRS512
8619{
8620 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8621 (__mmask16) __U);
8622}
8623
8624static __inline__ void __DEFAULT_FN_ATTRS512
8626{
8627 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8628 (__mmask16) __U);
8629}
8630
/* Convert the low double of B to a float in the low element of the result
 * with rounding immediate R; upper elements come from A.  Mask/maskz merge
 * the low element with W / zero. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
8648
8649static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8650_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) {
8651 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8652 (__v2df)__B,
8653 (__v4sf)__W,
8655}
8656
8657static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8658_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
8659 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8660 (__v2df)__B,
8661 (__v4sf)_mm_setzero_ps(),
8663}
8664
/* i32/i64 spellings are aliases of the classic si32/si64 conversions. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

#ifdef __x86_64__
/* Convert a signed 64-bit integer to the low double of A with rounding
 * immediate R (roundi64/roundsi64 are the same operation). */
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif

/* Convert a signed 32-bit integer to the low float of A with rounding
 * immediate R (roundsi32/roundi32 are the same operation). */
#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
/* 64-bit-integer-to-float variants, x86-64 only. */
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
8701
/* Convert the low float of B to a double in the low element of the result
 * with SAE immediate R; upper element from A.  Mask/maskz merge the low
 * element with W / zero. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
8719
8720static __inline__ __m128d __DEFAULT_FN_ATTRS128
8721_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
8722{
8723 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8724 (__v4sf)__B,
8725 (__v2df)__W,
8727}
8728
8729static __inline__ __m128d __DEFAULT_FN_ATTRS128
8730_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
8731{
8732 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8733 (__v4sf)__B,
8734 (__v2df)_mm_setzero_pd(),
8736}
8737
8738static __inline__ __m128d __DEFAULT_FN_ATTRS128
8739_mm_cvtu32_sd (__m128d __A, unsigned __B)
8740{
8741 __A[0] = __B;
8742 return __A;
8743}
8744
#ifdef __x86_64__
/* Convert an unsigned 64-bit integer to the low double of A with rounding
 * immediate R. */
#define _mm_cvt_roundu64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                       (unsigned long long)(B), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  /* Low element becomes (double)__B; upper element of __A is preserved. */
  __A[0] = __B;
  return __A;
}
#endif
8757
/* Convert an unsigned 32-bit integer to the low float of A with rounding
 * immediate R. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu32_ss (__m128 __A, unsigned __B)
{
  /* Low element becomes (float)__B; upper elements of __A are preserved. */
  __A[0] = __B;
  return __A;
}
8768
8769#ifdef __x86_64__
/* Convert the unsigned 64-bit integer B to single precision with rounding
   mode R and insert it as the low element of A. */
#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))
8773
8774static __inline__ __m128 __DEFAULT_FN_ATTRS128
8775_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
8776{
8777 __A[0] = __B;
8778 return __A;
8779}
8780#endif
8781
/* Broadcast __A into the result elements selected by mask __M; unselected
   elements are copied from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) {
  return (__m512i) __builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_set1_epi32(__A),
                                              (__v16si) __O);
}
8788
/* Broadcast __A into the result elements selected by mask __M; unselected
   elements are copied from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) {
  return (__m512i) __builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_set1_epi64(__A),
                                              (__v8di) __O);
}
8795
8797 char __e63, char __e62, char __e61, char __e60, char __e59, char __e58,
8798 char __e57, char __e56, char __e55, char __e54, char __e53, char __e52,
8799 char __e51, char __e50, char __e49, char __e48, char __e47, char __e46,
8800 char __e45, char __e44, char __e43, char __e42, char __e41, char __e40,
8801 char __e39, char __e38, char __e37, char __e36, char __e35, char __e34,
8802 char __e33, char __e32, char __e31, char __e30, char __e29, char __e28,
8803 char __e27, char __e26, char __e25, char __e24, char __e23, char __e22,
8804 char __e21, char __e20, char __e19, char __e18, char __e17, char __e16,
8805 char __e15, char __e14, char __e13, char __e12, char __e11, char __e10,
8806 char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3,
8807 char __e2, char __e1, char __e0) {
8808
8809 return __extension__ (__m512i)(__v64qi)
8810 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8811 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8812 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8813 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
8814 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
8815 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
8816 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
8817 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
8818}
8819
8821 short __e31, short __e30, short __e29, short __e28, short __e27,
8822 short __e26, short __e25, short __e24, short __e23, short __e22,
8823 short __e21, short __e20, short __e19, short __e18, short __e17,
8824 short __e16, short __e15, short __e14, short __e13, short __e12,
8825 short __e11, short __e10, short __e9, short __e8, short __e7, short __e6,
8826 short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) {
8827 return __extension__ (__m512i)(__v32hi)
8828 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8829 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8830 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8831 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
8832}
8833
8835 int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H,
8836 int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) {
8837 return __extension__ (__m512i)(__v16si)
8838 { __P, __O, __N, __M, __L, __K, __J, __I,
8839 __H, __G, __F, __E, __D, __C, __B, __A };
8840}
8841
8843 int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8,
8844 int e9, int e10, int e11, int e12, int e13, int e14, int e15) {
8845 return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,
8846 e3, e2, e1, e0);
8847}
8848
/* Set packed 64-bit integers; __A is the most significant element, __H the
   least significant. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_epi64(long long __A, long long __B, long long __C, long long __D,
                 long long __E, long long __F, long long __G, long long __H) {
  return __extension__ (__m512i) (__v8di)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}
8855
8856static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8857_mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3,
8858 long long e4, long long e5, long long e6, long long e7) {
8859 return _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
8860}
8861
/* Set packed double-precision elements; __A is the most significant element,
   __H the least significant. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_pd(double __A, double __B, double __C, double __D, double __E,
              double __F, double __G, double __H) {
  return __extension__ (__m512d)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}
8868
8869static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8870_mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5,
8871 double e6, double e7) {
8872 return _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0);
8873}
8874
/* Set packed single-precision elements; __A is the most significant element,
   __P the least significant. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F,
              float __G, float __H, float __I, float __J, float __K, float __L,
              float __M, float __N, float __O, float __P) {
  return __extension__ (__m512)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}
8883
8884static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8885_mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5,
8886 float e6, float e7, float e8, float e9, float e10, float e11,
8887 float e12, float e13, float e14, float e15) {
8888 return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
8889 e2, e1, e0);
8890}
8891
/* Absolute value of packed floats: clear each element's sign bit by ANDing
   with 0x7FFFFFFF in the integer domain. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_abs_ps(__m512 __A) {
  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
8896
/* Masked absolute value of packed floats: elements selected by __K get the
   sign bit cleared; unselected elements are copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) {
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
8901
/* Absolute value of packed doubles: clear each element's sign bit by ANDing
   with 0x7FFFFFFFFFFFFFFF in the integer domain. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_abs_pd(__m512d __A) {
  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
}
8906
/* Masked absolute value of packed doubles: elements selected by __K get the
   sign bit cleared; unselected elements are copied from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) {
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}
8911
8912/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
8913 * outputs. This class of vector operation forms the basis of many scientific
8914 * computations. In vector-reduction arithmetic, the evaluation order is
8915 * independent of the order of the input elements of V.
8916
8917 * For floating-point intrinsics:
8918 * 1. When using fadd/fmul intrinsics, the order of operations within the
8919 * vector is unspecified (associative math).
8920 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
8921 * produce unspecified results.
8922
 * A bisection method is used: at each step, the vector resulting from the
 * previous step is partitioned in half, and the operation is performed on
 * the two halves.
 * This takes log2(n) steps, where n is the number of elements in the vector.
8926 */
8927
8928static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8930 return __builtin_reduce_add((__v8di)__W);
8931}
8932
8933static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8935 return __builtin_reduce_mul((__v8di)__W);
8936}
8937
8938static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8940 return __builtin_reduce_and((__v8di)__W);
8941}
8942
8943static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8945 return __builtin_reduce_or((__v8di)__W);
8946}
8947
8948static __inline__ long long __DEFAULT_FN_ATTRS512
8950 __W = _mm512_maskz_mov_epi64(__M, __W);
8951 return __builtin_reduce_add((__v8di)__W);
8952}
8953
8954static __inline__ long long __DEFAULT_FN_ATTRS512
8956 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
8957 return __builtin_reduce_mul((__v8di)__W);
8958}
8959
8960static __inline__ long long __DEFAULT_FN_ATTRS512
8962 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
8963 return __builtin_reduce_and((__v8di)__W);
8964}
8965
8966static __inline__ long long __DEFAULT_FN_ATTRS512
8968 __W = _mm512_maskz_mov_epi64(__M, __W);
8969 return __builtin_reduce_or((__v8di)__W);
8970}
8971
8972// -0.0 is used to ignore the start value since it is the neutral value of
8973// floating point addition. For more information, please refer to
8974// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
/* Sum all eight double elements of __W (unordered floating-point reduction). */
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
}
8978
/* Multiply-reduce all eight double elements of __W; 1.0 is the neutral
   start value for floating-point multiplication. */
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
}
8982
8983static __inline__ double __DEFAULT_FN_ATTRS512
8985 __W = _mm512_maskz_mov_pd(__M, __W);
8986 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8987}
8988
8989static __inline__ double __DEFAULT_FN_ATTRS512
8991 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
8992 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
8993}
8994
8995static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
8997 return __builtin_reduce_add((__v16si)__W);
8998}
8999
9000static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9002 return __builtin_reduce_mul((__v16si)__W);
9003}
9004
9005static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9007 return __builtin_reduce_and((__v16si)__W);
9008}
9009
9010static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9012 return __builtin_reduce_or((__v16si)__W);
9013}
9014
9015static __inline__ int __DEFAULT_FN_ATTRS512
9017 __W = _mm512_maskz_mov_epi32(__M, __W);
9018 return __builtin_reduce_add((__v16si)__W);
9019}
9020
9021static __inline__ int __DEFAULT_FN_ATTRS512
9023 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9024 return __builtin_reduce_mul((__v16si)__W);
9025}
9026
9027static __inline__ int __DEFAULT_FN_ATTRS512
9029 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
9030 return __builtin_reduce_and((__v16si)__W);
9031}
9032
9033static __inline__ int __DEFAULT_FN_ATTRS512
9035 __W = _mm512_maskz_mov_epi32(__M, __W);
9036 return __builtin_reduce_or((__v16si)__W);
9037}
9038
9039static __inline__ float __DEFAULT_FN_ATTRS512
9041 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9042}
9043
9044static __inline__ float __DEFAULT_FN_ATTRS512
9046 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9047}
9048
9049static __inline__ float __DEFAULT_FN_ATTRS512
9051 __W = _mm512_maskz_mov_ps(__M, __W);
9052 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9053}
9054
9055static __inline__ float __DEFAULT_FN_ATTRS512
9057 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9058 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9059}
9060
9061static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9063 return __builtin_reduce_max((__v8di)__V);
9064}
9065
9066static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9068 return __builtin_reduce_max((__v8du)__V);
9069}
9070
9071static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9073 return __builtin_reduce_min((__v8di)__V);
9074}
9075
9076static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9078 return __builtin_reduce_min((__v8du)__V);
9079}
9080
9081static __inline__ long long __DEFAULT_FN_ATTRS512
9083 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9084 return __builtin_reduce_max((__v8di)__V);
9085}
9086
9087static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9089 __V = _mm512_maskz_mov_epi64(__M, __V);
9090 return __builtin_reduce_max((__v8du)__V);
9091}
9092
9093static __inline__ long long __DEFAULT_FN_ATTRS512
9095 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9096 return __builtin_reduce_min((__v8di)__V);
9097}
9098
9099static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9101 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
9102 return __builtin_reduce_min((__v8du)__V);
9103}
9104static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9106 return __builtin_reduce_max((__v16si)__V);
9107}
9108
9109static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9111 return __builtin_reduce_max((__v16su)__V);
9112}
9113
9114static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9116 return __builtin_reduce_min((__v16si)__V);
9117}
9118
9119static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9121 return __builtin_reduce_min((__v16su)__V);
9122}
9123
9124static __inline__ int __DEFAULT_FN_ATTRS512
9126 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9127 return __builtin_reduce_max((__v16si)__V);
9128}
9129
9130static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9132 __V = _mm512_maskz_mov_epi32(__M, __V);
9133 return __builtin_reduce_max((__v16su)__V);
9134}
9135
9136static __inline__ int __DEFAULT_FN_ATTRS512
9138 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9139 return __builtin_reduce_min((__v16si)__V);
9140}
9141
9142static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9144 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
9145 return __builtin_reduce_min((__v16su)__V);
9146}
9147
9148static __inline__ double __DEFAULT_FN_ATTRS512
9150 return __builtin_ia32_reduce_fmax_pd512(__V);
9151}
9152
9153static __inline__ double __DEFAULT_FN_ATTRS512
9155 return __builtin_ia32_reduce_fmin_pd512(__V);
9156}
9157
9158static __inline__ double __DEFAULT_FN_ATTRS512
9160 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9161 return __builtin_ia32_reduce_fmax_pd512(__V);
9162}
9163
9164static __inline__ double __DEFAULT_FN_ATTRS512
9166 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9167 return __builtin_ia32_reduce_fmin_pd512(__V);
9168}
9169
9170static __inline__ float __DEFAULT_FN_ATTRS512
9172 return __builtin_ia32_reduce_fmax_ps512(__V);
9173}
9174
9175static __inline__ float __DEFAULT_FN_ATTRS512
9177 return __builtin_ia32_reduce_fmin_ps512(__V);
9178}
9179
9180static __inline__ float __DEFAULT_FN_ATTRS512
9182 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9183 return __builtin_ia32_reduce_fmax_ps512(__V);
9184}
9185
9186static __inline__ float __DEFAULT_FN_ATTRS512
9188 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9189 return __builtin_ia32_reduce_fmin_ps512(__V);
9190}
9191
9192/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9193/// 32-bit signed integer value.
9194///
9195/// \headerfile <x86intrin.h>
9196///
9197/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9198///
9199/// \param __A
9200/// A vector of [16 x i32]. The least significant 32 bits are moved to the
9201/// destination.
9202/// \returns A 32-bit signed integer containing the moved value.
9203static __inline__ int __DEFAULT_FN_ATTRS512
9205 __v16si __b = (__v16si)__A;
9206 return __b[0];
9207}
9208
9209/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
9210/// locations starting at location \a base_addr at packed 32-bit integer indices
9211/// stored in the lower half of \a vindex scaled by \a scale them in dst.
9212///
9213/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9214///
9215/// \code{.operation}
9216/// FOR j := 0 to 7
9217/// i := j*64
9218/// m := j*32
9219/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9220/// dst[i+63:i] := MEM[addr+63:addr]
9221/// ENDFOR
9222/// dst[MAX:512] := 0
9223/// \endcode
/* Only the low 256 bits (eight 32-bit indices) of vindex are used. */
#define _mm512_i32logather_pd(vindex, base_addr, scale) \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9226
9227/// Loads 8 double-precision (64-bit) floating-point elements from memory
9228/// starting at location \a base_addr at packed 32-bit integer indices stored in
9229/// the lower half of \a vindex scaled by \a scale into dst using writemask
9230/// \a mask (elements are copied from \a src when the corresponding mask bit is
9231/// not set).
9232///
9233/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9234///
9235/// \code{.operation}
9236/// FOR j := 0 to 7
9237/// i := j*64
9238/// m := j*32
9239/// IF mask[j]
9240/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9241/// dst[i+63:i] := MEM[addr+63:addr]
9242/// ELSE
9243/// dst[i+63:i] := src[i+63:i]
9244/// FI
9245/// ENDFOR
9246/// dst[MAX:512] := 0
9247/// \endcode
/* Only the low 256 bits (eight 32-bit indices) of vindex are used. */
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \
                           (base_addr), (scale))
9251
9252/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9253/// at packed 32-bit integer indices stored in the lower half of \a vindex
9254/// scaled by \a scale and stores them in dst.
9255///
9256/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9257///
9258/// \code{.operation}
9259/// FOR j := 0 to 7
9260/// i := j*64
9261/// m := j*32
9262/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9263/// dst[i+63:i] := MEM[addr+63:addr]
9264/// ENDFOR
9265/// dst[MAX:512] := 0
9266/// \endcode
/* Only the low 256 bits (eight 32-bit indices) of vindex are used. */
#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9269
9270/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9271/// at packed 32-bit integer indices stored in the lower half of \a vindex
9272/// scaled by \a scale and stores them in dst using writemask \a mask (elements
9273/// are copied from \a src when the corresponding mask bit is not set).
9274///
9275/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9276///
9277/// \code{.operation}
9278/// FOR j := 0 to 7
9279/// i := j*64
9280/// m := j*32
9281/// IF mask[j]
9282/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9283/// dst[i+63:i] := MEM[addr+63:addr]
9284/// ELSE
9285/// dst[i+63:i] := src[i+63:i]
9286/// FI
9287/// ENDFOR
9288/// dst[MAX:512] := 0
9289/// \endcode
/* Only the low 256 bits (eight 32-bit indices) of vindex are used. */
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \
                              (base_addr), (scale))
9293
9294/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9295/// and to memory locations starting at location \a base_addr at packed 32-bit
9296/// integer indices stored in \a vindex scaled by \a scale.
9297///
9298/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9299///
9300/// \code{.operation}
9301/// FOR j := 0 to 7
9302/// i := j*64
9303/// m := j*32
9304/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9305/// MEM[addr+63:addr] := v1[i+63:i]
9306/// ENDFOR
9307/// \endcode
/* Only the low 256 bits (eight 32-bit indices) of vindex are used. */
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
9310
9311/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9312/// to memory locations starting at location \a base_addr at packed 32-bit
9313/// integer indices stored in \a vindex scaled by \a scale. Only those elements
9314/// whose corresponding mask bit is set in writemask \a mask are written to
9315/// memory.
9316///
9317/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9318///
9319/// \code{.operation}
9320/// FOR j := 0 to 7
9321/// i := j*64
9322/// m := j*32
9323/// IF mask[j]
9324/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9325/// MEM[addr+63:addr] := a[i+63:i]
9326/// FI
9327/// ENDFOR
9328/// \endcode
/* Only the low 256 bits (eight 32-bit indices) of vindex are used. */
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_pd((base_addr), (mask), \
                            _mm512_castsi512_si256(vindex), (v1), (scale))
9332
9333/// Stores 8 packed 64-bit integer elements located in \a v1 and stores them in
9334/// memory locations starting at location \a base_addr at packed 32-bit integer
9335/// indices stored in \a vindex scaled by \a scale.
9336///
9337/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9338///
9339/// \code{.operation}
9340/// FOR j := 0 to 7
9341/// i := j*64
9342/// m := j*32
9343/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9344/// MEM[addr+63:addr] := a[i+63:i]
9345/// ENDFOR
9346/// \endcode
/* Only the low 256 bits (eight 32-bit indices) of vindex are used. */
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_epi64((base_addr), \
                          _mm512_castsi512_si256(vindex), (v1), (scale))
9350
9351/// Stores 8 packed 64-bit integer elements located in a and stores them in
9352/// memory locations starting at location \a base_addr at packed 32-bit integer
9353/// indices stored in \a vindex scaled by scale using writemask \a mask (elements
9354/// whose corresponding mask bit is not set are not written to memory).
9355///
9356/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9357///
9358/// \code{.operation}
9359/// FOR j := 0 to 7
9360/// i := j*64
9361/// m := j*32
9362/// IF mask[j]
9363/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9364/// MEM[addr+63:addr] := a[i+63:i]
9365/// FI
9366/// ENDFOR
9367/// \endcode
/* Only the low 256 bits (eight 32-bit indices) of vindex are used. */
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_epi64((base_addr), (mask), \
                               _mm512_castsi512_si256(vindex), (v1), (scale))
9371
9372#undef __DEFAULT_FN_ATTRS512
9373#undef __DEFAULT_FN_ATTRS128
9374#undef __DEFAULT_FN_ATTRS
9375#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
9376#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
9377#undef __DEFAULT_FN_ATTRS_CONSTEXPR
9378
9379#endif /* __AVX512FINTRIN_H */
#define __L(__X)
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
Definition altivec.h:578
static __inline__ uint32_t volatile uint32_t * __p
Definition arm_acle.h:57
return __v
Definition arm_acle.h:88
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _cvtu32_mask16(unsigned int __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8, int e9, int e10, int e11, int e12, int e13, int e14, int e15)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si256(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7, float e8, float e9, float e10, float e11, float e12, float e13, float e14, float e15)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
_MM_MANTISSA_NORM_ENUM
@ _MM_MANT_NORM_p5_1
@ _MM_MANT_NORM_p5_2
@ _MM_MANT_NORM_1_2
@ _MM_MANT_NORM_p75_1p5
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5, double e6, double e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps256(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3, long long e4, long long e5, long long e6, long long e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_int2mask(int __a)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
_MM_CMPINT_ENUM
@ _MM_CMPINT_NE
@ _MM_CMPINT_NLT
@ _MM_CMPINT_LE
@ _MM_CMPINT_EQ
@ _MM_CMPINT_LT
@ _MM_CMPINT_UNUSED
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_knot(__mmask16 __M)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
unsigned char __mmask8
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
@ _MM_TERNLOG_A
@ _MM_TERNLOG_B
@ _MM_TERNLOG_C
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
_MM_MANTISSA_SIGN_ENUM
@ _MM_MANT_SIGN_zero
@ _MM_MANT_SIGN_src
@ _MM_MANT_SIGN_nan
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi32(int e0, int e1, int e2, int e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32(__m512i __A, __m512i __B)
_MM_PERM_ENUM
@ _MM_PERM_BBCA
@ _MM_PERM_BCBD
@ _MM_PERM_DAAB
@ _MM_PERM_CBBD
@ _MM_PERM_DCCC
@ _MM_PERM_CDBB
@ _MM_PERM_DDDD
@ _MM_PERM_CCCC
@ _MM_PERM_CADA
@ _MM_PERM_BACD
@ _MM_PERM_CCAD
@ _MM_PERM_ABDB
@ _MM_PERM_BBBD
@ _MM_PERM_DCAB
@ _MM_PERM_BABC
@ _MM_PERM_AACD
@ _MM_PERM_BBAB
@ _MM_PERM_DCDB
@ _MM_PERM_BACC
@ _MM_PERM_ABDA
@ _MM_PERM_ACCC
@ _MM_PERM_ADAC
@ _MM_PERM_CCCD
@ _MM_PERM_CADD
@ _MM_PERM_ACCB
@ _MM_PERM_BBDB
@ _MM_PERM_ABBB
@ _MM_PERM_BACB
@ _MM_PERM_CDCA
@ _MM_PERM_ACBC
@ _MM_PERM_ADCB
@ _MM_PERM_BBBC
@ _MM_PERM_DBBA
@ _MM_PERM_BBBB
@ _MM_PERM_DDDB
@ _MM_PERM_CAAA
@ _MM_PERM_ADBB
@ _MM_PERM_ACDB
@ _MM_PERM_DCAD
@ _MM_PERM_DBBC
@ _MM_PERM_BAAB
@ _MM_PERM_BDDD
@ _MM_PERM_BBAD
@ _MM_PERM_DDBA
@ _MM_PERM_CDCD
@ _MM_PERM_CCCA
@ _MM_PERM_DBBB
@ _MM_PERM_DAAD
@ _MM_PERM_DCBA
@ _MM_PERM_CCBC
@ _MM_PERM_ADDD
@ _MM_PERM_DBAC
@ _MM_PERM_ABAB
@ _MM_PERM_CBDB
@ _MM_PERM_CDBC
@ _MM_PERM_AABC
@ _MM_PERM_DABD
@ _MM_PERM_CBBA
@ _MM_PERM_CBAA
@ _MM_PERM_BDDB
@ _MM_PERM_CABC
@ _MM_PERM_BDBD
@ _MM_PERM_BCAD
@ _MM_PERM_ACBA
@ _MM_PERM_ADBA
@ _MM_PERM_ADBC
@ _MM_PERM_DBCB
@ _MM_PERM_CBDC
@ _MM_PERM_CBAD
@ _MM_PERM_ABCC
@ _MM_PERM_AAAD
@ _MM_PERM_CBAC
@ _MM_PERM_CCDA
@ _MM_PERM_CDAC
@ _MM_PERM_BADD
@ _MM_PERM_DAAC
@ _MM_PERM_BCCC
@ _MM_PERM_DBBD
@ _MM_PERM_DDAC
@ _MM_PERM_DACD
@ _MM_PERM_BAAC
@ _MM_PERM_ACCA
@ _MM_PERM_ABDD
@ _MM_PERM_BBCC
@ _MM_PERM_DAAA
@ _MM_PERM_CAAB
@ _MM_PERM_BCDB
@ _MM_PERM_ACBB
@ _MM_PERM_CDAB
@ _MM_PERM_DBDB
@ _MM_PERM_AABB
@ _MM_PERM_DBDA
@ _MM_PERM_BCBA
@ _MM_PERM_CBAB
@ _MM_PERM_DCDC
@ _MM_PERM_BBCB
@ _MM_PERM_CDCB
@ _MM_PERM_AACA
@ _MM_PERM_ACBD
@ _MM_PERM_AAAB
@ _MM_PERM_DCCB
@ _MM_PERM_ADDB
@ _MM_PERM_AAAA
@ _MM_PERM_AACC
@ _MM_PERM_BDDC
@ _MM_PERM_CBBC
@ _MM_PERM_DDCC
@ _MM_PERM_CABD
@ _MM_PERM_AADC
@ _MM_PERM_BCBC
@ _MM_PERM_BCCA
@ _MM_PERM_CCBD
@ _MM_PERM_CBBB
@ _MM_PERM_CDBA
@ _MM_PERM_CACD
@ _MM_PERM_BDAD
@ _MM_PERM_ADCA
@ _MM_PERM_AAAC
@ _MM_PERM_BDDA
@ _MM_PERM_CCAC
@ _MM_PERM_ACDC
@ _MM_PERM_DBCA
@ _MM_PERM_DBAA
@ _MM_PERM_AABD
@ _MM_PERM_CDCC
@ _MM_PERM_DCAA
@ _MM_PERM_DDDC
@ _MM_PERM_CDDB
@ _MM_PERM_AABA
@ _MM_PERM_DDBB
@ _MM_PERM_CDDA
@ _MM_PERM_AADD
@ _MM_PERM_BADC
@ _MM_PERM_BDBA
@ _MM_PERM_DBDD
@ _MM_PERM_BDAC
@ _MM_PERM_DBDC
@ _MM_PERM_BBBA
@ _MM_PERM_DDBC
@ _MM_PERM_BAAA
@ _MM_PERM_BDCC
@ _MM_PERM_DDAB
@ _MM_PERM_BCCB
@ _MM_PERM_BCCD
@ _MM_PERM_ADBD
@ _MM_PERM_ADCC
@ _MM_PERM_CCBB
@ _MM_PERM_CDAA
@ _MM_PERM_BBDA
@ _MM_PERM_CACC
@ _MM_PERM_DCBB
@ _MM_PERM_DABA
@ _MM_PERM_BADB
@ _MM_PERM_ABCA
@ _MM_PERM_CBCC
@ _MM_PERM_ABAD
@ _MM_PERM_BDBC
@ _MM_PERM_DDDA
@ _MM_PERM_ADAB
@ _MM_PERM_CADB
@ _MM_PERM_ADAA
@ _MM_PERM_ACAC
@ _MM_PERM_DADD
@ _MM_PERM_BABD
@ _MM_PERM_ACCD
@ _MM_PERM_CCAA
@ _MM_PERM_AADA
@ _MM_PERM_BDCA
@ _MM_PERM_CDDD
@ _MM_PERM_ABBD
@ _MM_PERM_ACAA
@ _MM_PERM_ACDD
@ _MM_PERM_DABB
@ _MM_PERM_CCCB
@ _MM_PERM_AADB
@ _MM_PERM_DBAD
@ _MM_PERM_BBDD
@ _MM_PERM_BCDC
@ _MM_PERM_CABA
@ _MM_PERM_BBAA
@ _MM_PERM_ADAD
@ _MM_PERM_BADA
@ _MM_PERM_DCDA
@ _MM_PERM_ABBA
@ _MM_PERM_ACAB
@ _MM_PERM_CCDD
@ _MM_PERM_CADC
@ _MM_PERM_DDCB
@ _MM_PERM_BABB
@ _MM_PERM_CCDB
@ _MM_PERM_DDAD
@ _MM_PERM_DBCC
@ _MM_PERM_BCBB
@ _MM_PERM_ADDC
@ _MM_PERM_CCBA
@ _MM_PERM_ABCD
@ _MM_PERM_BCAB
@ _MM_PERM_DCBC
@ _MM_PERM_BCDD
@ _MM_PERM_CCDC
@ _MM_PERM_ABAC
@ _MM_PERM_CBCB
@ _MM_PERM_CCAB
@ _MM_PERM_DDCD
@ _MM_PERM_DACA
@ _MM_PERM_ACAD
@ _MM_PERM_BABA
@ _MM_PERM_CBCD
@ _MM_PERM_CAAD
@ _MM_PERM_DCDD
@ _MM_PERM_BDBB
@ _MM_PERM_BCAA
@ _MM_PERM_ABDC
@ _MM_PERM_BBCD
@ _MM_PERM_CAAC
@ _MM_PERM_BBAC
@ _MM_PERM_CBCA
@ _MM_PERM_DCAC
@ _MM_PERM_ABAA
@ _MM_PERM_CACB
@ _MM_PERM_BBDC
@ _MM_PERM_CDAD
@ _MM_PERM_ADCD
@ _MM_PERM_DADB
@ _MM_PERM_DBCD
@ _MM_PERM_DACC
@ _MM_PERM_DACB
@ _MM_PERM_DCBD
@ _MM_PERM_CACA
@ _MM_PERM_ABBC
@ _MM_PERM_DCCA
@ _MM_PERM_DABC
@ _MM_PERM_CBDD
@ _MM_PERM_DDBD
@ _MM_PERM_DDCA
@ _MM_PERM_BDCD
@ _MM_PERM_CDBD
@ _MM_PERM_ABCB
@ _MM_PERM_CDDC
@ _MM_PERM_AACB
@ _MM_PERM_DDAA
@ _MM_PERM_ADDA
@ _MM_PERM_DADA
@ _MM_PERM_BCDA
@ _MM_PERM_BDAB
@ _MM_PERM_BAAD
@ _MM_PERM_DBAB
@ _MM_PERM_DCCD
@ _MM_PERM_CABB
@ _MM_PERM_BDAA
@ _MM_PERM_BDCB
@ _MM_PERM_ACDA
@ _MM_PERM_DADC
@ _MM_PERM_CBDA
@ _MM_PERM_BCAC
@ _MM_PERM_BACA
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_pd(double e0, double e1, double e2, double e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_ps(float e0, float e1, float e2, float e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3620
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4291
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4279
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4303
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition emmintrin.h:120
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition emmintrin.h:199
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition emmintrin.h:80
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:1887
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition emmintrin.h:159
static __inline__ void short __D
Definition immintrin.h:342
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition bmi2intrin.h:19
#define _MM_FROUND_FLOOR
Definition smmintrin.h:41
#define _MM_FROUND_CEIL
Definition smmintrin.h:42
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition xmmintrin.h:2801
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:160
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
Definition xmmintrin.h:119
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:79
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2018
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
Definition xmmintrin.h:200