/* Extracted from the clang 22.0.0git documentation page for avx512fintrin.h;
 * the doxygen page chrome is preserved here as a comment so the file parses. */
1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
16typedef char __v64qi __attribute__((__vector_size__(64)));
17typedef short __v32hi __attribute__((__vector_size__(64)));
18typedef double __v8df __attribute__((__vector_size__(64)));
19typedef float __v16sf __attribute__((__vector_size__(64)));
20typedef long long __v8di __attribute__((__vector_size__(64)));
21typedef int __v16si __attribute__((__vector_size__(64)));
22
23/* Unsigned types */
24typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
25typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
26typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
27typedef unsigned int __v16su __attribute__((__vector_size__(64)));
28
29/* We need an explicitly signed variant for char. Note that this shouldn't
30 * appear in the interface though. */
31typedef signed char __v64qs __attribute__((__vector_size__(64)));
32
33typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
34typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
35typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
36
37typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
38typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
39typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
40
41typedef unsigned char __mmask8;
42typedef unsigned short __mmask16;
43
44/* Rounding mode macros. */
45#define _MM_FROUND_TO_NEAREST_INT 0x00
46#define _MM_FROUND_TO_NEG_INF 0x01
47#define _MM_FROUND_TO_POS_INF 0x02
48#define _MM_FROUND_TO_ZERO 0x03
49#define _MM_FROUND_CUR_DIRECTION 0x04
50
/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* Reserved predicate encoding (restored: lost in extraction) */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE _MM_CMPINT_NLT /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT _MM_CMPINT_NLE /* Greater than */
} _MM_CMPINT_ENUM;     /* closing brace restored: lost in extraction */
63
/* Lane-permute selector for shuffle intrinsics. Each letter selects one
 * source element (A=0, B=1, C=2, D=3); the name lists selectors from the
 * most-significant 2-bit field down, so the value is the 8-bit immediate.
 * Body reconstructed: enumerators were lost in extraction. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
/* Normalization interval selector for getmant intrinsics. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM; /* closing brace restored: lost in extraction */
/* Sign-control selector for getmant intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,  /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero, /* sign = 0 */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM; /* closing brace restored: lost in extraction */
/* Define the default attributes for the functions in this file. */
/* 512-bit-wide helpers: always inline, no debug info, require avx512f. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(512)))
/* 128-bit-wide helpers (scalar ss/sd forms). */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(128)))
/* Width-agnostic helpers (e.g. mask conversions). */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

/* In C++11 and later the *_CONSTEXPR variants additionally mark the
 * function constexpr; in C they are identical to the plain attributes. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
188
189/* Create vectors with repeated elements */
190
191static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
193 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
194}
195
196#define _mm512_setzero_epi32 _mm512_setzero_si512
197
198static __inline__ __m512d __DEFAULT_FN_ATTRS512
200{
201 return (__m512d)__builtin_ia32_undef512();
202}
203
204static __inline__ __m512 __DEFAULT_FN_ATTRS512
206{
207 return (__m512)__builtin_ia32_undef512();
208}
209
/* Return a 512-bit float vector with undefined contents (alias in spirit of
 * _mm512_undefined above; both wrap __builtin_ia32_undef512). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) {
  return (__m512)__builtin_ia32_undef512();
}
213
214static __inline__ __m512i __DEFAULT_FN_ATTRS512
216{
217 return (__m512i)__builtin_ia32_undef512();
218}
219
220static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
222 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
224}
225
/* Broadcast with merge-masking: lane i takes the broadcast value where mask
 * bit i is set, otherwise the corresponding lane of __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) {
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}
232
233static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
235 return (__m512i)__builtin_ia32_selectd_512(__M,
236 (__v16si) _mm512_broadcastd_epi32(__A),
237 (__v16si) _mm512_setzero_si512());
238}
239
240static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
242 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
243 0, 0, 0, 0, 0, 0, 0, 0);
244}
245
/* 64-bit broadcast with merge-masking against __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
  return (__m512i)__builtin_ia32_selectq_512(
      __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O);
}
251
252static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
254 return (__m512i)__builtin_ia32_selectq_512(__M,
255 (__v8di) _mm512_broadcastq_epi64(__A),
256 (__v8di) _mm512_setzero_si512());
257}
258
260 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
261 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
262}
263
264#define _mm512_setzero _mm512_setzero_ps
265
266static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
268 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
269}
270
271static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
273{
274 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
275 __w, __w, __w, __w, __w, __w, __w, __w };
276}
277
/* Replicate the scalar __w into all 8 double lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set1_pd(double __w)
{
  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}
283
284static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
286{
287 return __extension__ (__m512i)(__v64qi){
288 __w, __w, __w, __w, __w, __w, __w, __w,
289 __w, __w, __w, __w, __w, __w, __w, __w,
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w,
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w };
296}
297
298static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
300{
301 return __extension__ (__m512i)(__v32hi){
302 __w, __w, __w, __w, __w, __w, __w, __w,
303 __w, __w, __w, __w, __w, __w, __w, __w,
304 __w, __w, __w, __w, __w, __w, __w, __w,
305 __w, __w, __w, __w, __w, __w, __w, __w };
306}
307
308static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
310{
311 return __extension__ (__m512i)(__v16si){
312 __s, __s, __s, __s, __s, __s, __s, __s,
313 __s, __s, __s, __s, __s, __s, __s, __s };
314}
315
316static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
318 return (__m512i)__builtin_ia32_selectd_512(__M,
319 (__v16si)_mm512_set1_epi32(__A),
320 (__v16si)_mm512_setzero_si512());
321}
322
/* Replicate the scalar __d into all 8 64-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set1_epi64(long long __d)
{
  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}
328
329static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
331 return (__m512i)__builtin_ia32_selectq_512(__M,
332 (__v8di)_mm512_set1_epi64(__A),
333 (__v8di)_mm512_setzero_si512());
334}
335
336static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
338 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
340}
341
/* set4 helpers: repeat the 4-element pattern (__D lowest) across the vector.
 * Note the arguments are stored in reverse order, matching _mm_set semantics. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_epi32(int __A, int __B, int __C, int __D) {
  return __extension__ (__m512i)(__v16si)
    { __D, __C, __B, __A, __D, __C, __B, __A,
      __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D) {
  return __extension__ (__m512i) (__v8di)
    { __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_pd(double __A, double __B, double __C, double __D) {
  return __extension__ (__m512d)
    { __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_ps(float __A, float __B, float __C, float __D) {
  return __extension__ (__m512)
    { __D, __C, __B, __A, __D, __C, __B, __A,
      __D, __C, __B, __A, __D, __C, __B, __A };
}
367
/* setr4 helpers: same as set4 but arguments given in memory (low-to-high)
 * order; implemented by reversing and delegating to the set4 forms. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_epi32(int e0, int e1, int e2, int e3) {
  return _mm512_set4_epi32(e3, e2, e1, e0);
}

static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) {
  return _mm512_set4_epi64(e3, e2, e1, e0);
}

static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_pd(double e0, double e1, double e2, double e3) {
  return _mm512_set4_pd(e3, e2, e1, e0);
}

static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_ps(float e0, float e1, float e2, float e3) {
  return _mm512_set4_ps(e3, e2, e1, e0);
}
387
388static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
390 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
391 0, 0, 0, 0, 0, 0, 0, 0);
392}
393
394/* Cast between vector types */
395
396static __inline __m512d __DEFAULT_FN_ATTRS512
398{
399 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
400 1, 2, 3, 4, 5, 6, 7);
401}
402
403static __inline __m512 __DEFAULT_FN_ATTRS512
405{
406 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
407 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
408}
409
410static __inline __m128d __DEFAULT_FN_ATTRS512_CONSTEXPR
412{
413 return __builtin_shufflevector(__a, __a, 0, 1);
414}
415
416static __inline __m256d __DEFAULT_FN_ATTRS512_CONSTEXPR
418{
419 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
420}
421
422static __inline __m128 __DEFAULT_FN_ATTRS512_CONSTEXPR
424{
425 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
426}
427
428static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
430 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
431}
432
/* Bit-cast a 512-bit double vector to a 512-bit float vector (no conversion). */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}
438
439static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
441{
442 return (__m512i) (__A);
443}
444
445static __inline__ __m512d __DEFAULT_FN_ATTRS512
447{
448 __m256d __B = __builtin_nondeterministic_value(__B);
449 return __builtin_shufflevector(
450 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
451 __B, 0, 1, 2, 3, 4, 5, 6, 7);
452}
453
454static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
456{
457 return (__m512d) (__A);
458}
459
460static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
462{
463 return (__m512i) (__A);
464}
465
466static __inline__ __m512 __DEFAULT_FN_ATTRS512
468{
469 __m256 __B = __builtin_nondeterministic_value(__B);
470 return __builtin_shufflevector(
471 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
472 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
473}
474
475static __inline__ __m512i __DEFAULT_FN_ATTRS512
477{
478 __m256i __B = __builtin_nondeterministic_value(__B);
479 return __builtin_shufflevector(
480 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
481 __B, 0, 1, 2, 3, 4, 5, 6, 7);
482}
483
484static __inline__ __m512i __DEFAULT_FN_ATTRS512
486{
487 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
488}
489
490static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
492{
493 return (__m512) (__A);
494}
495
496static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
498{
499 return (__m512d) (__A);
500}
501
502static __inline __m128i __DEFAULT_FN_ATTRS512_CONSTEXPR
504{
505 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
506}
507
508static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR
510 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
511}
512
515 return (__mmask16)__a;
516}
517
518static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
520 return (int)__a;
521}
522
523/// Constructs a 512-bit floating-point vector of [8 x double] from a
524/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
525/// contain the value of the source vector. The upper 384 bits are set
526/// to zero.
527///
528/// \headerfile <x86intrin.h>
529///
530/// This intrinsic has no corresponding instruction.
531///
532/// \param __a
533/// A 128-bit vector of [2 x double].
534/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
535/// contain the value of the parameter. The upper 384 bits are set to zero.
536static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
538 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
539}
540
541/// Constructs a 512-bit floating-point vector of [8 x double] from a
542/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
543/// contain the value of the source vector. The upper 256 bits are set
544/// to zero.
545///
546/// \headerfile <x86intrin.h>
547///
548/// This intrinsic has no corresponding instruction.
549///
550/// \param __a
551/// A 256-bit vector of [4 x double].
552/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
553/// contain the value of the parameter. The upper 256 bits are set to zero.
554static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
556 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
557}
558
559/// Constructs a 512-bit floating-point vector of [16 x float] from a
560/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
561/// the value of the source vector. The upper 384 bits are set to zero.
562///
563/// \headerfile <x86intrin.h>
564///
565/// This intrinsic has no corresponding instruction.
566///
567/// \param __a
568/// A 128-bit vector of [4 x float].
569/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
570/// contain the value of the parameter. The upper 384 bits are set to zero.
571static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
573 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
574}
575
576/// Constructs a 512-bit floating-point vector of [16 x float] from a
577/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
578/// the value of the source vector. The upper 256 bits are set to zero.
579///
580/// \headerfile <x86intrin.h>
581///
582/// This intrinsic has no corresponding instruction.
583///
584/// \param __a
585/// A 256-bit vector of [8 x float].
586/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
587/// contain the value of the parameter. The upper 256 bits are set to zero.
588static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
590 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
591}
592
593/// Constructs a 512-bit integer vector from a 128-bit integer vector.
594/// The lower 128 bits contain the value of the source vector. The upper
595/// 384 bits are set to zero.
596///
597/// \headerfile <x86intrin.h>
598///
599/// This intrinsic has no corresponding instruction.
600///
601/// \param __a
602/// A 128-bit integer vector.
603/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
604/// the parameter. The upper 384 bits are set to zero.
605static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
607 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
608}
609
610/// Constructs a 512-bit integer vector from a 256-bit integer vector.
611/// The lower 256 bits contain the value of the source vector. The upper
612/// 256 bits are set to zero.
613///
614/// \headerfile <x86intrin.h>
615///
616/// This intrinsic has no corresponding instruction.
617///
618/// \param __a
619/// A 256-bit integer vector.
620/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
621/// the parameter. The upper 256 bits are set to zero.
622static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
624 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
625}
626
627/* Bitwise operators */
/* Bitwise AND of 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}

/* Masked AND: lane i is (__a & __b)[i] where __k bit i is set, else __src[i]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si) _mm512_and_epi32(__a, __b),
                                             (__v16si) __src);
}
640
641static __inline__ __m512i __DEFAULT_FN_ATTRS512
643{
645 __k, __a, __b);
646}
647
/* Bitwise AND of 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Masked AND (64-bit lanes) merging with __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) {
  return (__m512i)__builtin_ia32_selectq_512(
      (__mmask8)__k, (__v8di)_mm512_and_epi64(__a, __b), (__v8di)__src);
}
659
660static __inline__ __m512i __DEFAULT_FN_ATTRS512
662{
664 __k, __a, __b);
665}
666
/* Bitwise AND-NOT of the full 512 bits: (~__A) & __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}

/* AND-NOT over 32-bit lanes: (~__A) & __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)__A & (__v16su)__B);
}

/* Masked AND-NOT merging with __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_andnot_epi32(__A, __B),
                                             (__v16si)__W);
}
686
687static __inline__ __m512i __DEFAULT_FN_ATTRS512
688_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
689{
691 __U, __A, __B);
692}
693
/* AND-NOT over 64-bit lanes: (~__A) & __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}

/* Masked AND-NOT (64-bit lanes) merging with __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_andnot_epi64(__A, __B),
                                             (__v8di)__W);
}
707
708static __inline__ __m512i __DEFAULT_FN_ATTRS512
709_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
710{
712 __U, __A, __B);
713}
714
/* Bitwise OR of 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}

/* Masked OR merging with __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}
728
729static __inline__ __m512i __DEFAULT_FN_ATTRS512
731{
732 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
733}
734
/* Bitwise OR of 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Masked OR (64-bit lanes) merging with __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Masked OR (64-bit lanes) with zero-masking. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
754
/* Bitwise XOR of 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}

/* Masked XOR merging with __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_xor_epi32(__a, __b),
                                             (__v16si)__src);
}
768
769static __inline__ __m512i __DEFAULT_FN_ATTRS512
771{
772 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
773}
774
/* Bitwise XOR of 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}

/* Masked XOR (64-bit lanes) merging with __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}
788
789static __inline__ __m512i __DEFAULT_FN_ATTRS512
791{
792 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
793}
794
/* Whole-register bitwise AND / OR / XOR (lane width irrelevant). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
812
/* Arithmetic */

/* Lane-wise floating-point add / multiply / subtract, expressed with native
 * vector operators so the compiler can constant-fold and vectorize freely. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_pd(__m512d __a, __m512d __b) {
  return (__m512d)((__v8df)__a + (__v8df)__b);
}

static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a + (__v16sf)__b);
}

static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mul_pd(__m512d __a, __m512d __b) {
  return (__m512d)((__v8df)__a * (__v8df)__b);
}

static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mul_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a * (__v16sf)__b);
}

static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_pd(__m512d __a, __m512d __b) {
  return (__m512d)((__v8df)__a - (__v8df)__b);
}

static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a - (__v16sf)__b);
}
844
/* 64-bit lane add/sub; the unsigned element type gives well-defined wraparound. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_epi64(__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

/* Merge-masked add: unselected lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked add: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi64(__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
882
/* 32-bit lane add/sub with merge- and zero-masked variants (same pattern as
 * the epi64 forms above). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_epi32(__m512i __A, __m512i __B) {
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi32(__m512i __A, __m512i __B) {
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
920
/* max with explicit rounding/SAE control R (macro: R must be a constant). */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked variant: unselected lanes come from W. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                        (__v8df)(W)))

/* Zero-masked variant: unselected lanes are zeroed. */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
934
935static __inline__ __m512d __DEFAULT_FN_ATTRS512
936_mm512_max_pd(__m512d __A, __m512d __B)
937{
938 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
940}
941
/* Merge-masked double max. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked double max. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
957
/* float max with explicit rounding/SAE control R. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked variant. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                       (__v16sf)(W)))

/* Zero-masked variant. */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
971
972static __inline__ __m512 __DEFAULT_FN_ATTRS512
973_mm512_max_ps(__m512 __A, __m512 __B)
974{
975 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
977}
978
/* Merge-masked float max. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked float max. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
994
995static __inline__ __m128 __DEFAULT_FN_ATTRS128
996_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
997 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
998 (__v4sf) __B,
999 (__v4sf) __W,
1000 (__mmask8) __U,
1002}
1003
1004static __inline__ __m128 __DEFAULT_FN_ATTRS128
1005_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1006 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1007 (__v4sf) __B,
1008 (__v4sf) _mm_setzero_ps (),
1009 (__mmask8) __U,
1011}
1012
/* Scalar float max with explicit rounding control R; -1 mask = no masking. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked variant. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked variant. */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1030
1031static __inline__ __m128d __DEFAULT_FN_ATTRS128
1032_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1033 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1034 (__v2df) __B,
1035 (__v2df) __W,
1036 (__mmask8) __U,
1038}
1039
1040static __inline__ __m128d __DEFAULT_FN_ATTRS128
1041_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1042 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1043 (__v2df) __B,
1044 (__v2df) _mm_setzero_pd (),
1045 (__mmask8) __U,
1047}
1048
/* Scalar double max with explicit rounding control R; -1 mask = no masking. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked variant. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked variant. */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1066
1067static __inline __m512i
1069 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1070}
1071
/* Merge-masked signed 32-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked signed 32-bit max. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
1085
1086static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1087_mm512_max_epu32(__m512i __A, __m512i __B) {
1088 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1089}
1090
1091static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1092_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1093 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1094 (__v16si)_mm512_max_epu32(__A, __B),
1095 (__v16si)__W);
1096}
1097
1098static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1099_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1100 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1101 (__v16si)_mm512_max_epu32(__A, __B),
1102 (__v16si)_mm512_setzero_si512());
1103}
1104
1105static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1106_mm512_max_epi64(__m512i __A, __m512i __B) {
1107 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1108}
1109
1110static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1111_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1112 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1113 (__v8di)_mm512_max_epi64(__A, __B),
1114 (__v8di)__W);
1115}
1116
1117static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1118_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1119 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1120 (__v8di)_mm512_max_epi64(__A, __B),
1121 (__v8di)_mm512_setzero_si512());
1122}
1123
1124static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1125_mm512_max_epu64(__m512i __A, __m512i __B) {
1126 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1127}
1128
1129static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1130_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1131 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1132 (__v8di)_mm512_max_epu64(__A, __B),
1133 (__v8di)__W);
1134}
1135
1136static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1137_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1138 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1139 (__v8di)_mm512_max_epu64(__A, __B),
1140 (__v8di)_mm512_setzero_si512());
1141}
1142
1143#define _mm512_min_round_pd(A, B, R) \
1144 ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1145 (__v8df)(__m512d)(B), (int)(R)))
1146
1147#define _mm512_mask_min_round_pd(W, U, A, B, R) \
1148 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1149 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1150 (__v8df)(W)))
1151
1152#define _mm512_maskz_min_round_pd(U, A, B, R) \
1153 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1154 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1155 (__v8df)_mm512_setzero_pd()))
1156
1157static __inline__ __m512d __DEFAULT_FN_ATTRS512
1158_mm512_min_pd(__m512d __A, __m512d __B)
1159{
1160 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1162}
1163
1164static __inline__ __m512d __DEFAULT_FN_ATTRS512
1165_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1166{
1167 return (__m512d)__builtin_ia32_selectpd_512(__U,
1168 (__v8df)_mm512_min_pd(__A, __B),
1169 (__v8df)__W);
1170}
1171
1172static __inline__ __m512d __DEFAULT_FN_ATTRS512
1173_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1174{
1175 return (__m512d)__builtin_ia32_selectpd_512(__U,
1176 (__v8df)_mm512_min_pd(__A, __B),
1177 (__v8df)_mm512_setzero_pd());
1178}
1179
1180#define _mm512_min_round_ps(A, B, R) \
1181 ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1182 (__v16sf)(__m512)(B), (int)(R)))
1183
1184#define _mm512_mask_min_round_ps(W, U, A, B, R) \
1185 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1186 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1187 (__v16sf)(W)))
1188
1189#define _mm512_maskz_min_round_ps(U, A, B, R) \
1190 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1191 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1192 (__v16sf)_mm512_setzero_ps()))
1193
1194static __inline__ __m512 __DEFAULT_FN_ATTRS512
1195_mm512_min_ps(__m512 __A, __m512 __B)
1196{
1197 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1199}
1200
1201static __inline__ __m512 __DEFAULT_FN_ATTRS512
1202_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1203{
1204 return (__m512)__builtin_ia32_selectps_512(__U,
1205 (__v16sf)_mm512_min_ps(__A, __B),
1206 (__v16sf)__W);
1207}
1208
1209static __inline__ __m512 __DEFAULT_FN_ATTRS512
1210_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1211{
1212 return (__m512)__builtin_ia32_selectps_512(__U,
1213 (__v16sf)_mm512_min_ps(__A, __B),
1214 (__v16sf)_mm512_setzero_ps());
1215}
1216
1217static __inline__ __m128 __DEFAULT_FN_ATTRS128
1218_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1219 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1220 (__v4sf) __B,
1221 (__v4sf) __W,
1222 (__mmask8) __U,
1224}
1225
1226static __inline__ __m128 __DEFAULT_FN_ATTRS128
1227_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1228 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1229 (__v4sf) __B,
1230 (__v4sf) _mm_setzero_ps (),
1231 (__mmask8) __U,
1233}
1234
1235#define _mm_min_round_ss(A, B, R) \
1236 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1237 (__v4sf)(__m128)(B), \
1238 (__v4sf)_mm_setzero_ps(), \
1239 (__mmask8)-1, (int)(R)))
1240
1241#define _mm_mask_min_round_ss(W, U, A, B, R) \
1242 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1243 (__v4sf)(__m128)(B), \
1244 (__v4sf)(__m128)(W), (__mmask8)(U), \
1245 (int)(R)))
1246
1247#define _mm_maskz_min_round_ss(U, A, B, R) \
1248 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1249 (__v4sf)(__m128)(B), \
1250 (__v4sf)_mm_setzero_ps(), \
1251 (__mmask8)(U), (int)(R)))
1252
1253static __inline__ __m128d __DEFAULT_FN_ATTRS128
1254_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1255 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1256 (__v2df) __B,
1257 (__v2df) __W,
1258 (__mmask8) __U,
1260}
1261
1262static __inline__ __m128d __DEFAULT_FN_ATTRS128
1263_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1264 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1265 (__v2df) __B,
1266 (__v2df) _mm_setzero_pd (),
1267 (__mmask8) __U,
1269}
1270
1271#define _mm_min_round_sd(A, B, R) \
1272 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1273 (__v2df)(__m128d)(B), \
1274 (__v2df)_mm_setzero_pd(), \
1275 (__mmask8)-1, (int)(R)))
1276
1277#define _mm_mask_min_round_sd(W, U, A, B, R) \
1278 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1279 (__v2df)(__m128d)(B), \
1280 (__v2df)(__m128d)(W), \
1281 (__mmask8)(U), (int)(R)))
1282
1283#define _mm_maskz_min_round_sd(U, A, B, R) \
1284 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1285 (__v2df)(__m128d)(B), \
1286 (__v2df)_mm_setzero_pd(), \
1287 (__mmask8)(U), (int)(R)))
1288
1289static __inline __m512i
1291 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1292}
1293
1294static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1295_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1296 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1297 (__v16si)_mm512_min_epi32(__A, __B),
1298 (__v16si)__W);
1299}
1300
1301static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1302_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1303 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1304 (__v16si)_mm512_min_epi32(__A, __B),
1305 (__v16si)_mm512_setzero_si512());
1306}
1307
1308static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1309_mm512_min_epu32(__m512i __A, __m512i __B) {
1310 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1311}
1312
1313static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1314_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1315 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1316 (__v16si)_mm512_min_epu32(__A, __B),
1317 (__v16si)__W);
1318}
1319
1320static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1321_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1322 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1323 (__v16si)_mm512_min_epu32(__A, __B),
1324 (__v16si)_mm512_setzero_si512());
1325}
1326
1327static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1328_mm512_min_epi64(__m512i __A, __m512i __B) {
1329 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1330}
1331
1332static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1333_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1334 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1335 (__v8di)_mm512_min_epi64(__A, __B),
1336 (__v8di)__W);
1337}
1338
1339static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1340_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1341 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1342 (__v8di)_mm512_min_epi64(__A, __B),
1343 (__v8di)_mm512_setzero_si512());
1344}
1345
1346static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1347_mm512_min_epu64(__m512i __A, __m512i __B) {
1348 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1349}
1350
1351static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1352_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1353 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1354 (__v8di)_mm512_min_epu64(__A, __B),
1355 (__v8di)__W);
1356}
1357
1358static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1359_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1360 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1361 (__v8di)_mm512_min_epu64(__A, __B),
1362 (__v8di)_mm512_setzero_si512());
1363}
1364
1365static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1366_mm512_mul_epi32(__m512i __X, __m512i __Y) {
1367 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1368}
1369
1370static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1371_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
1372 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1373 (__v8di)_mm512_mul_epi32(__X, __Y),
1374 (__v8di)__W);
1375}
1376
1377static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1378_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) {
1379 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1380 (__v8di)_mm512_mul_epi32(__X, __Y),
1381 (__v8di)_mm512_setzero_si512 ());
1382}
1383
1384static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1385_mm512_mul_epu32(__m512i __X, __m512i __Y) {
1386 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1387}
1388
1389static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1390_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
1391 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1392 (__v8di)_mm512_mul_epu32(__X, __Y),
1393 (__v8di)__W);
1394}
1395
1396static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1397_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) {
1398 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1399 (__v8di)_mm512_mul_epu32(__X, __Y),
1400 (__v8di)_mm512_setzero_si512 ());
1401}
1402
1403static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1404_mm512_mullo_epi32(__m512i __A, __m512i __B) {
1405 return (__m512i) ((__v16su) __A * (__v16su) __B);
1406}
1407
1408static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1409_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1410 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1411 (__v16si)_mm512_mullo_epi32(__A, __B),
1412 (__v16si)_mm512_setzero_si512());
1413}
1414
1415static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1416_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1417 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1418 (__v16si)_mm512_mullo_epi32(__A, __B),
1419 (__v16si)__W);
1420}
1421
1422static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1423_mm512_mullox_epi64(__m512i __A, __m512i __B) {
1424 return (__m512i) ((__v8du) __A * (__v8du) __B);
1425}
1426
1427static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1428_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1429 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1430 (__v8di)_mm512_mullox_epi64(__A, __B),
1431 (__v8di)__W);
1432}
1433
1434#define _mm512_sqrt_round_pd(A, R) \
1435 ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1436
1437#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1438 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1439 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1440 (__v8df)(__m512d)(W)))
1441
1442#define _mm512_maskz_sqrt_round_pd(U, A, R) \
1443 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1444 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1445 (__v8df)_mm512_setzero_pd()))
1446
1447static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) {
1448 return (__m512d)__builtin_elementwise_sqrt((__v8df)__A);
1449}
1450
1451static __inline__ __m512d __DEFAULT_FN_ATTRS512
1452_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) {
1453 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
1454 (__v8df)__W);
1455}
1456
1457static __inline__ __m512d __DEFAULT_FN_ATTRS512
1459 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
1460 (__v8df)_mm512_setzero_pd());
1461}
1462
1463#define _mm512_sqrt_round_ps(A, R) \
1464 ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1465
1466#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1467 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1468 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1469 (__v16sf)(__m512)(W)))
1470
1471#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1472 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1473 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1474 (__v16sf)_mm512_setzero_ps()))
1475
1476static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) {
1477 return (__m512)__builtin_elementwise_sqrt((__v16sf)__A);
1478}
1479
1480static __inline__ __m512 __DEFAULT_FN_ATTRS512
1481_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) {
1482 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
1483 (__v16sf)__W);
1484}
1485
1486static __inline__ __m512 __DEFAULT_FN_ATTRS512
1488 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
1489 (__v16sf)_mm512_setzero_ps());
1490}
1491
1492static __inline__ __m512d __DEFAULT_FN_ATTRS512
1494{
1495 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1496 (__v8df)
1498 (__mmask8) -1);}
1499
1500static __inline__ __m512d __DEFAULT_FN_ATTRS512
1501_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1502{
1503 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1504 (__v8df) __W,
1505 (__mmask8) __U);
1506}
1507
1508static __inline__ __m512d __DEFAULT_FN_ATTRS512
1510{
1511 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1512 (__v8df)
1514 (__mmask8) __U);
1515}
1516
1517static __inline__ __m512 __DEFAULT_FN_ATTRS512
1519{
1520 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1521 (__v16sf)
1523 (__mmask16) -1);
1524}
1525
1526static __inline__ __m512 __DEFAULT_FN_ATTRS512
1527_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1528{
1529 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1530 (__v16sf) __W,
1531 (__mmask16) __U);
1532}
1533
1534static __inline__ __m512 __DEFAULT_FN_ATTRS512
1536{
1537 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1538 (__v16sf)
1540 (__mmask16) __U);
1541}
1542
1543static __inline__ __m128 __DEFAULT_FN_ATTRS128
1544_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1545{
1546 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1547 (__v4sf) __B,
1548 (__v4sf)
1549 _mm_setzero_ps (),
1550 (__mmask8) -1);
1551}
1552
1553static __inline__ __m128 __DEFAULT_FN_ATTRS128
1554_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1555{
1556 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1557 (__v4sf) __B,
1558 (__v4sf) __W,
1559 (__mmask8) __U);
1560}
1561
1562static __inline__ __m128 __DEFAULT_FN_ATTRS128
1563_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1564{
1565 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1566 (__v4sf) __B,
1567 (__v4sf) _mm_setzero_ps (),
1568 (__mmask8) __U);
1569}
1570
1571static __inline__ __m128d __DEFAULT_FN_ATTRS128
1572_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1573{
1574 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1575 (__v2df) __B,
1576 (__v2df)
1577 _mm_setzero_pd (),
1578 (__mmask8) -1);
1579}
1580
1581static __inline__ __m128d __DEFAULT_FN_ATTRS128
1582_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1583{
1584 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1585 (__v2df) __B,
1586 (__v2df) __W,
1587 (__mmask8) __U);
1588}
1589
1590static __inline__ __m128d __DEFAULT_FN_ATTRS128
1591_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1592{
1593 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1594 (__v2df) __B,
1595 (__v2df) _mm_setzero_pd (),
1596 (__mmask8) __U);
1597}
1598
1599static __inline__ __m512d __DEFAULT_FN_ATTRS512
1601{
1602 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1603 (__v8df)
1605 (__mmask8) -1);
1606}
1607
1608static __inline__ __m512d __DEFAULT_FN_ATTRS512
1609_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1610{
1611 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1612 (__v8df) __W,
1613 (__mmask8) __U);
1614}
1615
1616static __inline__ __m512d __DEFAULT_FN_ATTRS512
1618{
1619 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1620 (__v8df)
1622 (__mmask8) __U);
1623}
1624
1625static __inline__ __m512 __DEFAULT_FN_ATTRS512
1627{
1628 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1629 (__v16sf)
1631 (__mmask16) -1);
1632}
1633
1634static __inline__ __m512 __DEFAULT_FN_ATTRS512
1635_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1636{
1637 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1638 (__v16sf) __W,
1639 (__mmask16) __U);
1640}
1641
1642static __inline__ __m512 __DEFAULT_FN_ATTRS512
1644{
1645 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1646 (__v16sf)
1648 (__mmask16) __U);
1649}
1650
1651static __inline__ __m128 __DEFAULT_FN_ATTRS128
1652_mm_rcp14_ss(__m128 __A, __m128 __B)
1653{
1654 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1655 (__v4sf) __B,
1656 (__v4sf)
1657 _mm_setzero_ps (),
1658 (__mmask8) -1);
1659}
1660
1661static __inline__ __m128 __DEFAULT_FN_ATTRS128
1662_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1663{
1664 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1665 (__v4sf) __B,
1666 (__v4sf) __W,
1667 (__mmask8) __U);
1668}
1669
1670static __inline__ __m128 __DEFAULT_FN_ATTRS128
1671_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1672{
1673 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1674 (__v4sf) __B,
1675 (__v4sf) _mm_setzero_ps (),
1676 (__mmask8) __U);
1677}
1678
1679static __inline__ __m128d __DEFAULT_FN_ATTRS128
1680_mm_rcp14_sd(__m128d __A, __m128d __B)
1681{
1682 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1683 (__v2df) __B,
1684 (__v2df)
1685 _mm_setzero_pd (),
1686 (__mmask8) -1);
1687}
1688
1689static __inline__ __m128d __DEFAULT_FN_ATTRS128
1690_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1691{
1692 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1693 (__v2df) __B,
1694 (__v2df) __W,
1695 (__mmask8) __U);
1696}
1697
1698static __inline__ __m128d __DEFAULT_FN_ATTRS128
1699_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1700{
1701 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1702 (__v2df) __B,
1703 (__v2df) _mm_setzero_pd (),
1704 (__mmask8) __U);
1705}
1706
1707static __inline __m512 __DEFAULT_FN_ATTRS512
1709{
1710 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1712 (__v16sf) __A, (unsigned short)-1,
1714}
1715
1716static __inline__ __m512 __DEFAULT_FN_ATTRS512
1717_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1718{
1719 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1721 (__v16sf) __W, __U,
1723}
1724
1725static __inline __m512d __DEFAULT_FN_ATTRS512
1727{
1728 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1730 (__v8df) __A, (unsigned char)-1,
1732}
1733
1734static __inline__ __m512d __DEFAULT_FN_ATTRS512
1735_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1736{
1737 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1739 (__v8df) __W, __U,
1741}
1742
1743static __inline__ __m512 __DEFAULT_FN_ATTRS512
1744_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1745{
1746 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1748 (__v16sf) __W, __U,
1750}
1751
1752static __inline __m512 __DEFAULT_FN_ATTRS512
1754{
1755 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1757 (__v16sf) __A, (unsigned short)-1,
1759}
1760
1761static __inline __m512d __DEFAULT_FN_ATTRS512
1762_mm512_ceil_pd(__m512d __A)
1763{
1764 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1766 (__v8df) __A, (unsigned char)-1,
1768}
1769
1770static __inline__ __m512d __DEFAULT_FN_ATTRS512
1771_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1772{
1773 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1775 (__v8df) __W, __U,
1777}
1778
1779static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1780_mm512_abs_epi64(__m512i __A) {
1781 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1782}
1783
1784static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1785_mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
1786 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1787 (__v8di)_mm512_abs_epi64(__A),
1788 (__v8di)__W);
1789}
1790
1791static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1793 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1794 (__v8di)_mm512_abs_epi64(__A),
1795 (__v8di)_mm512_setzero_si512());
1796}
1797
1798static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1799_mm512_abs_epi32(__m512i __A) {
1800 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1801}
1802
1803static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1804_mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
1805 return (__m512i)__builtin_ia32_selectd_512(__U,
1806 (__v16si)_mm512_abs_epi32(__A),
1807 (__v16si)__W);
1808}
1809
1810static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1812 return (__m512i)__builtin_ia32_selectd_512(__U,
1813 (__v16si)_mm512_abs_epi32(__A),
1814 (__v16si)_mm512_setzero_si512());
1815}
1816
1817static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1818_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1819 __A = _mm_add_ss(__A, __B);
1820 return __builtin_ia32_selectss_128(__U, __A, __W);
1821}
1822
1823static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1824_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1825 __A = _mm_add_ss(__A, __B);
1826 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1827}
1828
1829#define _mm_add_round_ss(A, B, R) \
1830 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1831 (__v4sf)(__m128)(B), \
1832 (__v4sf)_mm_setzero_ps(), \
1833 (__mmask8)-1, (int)(R)))
1834
1835#define _mm_mask_add_round_ss(W, U, A, B, R) \
1836 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1837 (__v4sf)(__m128)(B), \
1838 (__v4sf)(__m128)(W), (__mmask8)(U), \
1839 (int)(R)))
1840
1841#define _mm_maskz_add_round_ss(U, A, B, R) \
1842 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1843 (__v4sf)(__m128)(B), \
1844 (__v4sf)_mm_setzero_ps(), \
1845 (__mmask8)(U), (int)(R)))
1846
1847static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1848_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1849 __A = _mm_add_sd(__A, __B);
1850 return __builtin_ia32_selectsd_128(__U, __A, __W);
1851}
1852
1853static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1854_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1855 __A = _mm_add_sd(__A, __B);
1856 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1857}
1858#define _mm_add_round_sd(A, B, R) \
1859 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1860 (__v2df)(__m128d)(B), \
1861 (__v2df)_mm_setzero_pd(), \
1862 (__mmask8)-1, (int)(R)))
1863
1864#define _mm_mask_add_round_sd(W, U, A, B, R) \
1865 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1866 (__v2df)(__m128d)(B), \
1867 (__v2df)(__m128d)(W), \
1868 (__mmask8)(U), (int)(R)))
1869
1870#define _mm_maskz_add_round_sd(U, A, B, R) \
1871 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1872 (__v2df)(__m128d)(B), \
1873 (__v2df)_mm_setzero_pd(), \
1874 (__mmask8)(U), (int)(R)))
1875
1876static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1877_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1878 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1879 (__v8df)_mm512_add_pd(__A, __B),
1880 (__v8df)__W);
1881}
1882
1883static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1884_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1885 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1886 (__v8df)_mm512_add_pd(__A, __B),
1887 (__v8df)_mm512_setzero_pd());
1888}
1889
1890static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1891_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1892 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1893 (__v16sf)_mm512_add_ps(__A, __B),
1894 (__v16sf)__W);
1895}
1896
1897static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1898_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1899 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1900 (__v16sf)_mm512_add_ps(__A, __B),
1901 (__v16sf)_mm512_setzero_ps());
1902}
1903
1904#define _mm512_add_round_pd(A, B, R) \
1905 ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1906 (__v8df)(__m512d)(B), (int)(R)))
1907
1908#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1909 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1910 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1911 (__v8df)(__m512d)(W)))
1912
1913#define _mm512_maskz_add_round_pd(U, A, B, R) \
1914 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1915 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1916 (__v8df)_mm512_setzero_pd()))
1917
1918#define _mm512_add_round_ps(A, B, R) \
1919 ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
1920 (__v16sf)(__m512)(B), (int)(R)))
1921
1922#define _mm512_mask_add_round_ps(W, U, A, B, R) \
1923 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1924 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1925 (__v16sf)(__m512)(W)))
1926
1927#define _mm512_maskz_add_round_ps(U, A, B, R) \
1928 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1929 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1930 (__v16sf)_mm512_setzero_ps()))
1931
1932static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1933_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1934 __A = _mm_sub_ss(__A, __B);
1935 return __builtin_ia32_selectss_128(__U, __A, __W);
1936}
1937
1938static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1939_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1940 __A = _mm_sub_ss(__A, __B);
1941 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1942}
1943#define _mm_sub_round_ss(A, B, R) \
1944 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1945 (__v4sf)(__m128)(B), \
1946 (__v4sf)_mm_setzero_ps(), \
1947 (__mmask8)-1, (int)(R)))
1948
1949#define _mm_mask_sub_round_ss(W, U, A, B, R) \
1950 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1951 (__v4sf)(__m128)(B), \
1952 (__v4sf)(__m128)(W), (__mmask8)(U), \
1953 (int)(R)))
1954
1955#define _mm_maskz_sub_round_ss(U, A, B, R) \
1956 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1957 (__v4sf)(__m128)(B), \
1958 (__v4sf)_mm_setzero_ps(), \
1959 (__mmask8)(U), (int)(R)))
1960
1961static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1962_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1963 __A = _mm_sub_sd(__A, __B);
1964 return __builtin_ia32_selectsd_128(__U, __A, __W);
1965}
1966
1967static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1968_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1969 __A = _mm_sub_sd(__A, __B);
1970 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1971}
1972
1973#define _mm_sub_round_sd(A, B, R) \
1974 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1975 (__v2df)(__m128d)(B), \
1976 (__v2df)_mm_setzero_pd(), \
1977 (__mmask8)-1, (int)(R)))
1978
1979#define _mm_mask_sub_round_sd(W, U, A, B, R) \
1980 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1981 (__v2df)(__m128d)(B), \
1982 (__v2df)(__m128d)(W), \
1983 (__mmask8)(U), (int)(R)))
1984
1985#define _mm_maskz_sub_round_sd(U, A, B, R) \
1986 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1987 (__v2df)(__m128d)(B), \
1988 (__v2df)_mm_setzero_pd(), \
1989 (__mmask8)(U), (int)(R)))
1990
1991static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1992_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1993 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1994 (__v8df)_mm512_sub_pd(__A, __B),
1995 (__v8df)__W);
1996}
1997
1998static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1999_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2000 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2001 (__v8df)_mm512_sub_pd(__A, __B),
2002 (__v8df)_mm512_setzero_pd());
2003}
2004
2005static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2006_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2007 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2008 (__v16sf)_mm512_sub_ps(__A, __B),
2009 (__v16sf)__W);
2010}
2011
2012static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2013_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2014 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2015 (__v16sf)_mm512_sub_ps(__A, __B),
2016 (__v16sf)_mm512_setzero_ps());
2017}
2018
/* Packed subtract with explicit rounding control R, plus merge-masked (W,U)
   and zero-masked (U) variants built on the 512-bit lane-select builtins. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2046
2047static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2048_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2049 __A = _mm_mul_ss(__A, __B);
2050 return __builtin_ia32_selectss_128(__U, __A, __W);
2051}
2052
2053static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2054_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2055 __A = _mm_mul_ss(__A, __B);
2056 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2057}
/* Scalar single multiply with explicit rounding control R; unmasked,
   merge-masked (W,U), and zero-masked (U) forms. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2075
2076static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2077_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2078 __A = _mm_mul_sd(__A, __B);
2079 return __builtin_ia32_selectsd_128(__U, __A, __W);
2080}
2081
2082static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2083_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2084 __A = _mm_mul_sd(__A, __B);
2085 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2086}
2087
/* Scalar double multiply with explicit rounding control R; unmasked,
   merge-masked (W,U), and zero-masked (U) forms. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2105
2106static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2107_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2108 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2109 (__v8df)_mm512_mul_pd(__A, __B),
2110 (__v8df)__W);
2111}
2112
2113static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2114_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2115 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2116 (__v8df)_mm512_mul_pd(__A, __B),
2117 (__v8df)_mm512_setzero_pd());
2118}
2119
2120static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2121_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2122 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2123 (__v16sf)_mm512_mul_ps(__A, __B),
2124 (__v16sf)__W);
2125}
2126
2127static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2128_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2129 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2130 (__v16sf)_mm512_mul_ps(__A, __B),
2131 (__v16sf)_mm512_setzero_ps());
2132}
2133
/* Packed multiply with explicit rounding control R, plus merge-masked and
   zero-masked variants. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2161
2162static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2163_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2164 __A = _mm_div_ss(__A, __B);
2165 return __builtin_ia32_selectss_128(__U, __A, __W);
2166}
2167
2168static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2169_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2170 __A = _mm_div_ss(__A, __B);
2171 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2172}
2173
/* Scalar single divide with explicit rounding control R; unmasked,
   merge-masked (W,U), and zero-masked (U) forms. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2191
2192static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2193_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2194 __A = _mm_div_sd(__A, __B);
2195 return __builtin_ia32_selectsd_128(__U, __A, __W);
2196}
2197
2198static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2199_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2200 __A = _mm_div_sd(__A, __B);
2201 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2202}
2203
/* Scalar double divide with explicit rounding control R; unmasked,
   merge-masked (W,U), and zero-masked (U) forms. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2221
2222static __inline __m512d
2224 return (__m512d)((__v8df)__a/(__v8df)__b);
2225}
2226
2227static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2228_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2229 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2230 (__v8df)_mm512_div_pd(__A, __B),
2231 (__v8df)__W);
2232}
2233
2234static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2235_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2236 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2237 (__v8df)_mm512_div_pd(__A, __B),
2238 (__v8df)_mm512_setzero_pd());
2239}
2240
2241static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2242_mm512_div_ps(__m512 __a, __m512 __b) {
2243 return (__m512)((__v16sf)__a/(__v16sf)__b);
2244}
2245
2246static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2247_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2248 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2249 (__v16sf)_mm512_div_ps(__A, __B),
2250 (__v16sf)__W);
2251}
2252
2253static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2254_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2255 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2256 (__v16sf)_mm512_div_ps(__A, __B),
2257 (__v16sf)_mm512_setzero_ps());
2258}
2259
/* Packed divide with explicit rounding control R, plus merge-masked and
   zero-masked variants. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2287
/* Round packed singles to the precision selected by the immediate (B/imm);
   plain forms use the current rounding direction, *_round_* forms take an
   explicit control R. Masked/zero-masked variants follow the usual pattern. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))
2319
/* Round packed doubles to the precision selected by the immediate (B/imm);
   same structure as the _ps roundscale family above. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2351
/* Packed double FMA family with explicit rounding control R. Sign flips on
   the operands select the fmsub/fnmadd/fnmsub variants; the _mask/_mask3/
   _maskz builtins merge into the first/third operand or zero, respectively. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2434
2435static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2436_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
2437 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2438 (__v8df)__C);
2439}
2440
2441static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2442_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2443 return (__m512d)__builtin_ia32_selectpd_512(
2444 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__A);
2445}
2446
2447static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2448_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2449 return (__m512d)__builtin_ia32_selectpd_512(
2450 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__C);
2451}
2452
2453static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2454_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2455 return (__m512d)__builtin_ia32_selectpd_512(
2456 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C),
2457 (__v8df)_mm512_setzero_pd());
2458}
2459
2460static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2461_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
2462 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2463 -(__v8df)__C);
2464}
2465
2466static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2467_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2468 return (__m512d)__builtin_ia32_selectpd_512(
2469 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__A);
2470}
2471
2472static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2473_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2474 return (__m512d)__builtin_ia32_selectpd_512(
2475 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__C);
2476}
2477
2478static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2479_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2480 return (__m512d)__builtin_ia32_selectpd_512(
2481 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C),
2482 (__v8df)_mm512_setzero_pd());
2483}
2484
2485static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2486_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
2487 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2488 (__v8df)__C);
2489}
2490
2491static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2492_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2493 return (__m512d)__builtin_ia32_selectpd_512(
2494 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__A);
2495}
2496
2497static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2498_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2499 return (__m512d)__builtin_ia32_selectpd_512(
2500 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__C);
2501}
2502
2503static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2504_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2505 return (__m512d)__builtin_ia32_selectpd_512(
2506 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C),
2507 (__v8df)_mm512_setzero_pd());
2508}
2509
2510static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2511_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
2512 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2513 -(__v8df)__C);
2514}
2515
2516static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2517_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2518 return (__m512d)__builtin_ia32_selectpd_512(
2519 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__A);
2520}
2521
2522static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2523_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2524 return (__m512d)__builtin_ia32_selectpd_512(
2525 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__C);
2526}
2527
2528static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2529_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2530 return (__m512d)__builtin_ia32_selectpd_512(
2531 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C),
2532 (__v8df)_mm512_setzero_pd());
2533}
2534
/* Packed single FMA family with explicit rounding control R; mirrors the
   _pd round macros above (note the fnmadd/fnmsub forms negate B in the
   unmasked variants and A in the mask3/maskz variants — equivalent
   products). */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2617
2618static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2619_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
2620 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2621 (__v16sf)__C);
2622}
2623
2624static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2625_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2626 return (__m512)__builtin_ia32_selectps_512(
2627 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__A);
2628}
2629
2630static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2631_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2632 return (__m512)__builtin_ia32_selectps_512(
2633 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__C);
2634}
2635
2636static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2637_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2638 return (__m512)__builtin_ia32_selectps_512(
2639 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C),
2640 (__v16sf)_mm512_setzero_ps());
2641}
2642
2643static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2644_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
2645 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2646 -(__v16sf)__C);
2647}
2648
2649static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2650_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2651 return (__m512)__builtin_ia32_selectps_512(
2652 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__A);
2653}
2654
2655static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2656_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2657 return (__m512)__builtin_ia32_selectps_512(
2658 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__C);
2659}
2660
2661static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2662_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2663 return (__m512)__builtin_ia32_selectps_512(
2664 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C),
2665 (__v16sf)_mm512_setzero_ps());
2666}
2667
2668static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2669_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
2670 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2671 (__v16sf)__C);
2672}
2673
2674static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2675_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2676 return (__m512)__builtin_ia32_selectps_512(
2677 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__A);
2678}
2679
2680static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2681_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2682 return (__m512)__builtin_ia32_selectps_512(
2683 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__C);
2684}
2685
2686static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2687_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2688 return (__m512)__builtin_ia32_selectps_512(
2689 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C),
2690 (__v16sf)_mm512_setzero_ps());
2691}
2692
2693static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2694_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
2695 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2696 -(__v16sf)__C);
2697}
2698
2699static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2700_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2701 return (__m512)__builtin_ia32_selectps_512(
2702 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__A);
2703}
2704
2705static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2706_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2707 return (__m512)__builtin_ia32_selectps_512(
2708 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__C);
2709}
2710
2711static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2712_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2713 return (__m512)__builtin_ia32_selectps_512(
2714 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C),
2715 (__v16sf)_mm512_setzero_ps());
2716}
2717
/* Packed double fmaddsub/fmsubadd (alternating add/subtract across lanes)
   with explicit rounding control R, plus masked variants. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))

#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2765
2766
2767static __inline__ __m512d __DEFAULT_FN_ATTRS512
2768_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2769{
2770 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2771 (__v8df) __B,
2772 (__v8df) __C,
2773 (__mmask8) -1,
2775}
2776
2777static __inline__ __m512d __DEFAULT_FN_ATTRS512
2778_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2779{
2780 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2781 (__v8df) __B,
2782 (__v8df) __C,
2783 (__mmask8) __U,
2785}
2786
2787static __inline__ __m512d __DEFAULT_FN_ATTRS512
2788_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2789{
2790 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2791 (__v8df) __B,
2792 (__v8df) __C,
2793 (__mmask8) __U,
2795}
2796
2797static __inline__ __m512d __DEFAULT_FN_ATTRS512
2798_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2799{
2800 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2801 (__v8df) __B,
2802 (__v8df) __C,
2803 (__mmask8) __U,
2805}
2806
2807static __inline__ __m512d __DEFAULT_FN_ATTRS512
2808_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2809{
2810 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2811 (__v8df) __B,
2812 -(__v8df) __C,
2813 (__mmask8) -1,
2815}
2816
2817static __inline__ __m512d __DEFAULT_FN_ATTRS512
2818_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2819{
2820 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2821 (__v8df) __B,
2822 -(__v8df) __C,
2823 (__mmask8) __U,
2825}
2826
2827static __inline__ __m512d __DEFAULT_FN_ATTRS512
2828_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2829{
2830 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2831 (__v8df) __B,
2832 -(__v8df) __C,
2833 (__mmask8) __U,
2835}
2836
/* Packed single fmaddsub/fmsubadd with explicit rounding control R, plus
   masked variants; mirrors the _pd round macros above. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
2884
2885
2886static __inline__ __m512 __DEFAULT_FN_ATTRS512
2887_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2888{
2889 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2890 (__v16sf) __B,
2891 (__v16sf) __C,
2892 (__mmask16) -1,
2894}
2895
2896static __inline__ __m512 __DEFAULT_FN_ATTRS512
2897_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2898{
2899 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2900 (__v16sf) __B,
2901 (__v16sf) __C,
2902 (__mmask16) __U,
2904}
2905
2906static __inline__ __m512 __DEFAULT_FN_ATTRS512
2907_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2908{
2909 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2910 (__v16sf) __B,
2911 (__v16sf) __C,
2912 (__mmask16) __U,
2914}
2915
2916static __inline__ __m512 __DEFAULT_FN_ATTRS512
2917_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2918{
2919 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2920 (__v16sf) __B,
2921 (__v16sf) __C,
2922 (__mmask16) __U,
2924}
2925
2926static __inline__ __m512 __DEFAULT_FN_ATTRS512
2927_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2928{
2929 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2930 (__v16sf) __B,
2931 -(__v16sf) __C,
2932 (__mmask16) -1,
2934}
2935
2936static __inline__ __m512 __DEFAULT_FN_ATTRS512
2937_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2938{
2939 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2940 (__v16sf) __B,
2941 -(__v16sf) __C,
2942 (__mmask16) __U,
2944}
2945
2946static __inline__ __m512 __DEFAULT_FN_ATTRS512
2947_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2948{
2949 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2950 (__v16sf) __B,
2951 -(__v16sf) __C,
2952 (__mmask16) __U,
2954}
2955
/* mask3 fmsub/fmsubadd with explicit rounding: unselected lanes keep C. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2973
2974
2975static __inline__ __m512d __DEFAULT_FN_ATTRS512
2976_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2977{
2978 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2979 (__v8df) __B,
2980 (__v8df) __C,
2981 (__mmask8) __U,
2983}
2984
/* fmsubadd, single precision, mask3 form with explicit rounding. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
2990
2991
2992static __inline__ __m512 __DEFAULT_FN_ATTRS512
2993_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2994{
2995 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2996 (__v16sf) __B,
2997 (__v16sf) __C,
2998 (__mmask16) __U,
3000}
3001
/* fnmadd/fnmsub with explicit rounding, expressed through the fmadd/fmsub
 * builtins by negating the appropriate operands. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
3039
3040/* Vector permutations */
3041
3042static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3043_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
3044 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3045 (__v16si) __B);
3046}
3047
3048static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3049_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3050 __m512i __B) {
3051 return (__m512i)__builtin_ia32_selectd_512(__U,
3052 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3053 (__v16si)__A);
3054}
3055
3056static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3057_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3058 __m512i __B) {
3059 return (__m512i)__builtin_ia32_selectd_512(__U,
3060 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3061 (__v16si)__I);
3062}
3063
3064static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3065_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3066 __m512i __B) {
3067 return (__m512i)__builtin_ia32_selectd_512(__U,
3068 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3069 (__v16si)_mm512_setzero_si512());
3070}
3071
3072static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3073_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) {
3074 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3075 (__v8di) __B);
3076}
3077
3078static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3079_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3080 __m512i __B) {
3081 return (__m512i)__builtin_ia32_selectq_512(__U,
3082 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3083 (__v8di)__A);
3084}
3085
3086static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3087_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3088 __m512i __B) {
3089 return (__m512i)__builtin_ia32_selectq_512(__U,
3090 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3091 (__v8di)__I);
3092}
3093
3094static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3095_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3096 __m512i __B) {
3097 return (__m512i)__builtin_ia32_selectq_512(__U,
3098 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3099 (__v8di)_mm512_setzero_si512());
3100}
3101
/* Concatenate A:B and shift right by I elements (qword/dword granularity);
 * masked variants select between the result and W or zero. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
/* Vector Extract */

/* Extract a 256-bit (I selects which half) / 128-bit (I selects which
 * quarter) slice of the 512-bit source, with optional masking. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8) - 1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8) - 1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3160
3161/* Vector Blend */
3162
3163static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3164_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
3165 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3166 (__v8df) __W,
3167 (__v8df) __A);
3168}
3169
3170static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3171_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
3172 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3173 (__v16sf) __W,
3174 (__v16sf) __A);
3175}
3176
3177static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3178_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
3179 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3180 (__v8di) __W,
3181 (__v8di) __A);
3182}
3183
3184static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3185_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
3186 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3187 (__v16si) __W,
3188 (__v16si) __A);
3189}
3190
/* Compare */

/* Single-precision compares returning a 16-bit lane mask; P is a _CMP_*
 * predicate, R an _MM_FROUND_* rounding/SAE control. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate shorthands for the generic compare above. */
#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3247
/* Double-precision compares returning an 8-bit lane mask; same predicate
 * and rounding conventions as the ps forms above. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3302
/* Conversion */

/* Truncating convert float32 -> uint32 with explicit rounding/SAE. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
3319
3320
3321static __inline __m512i __DEFAULT_FN_ATTRS512
3323{
3324 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3325 (__v16si)
3327 (__mmask16) -1,
3329}
3330
3331static __inline__ __m512i __DEFAULT_FN_ATTRS512
3332_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3333{
3334 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3335 (__v16si) __W,
3336 (__mmask16) __U,
3338}
3339
3340static __inline__ __m512i __DEFAULT_FN_ATTRS512
3342{
3343 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3344 (__v16si) _mm512_setzero_si512 (),
3345 (__mmask16) __U,
3347}
3348
/* Convert int32/uint32 -> float32 with explicit rounding. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3378
3379static __inline__ __m512
3381 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3382}
3383
3384static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3385_mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3386 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3387 (__v16sf)_mm512_cvtepu32_ps(__A),
3388 (__v16sf)__W);
3389}
3390
3391static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3393 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3394 (__v16sf)_mm512_cvtepu32_ps(__A),
3395 (__v16sf)_mm512_setzero_ps());
3396}
3397
3398static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3400 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3401}
3402
3403static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3404_mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3405 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3406 (__v8df)_mm512_cvtepi32_pd(__A),
3407 (__v8df)__W);
3408}
3409
3410static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3412 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3413 (__v8df)_mm512_cvtepi32_pd(__A),
3414 (__v8df)_mm512_setzero_pd());
3415}
3416
3417static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3419 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3420}
3421
3422static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3423_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3424 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3425}
3426
3427static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3429 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3430}
3431
3432static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3433_mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3434 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3435 (__v16sf)_mm512_cvtepi32_ps(__A),
3436 (__v16sf)__W);
3437}
3438
3439static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3441 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3442 (__v16sf)_mm512_cvtepi32_ps(__A),
3443 (__v16sf)_mm512_setzero_ps());
3444}
3445
3446static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3448 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3449}
3450
3451static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3452_mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3453 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3454 (__v8df)_mm512_cvtepu32_pd(__A),
3455 (__v8df)__W);
3456}
3457
3458static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3460 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3461 (__v8df)_mm512_cvtepu32_pd(__A),
3462 (__v8df)_mm512_setzero_pd());
3463}
3464
3465static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3467 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3468}
3469
3470static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3471_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3472 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3473}
3474
/* Convert float64 -> float32 (512 -> 256 bit) with explicit rounding. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
3489
3490static __inline__ __m256
3492 return (__m256)__builtin_ia32_cvtpd2ps512_mask(
3493 (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
3495}
3496
3497static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
3498_mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A) {
3499 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3500 (__v8sf) __W,
3501 (__mmask8) __U,
3503}
3504
3505static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
3507 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3508 (__v8sf) _mm256_setzero_ps (),
3509 (__mmask8) __U,
3511}
3512
3513static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3514_mm512_cvtpd_pslo(__m512d __A) {
3515 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3516 (__v8sf) _mm256_setzero_ps (),
3517 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3518}
3519
3520static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3521_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
3522 return (__m512) __builtin_shufflevector (
3524 __U, __A),
3525 (__v8sf) _mm256_setzero_ps (),
3526 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3527}
3528
/* Convert float32 -> float16 (stored in a __m256i); I is the rounding-imm.
 * NOTE(review): upstream names the masked macros' parameters (U, W, ...)
 * with U as the destination and W as the mask — preserved as-is so the
 * expansion stays identical. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(U), \
                                             (__mmask16)(W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)))

#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3547
/* Convert float16 (in a __m256i) -> float32 with explicit rounding/SAE. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3562
3563
3564static __inline __m512 __DEFAULT_FN_ATTRS512
3566{
3567 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3568 (__v16sf)
3570 (__mmask16) -1,
3572}
3573
3574static __inline__ __m512 __DEFAULT_FN_ATTRS512
3575_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3576{
3577 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3578 (__v16sf) __W,
3579 (__mmask16) __U,
3581}
3582
3583static __inline__ __m512 __DEFAULT_FN_ATTRS512
3585{
3586 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3587 (__v16sf) _mm512_setzero_ps (),
3588 (__mmask16) __U,
3590}
3591
/* Truncating convert float64 -> int32 with explicit rounding/SAE. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3606
3607static __inline __m256i __DEFAULT_FN_ATTRS512
3609{
3610 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3611 (__v8si)_mm256_setzero_si256(),
3612 (__mmask8) -1,
3614}
3615
3616static __inline__ __m256i __DEFAULT_FN_ATTRS512
3617_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3618{
3619 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3620 (__v8si) __W,
3621 (__mmask8) __U,
3623}
3624
3625static __inline__ __m256i __DEFAULT_FN_ATTRS512
3627{
3628 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3629 (__v8si) _mm256_setzero_si256 (),
3630 (__mmask8) __U,
3632}
3633
/* Truncating convert float32 -> int32 with explicit rounding/SAE. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3648
3649static __inline __m512i __DEFAULT_FN_ATTRS512
3651{
3652 return (__m512i)
3653 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3654 (__v16si) _mm512_setzero_si512 (),
3656}
3657
3658static __inline__ __m512i __DEFAULT_FN_ATTRS512
3659_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3660{
3661 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3662 (__v16si) __W,
3663 (__mmask16) __U,
3665}
3666
3667static __inline__ __m512i __DEFAULT_FN_ATTRS512
3669{
3670 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3671 (__v16si) _mm512_setzero_si512 (),
3672 (__mmask16) __U,
3674}
3675
/* Convert float32 -> int32 with explicit rounding. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
3690
3691static __inline__ __m512i __DEFAULT_FN_ATTRS512
3693{
3694 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3695 (__v16si) _mm512_undefined_epi32 (),
3696 (__mmask16) -1,
3698}
3699
3700static __inline__ __m512i __DEFAULT_FN_ATTRS512
3701_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3702{
3703 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3704 (__v16si) __W,
3705 (__mmask16) __U,
3707}
3708
3709static __inline__ __m512i __DEFAULT_FN_ATTRS512
3711{
3712 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3713 (__v16si)
3715 (__mmask16) __U,
3717}
3718
/* Convert float64 -> int32 with explicit rounding. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
3733
3734static __inline__ __m256i __DEFAULT_FN_ATTRS512
3736{
3737 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3738 (__v8si)
3740 (__mmask8) -1,
3742}
3743
3744static __inline__ __m256i __DEFAULT_FN_ATTRS512
3745_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3746{
3747 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3748 (__v8si) __W,
3749 (__mmask8) __U,
3751}
3752
3753static __inline__ __m256i __DEFAULT_FN_ATTRS512
3755{
3756 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3757 (__v8si)
3759 (__mmask8) __U,
3761}
3762
/* Convert float32 -> uint32 with explicit rounding. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3777
3778static __inline__ __m512i __DEFAULT_FN_ATTRS512
3780{
3781 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
3782 (__v16si)\
3784 (__mmask16) -1,\
3786}
3787
3788static __inline__ __m512i __DEFAULT_FN_ATTRS512
3789_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3790{
3791 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3792 (__v16si) __W,
3793 (__mmask16) __U,
3795}
3796
3797static __inline__ __m512i __DEFAULT_FN_ATTRS512
3799{
3800 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3801 (__v16si)
3803 (__mmask16) __U ,
3805}
3806
/* Convert float64 -> uint32 with explicit rounding. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3821
3822static __inline__ __m256i __DEFAULT_FN_ATTRS512
3824{
3825 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3826 (__v8si)
3828 (__mmask8) -1,
3830}
3831
3832static __inline__ __m256i __DEFAULT_FN_ATTRS512
3833_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
3834{
3835 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3836 (__v8si) __W,
3837 (__mmask8) __U,
3839}
3840
3841static __inline__ __m256i __DEFAULT_FN_ATTRS512
3843{
3844 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3845 (__v8si)
3847 (__mmask8) __U,
3849}
3850
3851static __inline__ double __DEFAULT_FN_ATTRS512
3853{
3854 return __a[0];
3855}
3856
3857static __inline__ float __DEFAULT_FN_ATTRS512
3859{
3860 return __a[0];
3861}
3862
3863/* Unpack and Interleave */
3864
3865static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3866_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
3867 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
3868 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
3869}
3870
3871static __inline__ __m512d __DEFAULT_FN_ATTRS512
3872_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
3873{
3874 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3875 (__v8df)_mm512_unpackhi_pd(__A, __B),
3876 (__v8df)__W);
3877}
3878
3879static __inline__ __m512d __DEFAULT_FN_ATTRS512
3880_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
3881{
3882 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3883 (__v8df)_mm512_unpackhi_pd(__A, __B),
3884 (__v8df)_mm512_setzero_pd());
3885}
3886
3887static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3888_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
3889 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
3890 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
3891}
3892
3893static __inline__ __m512d __DEFAULT_FN_ATTRS512
3894_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
3895{
3896 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3897 (__v8df)_mm512_unpacklo_pd(__A, __B),
3898 (__v8df)__W);
3899}
3900
3901static __inline__ __m512d __DEFAULT_FN_ATTRS512
3902_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
3903{
3904 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3905 (__v8df)_mm512_unpacklo_pd(__A, __B),
3906 (__v8df)_mm512_setzero_pd());
3907}
3908
3909static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3910_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
3911 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
3912 2, 18, 3, 19,
3913 2+4, 18+4, 3+4, 19+4,
3914 2+8, 18+8, 3+8, 19+8,
3915 2+12, 18+12, 3+12, 19+12);
3916}
3917
3918static __inline__ __m512 __DEFAULT_FN_ATTRS512
3919_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
3920{
3921 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3922 (__v16sf)_mm512_unpackhi_ps(__A, __B),
3923 (__v16sf)__W);
3924}
3925
3926static __inline__ __m512 __DEFAULT_FN_ATTRS512
3927_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
3928{
3929 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3930 (__v16sf)_mm512_unpackhi_ps(__A, __B),
3931 (__v16sf)_mm512_setzero_ps());
3932}
3933
3934static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3935_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
3936 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
3937 0, 16, 1, 17,
3938 0+4, 16+4, 1+4, 17+4,
3939 0+8, 16+8, 1+8, 17+8,
3940 0+12, 16+12, 1+12, 17+12);
3941}
3942
3943static __inline__ __m512 __DEFAULT_FN_ATTRS512
3944_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
3945{
3946 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3947 (__v16sf)_mm512_unpacklo_ps(__A, __B),
3948 (__v16sf)__W);
3949}
3950
3951static __inline__ __m512 __DEFAULT_FN_ATTRS512
3952_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
3953{
3954 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3955 (__v16sf)_mm512_unpacklo_ps(__A, __B),
3956 (__v16sf)_mm512_setzero_ps());
3957}
3958
3959static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3960_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
3961 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3962 2, 18, 3, 19,
3963 2+4, 18+4, 3+4, 19+4,
3964 2+8, 18+8, 3+8, 19+8,
3965 2+12, 18+12, 3+12, 19+12);
3966}
3967
3968static __inline__ __m512i __DEFAULT_FN_ATTRS512
3969_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3970{
3971 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
3972 (__v16si)_mm512_unpackhi_epi32(__A, __B),
3973 (__v16si)__W);
3974}
3975
3976static __inline__ __m512i __DEFAULT_FN_ATTRS512
3977_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
3978{
3979 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
3980 (__v16si)_mm512_unpackhi_epi32(__A, __B),
3981 (__v16si)_mm512_setzero_si512());
3982}
3983
3984static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3985_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
3986 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3987 0, 16, 1, 17,
3988 0+4, 16+4, 1+4, 17+4,
3989 0+8, 16+8, 1+8, 17+8,
3990 0+12, 16+12, 1+12, 17+12);
3991}
3992
3993static __inline__ __m512i __DEFAULT_FN_ATTRS512
3994_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3995{
3996 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
3997 (__v16si)_mm512_unpacklo_epi32(__A, __B),
3998 (__v16si)__W);
3999}
4000
4001static __inline__ __m512i __DEFAULT_FN_ATTRS512
4002_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4003{
4004 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4005 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4006 (__v16si)_mm512_setzero_si512());
4007}
4008
4009static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4010_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
4011 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4012 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4013}
4014
4015static __inline__ __m512i __DEFAULT_FN_ATTRS512
4016_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4017{
4018 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4019 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4020 (__v8di)__W);
4021}
4022
4023static __inline__ __m512i __DEFAULT_FN_ATTRS512
4024_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4025{
4026 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4027 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4028 (__v8di)_mm512_setzero_si512());
4029}
4030
4031static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4032_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
4033 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4034 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4035}
4036
4037static __inline__ __m512i __DEFAULT_FN_ATTRS512
4038_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4039{
4040 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4041 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4042 (__v8di)__W);
4043}
4044
4045static __inline__ __m512i __DEFAULT_FN_ATTRS512
4046_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4047{
4048 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4049 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4050 (__v8di)_mm512_setzero_si512());
4051}
4052
4053
4054/* SIMD load ops */
4055
4056static __inline __m512i __DEFAULT_FN_ATTRS512
4058{
4059 struct __loadu_si512 {
4060 __m512i_u __v;
4061 } __attribute__((__packed__, __may_alias__));
4062 return ((const struct __loadu_si512*)__P)->__v;
4063}
4064
4065static __inline __m512i __DEFAULT_FN_ATTRS512
4067{
4068 struct __loadu_epi32 {
4069 __m512i_u __v;
4070 } __attribute__((__packed__, __may_alias__));
4071 return ((const struct __loadu_epi32*)__P)->__v;
4072}
4073
4074static __inline __m512i __DEFAULT_FN_ATTRS512
4075_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4076{
4077 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4078 (__v16si) __W,
4079 (__mmask16) __U);
4080}
4081
4082
4083static __inline __m512i __DEFAULT_FN_ATTRS512
4085{
4086 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4087 (__v16si)
4089 (__mmask16) __U);
4090}
4091
4092static __inline __m512i __DEFAULT_FN_ATTRS512
4094{
4095 struct __loadu_epi64 {
4096 __m512i_u __v;
4097 } __attribute__((__packed__, __may_alias__));
4098 return ((const struct __loadu_epi64*)__P)->__v;
4099}
4100
4101static __inline __m512i __DEFAULT_FN_ATTRS512
4102_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4103{
4104 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4105 (__v8di) __W,
4106 (__mmask8) __U);
4107}
4108
4109static __inline __m512i __DEFAULT_FN_ATTRS512
4111{
4112 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4113 (__v8di)
4115 (__mmask8) __U);
4116}
4117
4118static __inline __m512 __DEFAULT_FN_ATTRS512
4119_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4120{
4121 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4122 (__v16sf) __W,
4123 (__mmask16) __U);
4124}
4125
4126static __inline __m512 __DEFAULT_FN_ATTRS512
4128{
4129 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4130 (__v16sf)
4132 (__mmask16) __U);
4133}
4134
4135static __inline __m512d __DEFAULT_FN_ATTRS512
4136_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4137{
4138 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4139 (__v8df) __W,
4140 (__mmask8) __U);
4141}
4142
4143static __inline __m512d __DEFAULT_FN_ATTRS512
4145{
4146 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4147 (__v8df)
4149 (__mmask8) __U);
4150}
4151
4152static __inline __m512d __DEFAULT_FN_ATTRS512
4154{
4155 struct __loadu_pd {
4156 __m512d_u __v;
4157 } __attribute__((__packed__, __may_alias__));
4158 return ((const struct __loadu_pd*)__p)->__v;
4159}
4160
4161static __inline __m512 __DEFAULT_FN_ATTRS512
4163{
4164 struct __loadu_ps {
4165 __m512_u __v;
4166 } __attribute__((__packed__, __may_alias__));
4167 return ((const struct __loadu_ps*)__p)->__v;
4168}
4169
4170static __inline __m512 __DEFAULT_FN_ATTRS512
4172{
4173 return *(const __m512*)__p;
4174}
4175
4176static __inline __m512 __DEFAULT_FN_ATTRS512
4177_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4178{
4179 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4180 (__v16sf) __W,
4181 (__mmask16) __U);
4182}
4183
4184static __inline __m512 __DEFAULT_FN_ATTRS512
4186{
4187 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4188 (__v16sf)
4190 (__mmask16) __U);
4191}
4192
4193static __inline __m512d __DEFAULT_FN_ATTRS512
4195{
4196 return *(const __m512d*)__p;
4197}
4198
4199static __inline __m512d __DEFAULT_FN_ATTRS512
4200_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4201{
4202 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4203 (__v8df) __W,
4204 (__mmask8) __U);
4205}
4206
4207static __inline __m512d __DEFAULT_FN_ATTRS512
4209{
4210 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4211 (__v8df)
4213 (__mmask8) __U);
4214}
4215
4216static __inline __m512i __DEFAULT_FN_ATTRS512
4218{
4219 return *(const __m512i *) __P;
4220}
4221
4222static __inline __m512i __DEFAULT_FN_ATTRS512
4224{
4225 return *(const __m512i *) __P;
4226}
4227
4228static __inline __m512i __DEFAULT_FN_ATTRS512
4230{
4231 return *(const __m512i *) __P;
4232}
4233
4234/* SIMD store ops */
4235
4236static __inline void __DEFAULT_FN_ATTRS512
4237_mm512_storeu_epi64 (void *__P, __m512i __A)
4238{
4239 struct __storeu_epi64 {
4240 __m512i_u __v;
4241 } __attribute__((__packed__, __may_alias__));
4242 ((struct __storeu_epi64*)__P)->__v = __A;
4243}
4244
4245static __inline void __DEFAULT_FN_ATTRS512
4246_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4247{
4248 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4249 (__mmask8) __U);
4250}
4251
4252static __inline void __DEFAULT_FN_ATTRS512
4253_mm512_storeu_si512 (void *__P, __m512i __A)
4254{
4255 struct __storeu_si512 {
4256 __m512i_u __v;
4257 } __attribute__((__packed__, __may_alias__));
4258 ((struct __storeu_si512*)__P)->__v = __A;
4259}
4260
4261static __inline void __DEFAULT_FN_ATTRS512
4262_mm512_storeu_epi32 (void *__P, __m512i __A)
4263{
4264 struct __storeu_epi32 {
4265 __m512i_u __v;
4266 } __attribute__((__packed__, __may_alias__));
4267 ((struct __storeu_epi32*)__P)->__v = __A;
4268}
4269
4270static __inline void __DEFAULT_FN_ATTRS512
4272{
4273 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4274 (__mmask16) __U);
4275}
4276
4277static __inline void __DEFAULT_FN_ATTRS512
4278_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4279{
4280 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4281}
4282
4283static __inline void __DEFAULT_FN_ATTRS512
4284_mm512_storeu_pd(void *__P, __m512d __A)
4285{
4286 struct __storeu_pd {
4287 __m512d_u __v;
4288 } __attribute__((__packed__, __may_alias__));
4289 ((struct __storeu_pd*)__P)->__v = __A;
4290}
4291
4292static __inline void __DEFAULT_FN_ATTRS512
4293_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4294{
4295 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4296 (__mmask16) __U);
4297}
4298
4299static __inline void __DEFAULT_FN_ATTRS512
4300_mm512_storeu_ps(void *__P, __m512 __A)
4301{
4302 struct __storeu_ps {
4303 __m512_u __v;
4304 } __attribute__((__packed__, __may_alias__));
4305 ((struct __storeu_ps*)__P)->__v = __A;
4306}
4307
4308static __inline void __DEFAULT_FN_ATTRS512
4309_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4310{
4311 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4312}
4313
4314static __inline void __DEFAULT_FN_ATTRS512
4315_mm512_store_pd(void *__P, __m512d __A)
4316{
4317 *(__m512d*)__P = __A;
4318}
4319
4320static __inline void __DEFAULT_FN_ATTRS512
4321_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4322{
4323 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4324 (__mmask16) __U);
4325}
4326
4327static __inline void __DEFAULT_FN_ATTRS512
4328_mm512_store_ps(void *__P, __m512 __A)
4329{
4330 *(__m512*)__P = __A;
4331}
4332
4333static __inline void __DEFAULT_FN_ATTRS512
4334_mm512_store_si512 (void *__P, __m512i __A)
4335{
4336 *(__m512i *) __P = __A;
4337}
4338
4339static __inline void __DEFAULT_FN_ATTRS512
4340_mm512_store_epi32 (void *__P, __m512i __A)
4341{
4342 *(__m512i *) __P = __A;
4343}
4344
4345static __inline void __DEFAULT_FN_ATTRS512
4346_mm512_store_epi64 (void *__P, __m512i __A)
4347{
4348 *(__m512i *) __P = __A;
4349}
4350
4351/* Mask ops */
4352
4355 return __builtin_ia32_knothi(__M);
4356}
4357
4358/* Integer compare */
4359
/* Named integer comparisons: thin aliases over the generic
   _mm512_[mask_]cmp_*_mask macros with the matching _MM_CMPINT_* predicate. */

/* Signed 32-bit element comparisons. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 32-bit element comparisons. */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Signed 64-bit element comparisons. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 64-bit element comparisons. */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4459
4460static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4462 /* This function always performs a signed extension, but __v16qi is a char
4463 which may be signed or unsigned, so use __v16qs. */
4464 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4465}
4466
4467static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4468_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4469 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4470 (__v16si)_mm512_cvtepi8_epi32(__A),
4471 (__v16si)__W);
4472}
4473
4474static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4476 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4477 (__v16si)_mm512_cvtepi8_epi32(__A),
4478 (__v16si)_mm512_setzero_si512());
4479}
4480
4481static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4483 /* This function always performs a signed extension, but __v16qi is a char
4484 which may be signed or unsigned, so use __v16qs. */
4485 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4486}
4487
4488static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4489_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4490 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4491 (__v8di)_mm512_cvtepi8_epi64(__A),
4492 (__v8di)__W);
4493}
4494
4495static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4497 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4498 (__v8di)_mm512_cvtepi8_epi64(__A),
4499 (__v8di)_mm512_setzero_si512 ());
4500}
4501
4502static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4504 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4505}
4506
4507static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4508_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4509 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4510 (__v8di)_mm512_cvtepi32_epi64(__X),
4511 (__v8di)__W);
4512}
4513
4514static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4516 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4517 (__v8di)_mm512_cvtepi32_epi64(__X),
4518 (__v8di)_mm512_setzero_si512());
4519}
4520
4521static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4523 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4524}
4525
4526static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4527_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4528 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4529 (__v16si)_mm512_cvtepi16_epi32(__A),
4530 (__v16si)__W);
4531}
4532
4533static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4535 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4536 (__v16si)_mm512_cvtepi16_epi32(__A),
4537 (__v16si)_mm512_setzero_si512 ());
4538}
4539
4540static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4542 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4543}
4544
4545static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4546_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4547 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4548 (__v8di)_mm512_cvtepi16_epi64(__A),
4549 (__v8di)__W);
4550}
4551
4552static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4554 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4555 (__v8di)_mm512_cvtepi16_epi64(__A),
4556 (__v8di)_mm512_setzero_si512());
4557}
4558
4559static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4561 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4562}
4563
4564static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4565_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4566 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4567 (__v16si)_mm512_cvtepu8_epi32(__A),
4568 (__v16si)__W);
4569}
4570
4571static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4573 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4574 (__v16si)_mm512_cvtepu8_epi32(__A),
4575 (__v16si)_mm512_setzero_si512());
4576}
4577
4578static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4580 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4581}
4582
4583static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4584_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4585 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4586 (__v8di)_mm512_cvtepu8_epi64(__A),
4587 (__v8di)__W);
4588}
4589
4590static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4592 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4593 (__v8di)_mm512_cvtepu8_epi64(__A),
4594 (__v8di)_mm512_setzero_si512());
4595}
4596
4597static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4599 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4600}
4601
4602static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4603_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4604 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4605 (__v8di)_mm512_cvtepu32_epi64(__X),
4606 (__v8di)__W);
4607}
4608
4609static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4611 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4612 (__v8di)_mm512_cvtepu32_epi64(__X),
4613 (__v8di)_mm512_setzero_si512());
4614}
4615
4616static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4618 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4619}
4620
4621static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4622_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4623 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4624 (__v16si)_mm512_cvtepu16_epi32(__A),
4625 (__v16si)__W);
4626}
4627
4628static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4630 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4631 (__v16si)_mm512_cvtepu16_epi32(__A),
4632 (__v16si)_mm512_setzero_si512());
4633}
4634
4635static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4637 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4638}
4639
4640static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4641_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4642 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4643 (__v8di)_mm512_cvtepu16_epi64(__A),
4644 (__v8di)__W);
4645}
4646
4647static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4649 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4650 (__v8di)_mm512_cvtepu16_epi64(__A),
4651 (__v8di)_mm512_setzero_si512());
4652}
4653
4654static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4655_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4656{
4657 return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
4658}
4659
4660static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4661_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4662{
4663 return (__m512i)__builtin_ia32_selectd_512(__U,
4664 (__v16si)_mm512_rorv_epi32(__A, __B),
4665 (__v16si)__W);
4666}
4667
4668static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4669_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4670{
4671 return (__m512i)__builtin_ia32_selectd_512(__U,
4672 (__v16si)_mm512_rorv_epi32(__A, __B),
4673 (__v16si)_mm512_setzero_si512());
4674}
4675
4676static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4677_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4678{
4679 return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
4680}
4681
4682static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4683_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4684{
4685 return (__m512i)__builtin_ia32_selectq_512(__U,
4686 (__v8di)_mm512_rorv_epi64(__A, __B),
4687 (__v8di)__W);
4688}
4689
4690static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4691_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4692{
4693 return (__m512i)__builtin_ia32_selectq_512(__U,
4694 (__v8di)_mm512_rorv_epi64(__A, __B),
4695 (__v8di)_mm512_setzero_si512());
4696}
4697
4698
4699
/* Generic predicate comparisons: p is one of the _MM_CMPINT_* constants.
   The unmasked forms pass an all-ones mask. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))
4739
/* Rotate left by an immediate count (VPROLD/VPROLQ); macro form because the
   count must be a compile-time constant. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
4765
4766static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4767_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4768{
4769 return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
4770}
4771
4772static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4773_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4774{
4775 return (__m512i)__builtin_ia32_selectd_512(__U,
4776 (__v16si)_mm512_rolv_epi32(__A, __B),
4777 (__v16si)__W);
4778}
4779
4780static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4781_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4782{
4783 return (__m512i)__builtin_ia32_selectd_512(__U,
4784 (__v16si)_mm512_rolv_epi32(__A, __B),
4785 (__v16si)_mm512_setzero_si512());
4786}
4787
4788static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4789_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4790{
4791 return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
4792}
4793
4794static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4795_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4796{
4797 return (__m512i)__builtin_ia32_selectq_512(__U,
4798 (__v8di)_mm512_rolv_epi64(__A, __B),
4799 (__v8di)__W);
4800}
4801
4802static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4803_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4804{
4805 return (__m512i)__builtin_ia32_selectq_512(__U,
4806 (__v8di)_mm512_rolv_epi64(__A, __B),
4807 (__v8di)_mm512_setzero_si512());
4808}
4809
/* Rotate right by an immediate count (VPRORD/VPRORQ); macro form because the
   count must be a compile-time constant. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
4835
4836static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4837_mm512_slli_epi32(__m512i __A, unsigned int __B) {
4838 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
4839}
4840
4841static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4842_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
4843 unsigned int __B) {
4844 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4845 (__v16si)_mm512_slli_epi32(__A, __B),
4846 (__v16si)__W);
4847}
4848
4849static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4850_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
4851 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4852 (__v16si)_mm512_slli_epi32(__A, __B),
4853 (__v16si)_mm512_setzero_si512());
4854}
4855
4856static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4857_mm512_slli_epi64(__m512i __A, unsigned int __B) {
4858 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
4859}
4860
4861static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4862_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
4863 unsigned int __B) {
4864 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4865 (__v8di)_mm512_slli_epi64(__A, __B),
4866 (__v8di)__W);
4867}
4868
4869static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4870_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4871 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4872 (__v8di)_mm512_slli_epi64(__A, __B),
4873 (__v8di)_mm512_setzero_si512());
4874}
4875
4876static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4877_mm512_srli_epi32(__m512i __A, unsigned int __B) {
4878 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
4879}
4880
4881static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4882_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
4883 unsigned int __B) {
4884 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4885 (__v16si)_mm512_srli_epi32(__A, __B),
4886 (__v16si)__W);
4887}
4888
4889static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4890_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
4891 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4892 (__v16si)_mm512_srli_epi32(__A, __B),
4893 (__v16si)_mm512_setzero_si512());
4894}
4895
4896static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4897_mm512_srli_epi64(__m512i __A, unsigned int __B) {
4898 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
4899}
4900
4901static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4902_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
4903 unsigned int __B) {
4904 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4905 (__v8di)_mm512_srli_epi64(__A, __B),
4906 (__v8di)__W);
4907}
4908
4909static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4910_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4911 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4912 (__v8di)_mm512_srli_epi64(__A, __B),
4913 (__v8di)_mm512_setzero_si512());
4914}
4915
4916static __inline__ __m512i __DEFAULT_FN_ATTRS512
4917_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4918{
4919 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4920 (__v16si) __W,
4921 (__mmask16) __U);
4922}
4923
4924static __inline__ __m512i __DEFAULT_FN_ATTRS512
4926{
4927 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4928 (__v16si)
4930 (__mmask16) __U);
4931}
4932
4933static __inline__ void __DEFAULT_FN_ATTRS512
4934_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
4935{
4936 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
4937 (__mmask16) __U);
4938}
4939
4940static __inline__ __m512i __DEFAULT_FN_ATTRS512
4941_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
4942{
4943 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4944 (__v16si) __A,
4945 (__v16si) __W);
4946}
4947
4948static __inline__ __m512i __DEFAULT_FN_ATTRS512
4950{
4951 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4952 (__v16si) __A,
4953 (__v16si) _mm512_setzero_si512 ());
4954}
4955
4956static __inline__ __m512i __DEFAULT_FN_ATTRS512
4957_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
4958{
4959 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4960 (__v8di) __A,
4961 (__v8di) __W);
4962}
4963
4964static __inline__ __m512i __DEFAULT_FN_ATTRS512
4966{
4967 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4968 (__v8di) __A,
4969 (__v8di) _mm512_setzero_si512 ());
4970}
4971
4972static __inline__ __m512i __DEFAULT_FN_ATTRS512
4973_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4974{
4975 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4976 (__v8di) __W,
4977 (__mmask8) __U);
4978}
4979
4980static __inline__ __m512i __DEFAULT_FN_ATTRS512
4982{
4983 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4984 (__v8di)
4986 (__mmask8) __U);
4987}
4988
4989static __inline__ void __DEFAULT_FN_ATTRS512
4990_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
4991{
4992 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
4993 (__mmask8) __U);
4994}
4995
4996static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
4998{
4999 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5000 0, 0, 2, 2, 4, 4, 6, 6);
5001}
5002
5003static __inline__ __m512d __DEFAULT_FN_ATTRS512
5004_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5005{
5006 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5007 (__v8df)_mm512_movedup_pd(__A),
5008 (__v8df)__W);
5009}
5010
5011static __inline__ __m512d __DEFAULT_FN_ATTRS512
5013{
5014 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5015 (__v8df)_mm512_movedup_pd(__A),
5016 (__v8df)_mm512_setzero_pd());
5017}
5018
/* VFIXUPIMMPD/PS: fix up special float values per the table selected by imm,
   in explicit-rounding (_round_) and current-direction forms, with mask and
   maskz variants. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
5098
/* Scalar VFIXUPIMMSD/SS: fix up the low element; upper elements pass through
   from A. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
5176
5177#define _mm_getexp_round_sd(A, B, R) \
5178 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5179 (__v2df)(__m128d)(B), \
5180 (__v2df)_mm_setzero_pd(), \
5181 (__mmask8)-1, (int)(R)))
5182
5183
5184static __inline__ __m128d __DEFAULT_FN_ATTRS128
5185_mm_getexp_sd (__m128d __A, __m128d __B)
5186{
5187 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5188 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5189}
5190
5191static __inline__ __m128d __DEFAULT_FN_ATTRS128
5192_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5193{
5194 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5195 (__v2df) __B,
5196 (__v2df) __W,
5197 (__mmask8) __U,
5199}
5200
5201#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5202 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5203 (__v2df)(__m128d)(B), \
5204 (__v2df)(__m128d)(W), \
5205 (__mmask8)(U), (int)(R)))
5206
5207static __inline__ __m128d __DEFAULT_FN_ATTRS128
5208_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5209{
5210 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5211 (__v2df) __B,
5212 (__v2df) _mm_setzero_pd (),
5213 (__mmask8) __U,
5215}
5216
5217#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5218 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5219 (__v2df)(__m128d)(B), \
5220 (__v2df)_mm_setzero_pd(), \
5221 (__mmask8)(U), (int)(R)))
5222
5223#define _mm_getexp_round_ss(A, B, R) \
5224 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5225 (__v4sf)(__m128)(B), \
5226 (__v4sf)_mm_setzero_ps(), \
5227 (__mmask8)-1, (int)(R)))
5228
5229static __inline__ __m128 __DEFAULT_FN_ATTRS128
5230_mm_getexp_ss (__m128 __A, __m128 __B)
5231{
5232 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5233 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5234}
5235
5236static __inline__ __m128 __DEFAULT_FN_ATTRS128
5237_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5238{
5239 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5240 (__v4sf) __B,
5241 (__v4sf) __W,
5242 (__mmask8) __U,
5244}
5245
5246#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5247 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5248 (__v4sf)(__m128)(B), \
5249 (__v4sf)(__m128)(W), \
5250 (__mmask8)(U), (int)(R)))
5251
5252static __inline__ __m128 __DEFAULT_FN_ATTRS128
5253_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5254{
5255 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5256 (__v4sf) __B,
5257 (__v4sf) _mm_setzero_ps (),
5258 (__mmask8) __U,
5260}
5261
5262#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5263 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5264 (__v4sf)(__m128)(B), \
5265 (__v4sf)_mm_setzero_ps(), \
5266 (__mmask8)(U), (int)(R)))
5267
/* VGETMANTSD/SS: extract the normalized mantissa of the low element.
   C selects the normalization interval, D the sign control; they are packed
   into the builtin's immediate as ((D)<<2)|(C). */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5357
5358static __inline__ __mmask16
5362
5363#define _mm_comi_round_sd(A, B, P, R) \
5364 ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
5365 (int)(P), (int)(R)))
5366
5367#define _mm_comi_round_ss(A, B, P, R) \
5368 ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
5369 (int)(P), (int)(R)))
5370
5371#ifdef __x86_64__
5372#define _mm_cvt_roundsd_si64(A, R) \
5373 ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5374#endif
5375
5376static __inline__ __m512i
5378 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5379}
5380
5381static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5382_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5383 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5384 (__v16si)_mm512_sll_epi32(__A, __B),
5385 (__v16si)__W);
5386}
5387
5388static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5389_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5390 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5391 (__v16si)_mm512_sll_epi32(__A, __B),
5392 (__v16si)_mm512_setzero_si512());
5393}
5394
5395static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5396_mm512_sll_epi64(__m512i __A, __m128i __B) {
5397 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5398}
5399
5400static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5401_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5402 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5403 (__v8di)_mm512_sll_epi64(__A, __B),
5404 (__v8di)__W);
5405}
5406
5407static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5408_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5409 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5410 (__v8di)_mm512_sll_epi64(__A, __B),
5411 (__v8di)_mm512_setzero_si512());
5412}
5413
5414static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5415_mm512_sllv_epi32(__m512i __X, __m512i __Y) {
5416 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5417}
5418
5419static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5420_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5421 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5422 (__v16si)_mm512_sllv_epi32(__X, __Y),
5423 (__v16si)__W);
5424}
5425
5426static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5427_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5428 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5429 (__v16si)_mm512_sllv_epi32(__X, __Y),
5430 (__v16si)_mm512_setzero_si512());
5431}
5432
5433static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5434_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5435{
5436 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5437}
5438
5439static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5440_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5441{
5442 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5443 (__v8di)_mm512_sllv_epi64(__X, __Y),
5444 (__v8di)__W);
5445}
5446
5447static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5448_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5449{
5450 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5451 (__v8di)_mm512_sllv_epi64(__X, __Y),
5452 (__v8di)_mm512_setzero_si512());
5453}
5454
5455static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5456_mm512_sra_epi32(__m512i __A, __m128i __B) {
5457 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5458}
5459
5460static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5461_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5462 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5463 (__v16si)_mm512_sra_epi32(__A, __B),
5464 (__v16si)__W);
5465}
5466
5467static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5468_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5469 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5470 (__v16si)_mm512_sra_epi32(__A, __B),
5471 (__v16si)_mm512_setzero_si512());
5472}
5473
5474static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5475_mm512_sra_epi64(__m512i __A, __m128i __B) {
5476 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5477}
5478
5479static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5480_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5481 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5482 (__v8di)_mm512_sra_epi64(__A, __B),
5483 (__v8di)__W);
5484}
5485
5486static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5487_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5488 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5489 (__v8di)_mm512_sra_epi64(__A, __B),
5490 (__v8di)_mm512_setzero_si512());
5491}
5492
5493static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5494_mm512_srav_epi32(__m512i __X, __m512i __Y) {
5495 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5496}
5497
5498static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5499_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5500 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5501 (__v16si)_mm512_srav_epi32(__X, __Y),
5502 (__v16si)__W);
5503}
5504
5505static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5506_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5507 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5508 (__v16si)_mm512_srav_epi32(__X, __Y),
5509 (__v16si)_mm512_setzero_si512());
5510}
5511
5512static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5513_mm512_srav_epi64(__m512i __X, __m512i __Y)
5514{
5515 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5516}
5517
5518static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5519_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5520{
5521 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5522 (__v8di)_mm512_srav_epi64(__X, __Y),
5523 (__v8di)__W);
5524}
5525
5526static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5527_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5528{
5529 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5530 (__v8di)_mm512_srav_epi64(__X, __Y),
5531 (__v8di)_mm512_setzero_si512());
5532}
5533
5534static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5535_mm512_srl_epi32(__m512i __A, __m128i __B) {
5536 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5537}
5538
5539static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5540_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5541 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5542 (__v16si)_mm512_srl_epi32(__A, __B),
5543 (__v16si)__W);
5544}
5545
5546static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5547_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5548 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5549 (__v16si)_mm512_srl_epi32(__A, __B),
5550 (__v16si)_mm512_setzero_si512());
5551}
5552
5553static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5554_mm512_srl_epi64(__m512i __A, __m128i __B) {
5555 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5556}
5557
5558static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5559_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5560 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5561 (__v8di)_mm512_srl_epi64(__A, __B),
5562 (__v8di)__W);
5563}
5564
5565static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5566_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5567 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5568 (__v8di)_mm512_srl_epi64(__A, __B),
5569 (__v8di)_mm512_setzero_si512());
5570}
5571
5572static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5573_mm512_srlv_epi32(__m512i __X, __m512i __Y) {
5574 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5575}
5576
5577static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5578_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5579 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5580 (__v16si)_mm512_srlv_epi32(__X, __Y),
5581 (__v16si)__W);
5582}
5583
5584static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5585_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5586 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5587 (__v16si)_mm512_srlv_epi32(__X, __Y),
5588 (__v16si)_mm512_setzero_si512());
5589}
5590
5591static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5592_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5593{
5594 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5595}
5596
5597static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5598_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5599{
5600 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5601 (__v8di)_mm512_srlv_epi64(__X, __Y),
5602 (__v8di)__W);
5603}
5604
5605static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5606_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5607{
5608 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5609 (__v8di)_mm512_srlv_epi64(__X, __Y),
5610 (__v8di)_mm512_setzero_si512());
5611}
5612
/// \enum _MM_TERNLOG_ENUM
/// A helper to represent the ternary logic operations among vector \a A,
/// \a B and \a C. The representation is passed to \a imm.
/* Enumerator body was dropped by the doc extraction; restored from the
   upstream header — verify against clang's avx512fintrin.h. */
typedef enum {
  _MM_TERNLOG_A = 0xF0,
  _MM_TERNLOG_B = 0xCC,
  _MM_TERNLOG_C = 0xAA
} _MM_TERNLOG_ENUM;

#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
5651
/* Convert the low double to signed/unsigned 32/64-bit integer with explicit
   rounding control R. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif

#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5665
5666static __inline__ unsigned __DEFAULT_FN_ATTRS128
5667_mm_cvtsd_u32 (__m128d __A)
5668{
5669 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5671}
5672
5673#ifdef __x86_64__
5674#define _mm_cvt_roundsd_u64(A, R) \
5675 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5676 (int)(R)))
5677
5678static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5679_mm_cvtsd_u64 (__m128d __A)
5680{
5681 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5682 __A,
5684}
5685#endif
5686
5687#define _mm_cvt_roundss_si32(A, R) \
5688 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5689
5690#define _mm_cvt_roundss_i32(A, R) \
5691 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5692
5693#ifdef __x86_64__
5694#define _mm_cvt_roundss_si64(A, R) \
5695 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5696
5697#define _mm_cvt_roundss_i64(A, R) \
5698 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5699#endif
5700
5701#define _mm_cvt_roundss_u32(A, R) \
5702 ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5703
5704static __inline__ unsigned __DEFAULT_FN_ATTRS128
5705_mm_cvtss_u32 (__m128 __A)
5706{
5707 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5709}
5710
5711#ifdef __x86_64__
5712#define _mm_cvt_roundss_u64(A, R) \
5713 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
5714 (int)(R)))
5715
5716static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5717_mm_cvtss_u64 (__m128 __A)
5718{
5719 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
5720 __A,
5722}
5723#endif
5724
5725#define _mm_cvtt_roundsd_i32(A, R) \
5726 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5727
5728#define _mm_cvtt_roundsd_si32(A, R) \
5729 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5730
5731static __inline__ int __DEFAULT_FN_ATTRS128
5732_mm_cvttsd_i32 (__m128d __A)
5733{
5734 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
5736}
5737
5738#ifdef __x86_64__
5739#define _mm_cvtt_roundsd_si64(A, R) \
5740 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5741
5742#define _mm_cvtt_roundsd_i64(A, R) \
5743 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5744
5745static __inline__ long long __DEFAULT_FN_ATTRS128
5746_mm_cvttsd_i64 (__m128d __A)
5747{
5748 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
5750}
5751#endif
5752
5753#define _mm_cvtt_roundsd_u32(A, R) \
5754 ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5755
5756static __inline__ unsigned __DEFAULT_FN_ATTRS128
5757_mm_cvttsd_u32 (__m128d __A)
5758{
5759 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
5761}
5762
5763#ifdef __x86_64__
5764#define _mm_cvtt_roundsd_u64(A, R) \
5765 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
5766 (int)(R)))
5767
5768static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5769_mm_cvttsd_u64 (__m128d __A)
5770{
5771 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
5772 __A,
5774}
5775#endif
5776
5777#define _mm_cvtt_roundss_i32(A, R) \
5778 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5779
5780#define _mm_cvtt_roundss_si32(A, R) \
5781 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5782
5783static __inline__ int __DEFAULT_FN_ATTRS128
5784_mm_cvttss_i32 (__m128 __A)
5785{
5786 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
5788}
5789
5790#ifdef __x86_64__
5791#define _mm_cvtt_roundss_i64(A, R) \
5792 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5793
5794#define _mm_cvtt_roundss_si64(A, R) \
5795 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5796
5797static __inline__ long long __DEFAULT_FN_ATTRS128
5798_mm_cvttss_i64 (__m128 __A)
5799{
5800 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
5802}
5803#endif
5804
5805#define _mm_cvtt_roundss_u32(A, R) \
5806 ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
5807
5808static __inline__ unsigned __DEFAULT_FN_ATTRS128
5809_mm_cvttss_u32 (__m128 __A)
5810{
5811 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
5813}
5814
5815#ifdef __x86_64__
5816#define _mm_cvtt_roundss_u64(A, R) \
5817 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
5818 (int)(R)))
5819
5820static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5821_mm_cvttss_u64 (__m128 __A)
5822{
5823 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
5824 __A,
5826}
5827#endif
5828
/* Immediate in-lane permutes (VPERMILPD/VPERMILPS, immediate form); the
   mask/maskz forms blend the permuted result against W or zero via the
   select builtin.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
5854
/* Variable in-lane permute of doubles (VPERMILPD, variable form): selectors
   come from the corresponding element of __C.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutevar_pd(__m512d __A, __m512i __C) {
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

/* Masked form: elements with a zero bit in __U are taken from __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                 (__v8df)_mm512_permutevar_pd(__A, __C),
                                 (__v8df)__W);
}

/* Zero-masked form: elements with a zero bit in __U become 0.0.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                 (__v8df)_mm512_permutevar_pd(__A, __C),
                                 (__v8df)_mm512_setzero_pd());
}

/* Variable in-lane permute of floats (VPERMILPS, variable form).  */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutevar_ps(__m512 __A, __m512i __C) {
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

/* Masked form: elements with a zero bit in __U are taken from __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                (__v16sf)_mm512_permutevar_ps(__A, __C),
                                (__v16sf)__W);
}

/* Zero-masked form: elements with a zero bit in __U become 0.0f.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                (__v16sf)_mm512_permutevar_ps(__A, __C),
                                (__v16sf)_mm512_setzero_ps());
}
5892
/* Two-source full permute (VPERMI2PD): each result element is selected from
   the concatenation of __A and __B by the index in __I.  */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) {
  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
                                                 (__v8df)__B);
}

/* Masked form: elements with a zero bit in __U keep __A.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I,
                            __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                              (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                              (__v8df)__A);
}

/* mask2 form: elements with a zero bit in __U keep the (bit-reinterpreted)
   index vector __I.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
                             __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                              (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                              (__v8df)(__m512d)__I);
}

/* Zero-masked form: elements with a zero bit in __U become 0.0.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
                             __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                              (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                              (__v8df)_mm512_setzero_pd());
}

/* Two-source full permute of floats (VPERMI2PS).  */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) {
  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
                                                (__v16sf) __B);
}

/* Masked form: elements with a zero bit in __U keep __A.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I,
                            __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                             (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                             (__v16sf)__A);
}

/* mask2 form: elements with a zero bit in __U keep the (bit-reinterpreted)
   index vector __I.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U,
                             __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                             (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                             (__v16sf)(__m512)__I);
}

/* Zero-masked form: elements with a zero bit in __U become 0.0f.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I,
                             __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                             (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                             (__v16sf)_mm512_setzero_ps());
}
5952
5953#define _mm512_cvtt_roundpd_epu32(A, R) \
5954 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
5955 (__v8si)_mm256_undefined_si256(), \
5956 (__mmask8)-1, (int)(R)))
5957
5958#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
5959 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
5960 (__v8si)(__m256i)(W), \
5961 (__mmask8)(U), (int)(R)))
5962
5963#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
5964 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
5965 (__v8si)_mm256_setzero_si256(), \
5966 (__mmask8)(U), (int)(R)))
5967
5968static __inline__ __m256i __DEFAULT_FN_ATTRS512
5970{
5971 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5972 (__v8si)
5974 (__mmask8) -1,
5976}
5977
5978static __inline__ __m256i __DEFAULT_FN_ATTRS512
5979_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
5980{
5981 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5982 (__v8si) __W,
5983 (__mmask8) __U,
5985}
5986
5987static __inline__ __m256i __DEFAULT_FN_ATTRS512
5989{
5990 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5991 (__v8si)
5993 (__mmask8) __U,
5995}
5996
/* Round the low element of B to a number of fraction bits given by imm
   (VRNDSCALESD/VRNDSCALESS); the upper elements are copied from A.  The
   _round variants take an explicit rounding/SAE immediate R, the others use
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

/* Single-precision counterparts (VRNDSCALESS).  */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6080
6081#define _mm512_scalef_round_pd(A, B, R) \
6082 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6083 (__v8df)(__m512d)(B), \
6084 (__v8df)_mm512_undefined_pd(), \
6085 (__mmask8)-1, (int)(R)))
6086
6087#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6088 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6089 (__v8df)(__m512d)(B), \
6090 (__v8df)(__m512d)(W), \
6091 (__mmask8)(U), (int)(R)))
6092
6093#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6094 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6095 (__v8df)(__m512d)(B), \
6096 (__v8df)_mm512_setzero_pd(), \
6097 (__mmask8)(U), (int)(R)))
6098
6099static __inline__ __m512d __DEFAULT_FN_ATTRS512
6100_mm512_scalef_pd (__m512d __A, __m512d __B)
6101{
6102 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6103 (__v8df) __B,
6104 (__v8df)
6106 (__mmask8) -1,
6108}
6109
6110static __inline__ __m512d __DEFAULT_FN_ATTRS512
6111_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6112{
6113 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6114 (__v8df) __B,
6115 (__v8df) __W,
6116 (__mmask8) __U,
6118}
6119
6120static __inline__ __m512d __DEFAULT_FN_ATTRS512
6121_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6122{
6123 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6124 (__v8df) __B,
6125 (__v8df)
6127 (__mmask8) __U,
6129}
6130
6131#define _mm512_scalef_round_ps(A, B, R) \
6132 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6133 (__v16sf)(__m512)(B), \
6134 (__v16sf)_mm512_undefined_ps(), \
6135 (__mmask16)-1, (int)(R)))
6136
6137#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6138 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6139 (__v16sf)(__m512)(B), \
6140 (__v16sf)(__m512)(W), \
6141 (__mmask16)(U), (int)(R)))
6142
6143#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6144 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6145 (__v16sf)(__m512)(B), \
6146 (__v16sf)_mm512_setzero_ps(), \
6147 (__mmask16)(U), (int)(R)))
6148
6149static __inline__ __m512 __DEFAULT_FN_ATTRS512
6150_mm512_scalef_ps (__m512 __A, __m512 __B)
6151{
6152 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6153 (__v16sf) __B,
6154 (__v16sf)
6156 (__mmask16) -1,
6158}
6159
6160static __inline__ __m512 __DEFAULT_FN_ATTRS512
6161_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6162{
6163 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6164 (__v16sf) __B,
6165 (__v16sf) __W,
6166 (__mmask16) __U,
6168}
6169
6170static __inline__ __m512 __DEFAULT_FN_ATTRS512
6171_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6172{
6173 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6174 (__v16sf) __B,
6175 (__v16sf)
6177 (__mmask16) __U,
6179}
6180
6181#define _mm_scalef_round_sd(A, B, R) \
6182 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6183 (__v2df)(__m128d)(B), \
6184 (__v2df)_mm_setzero_pd(), \
6185 (__mmask8)-1, (int)(R)))
6186
6187static __inline__ __m128d __DEFAULT_FN_ATTRS128
6188_mm_scalef_sd (__m128d __A, __m128d __B)
6189{
6190 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6191 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6192 (__mmask8) -1,
6194}
6195
6196static __inline__ __m128d __DEFAULT_FN_ATTRS128
6197_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6198{
6199 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6200 (__v2df) __B,
6201 (__v2df) __W,
6202 (__mmask8) __U,
6204}
6205
6206#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6207 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6208 (__v2df)(__m128d)(B), \
6209 (__v2df)(__m128d)(W), \
6210 (__mmask8)(U), (int)(R)))
6211
6212static __inline__ __m128d __DEFAULT_FN_ATTRS128
6213_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6214{
6215 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6216 (__v2df) __B,
6217 (__v2df) _mm_setzero_pd (),
6218 (__mmask8) __U,
6220}
6221
6222#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6223 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6224 (__v2df)(__m128d)(B), \
6225 (__v2df)_mm_setzero_pd(), \
6226 (__mmask8)(U), (int)(R)))
6227
6228#define _mm_scalef_round_ss(A, B, R) \
6229 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6230 (__v4sf)(__m128)(B), \
6231 (__v4sf)_mm_setzero_ps(), \
6232 (__mmask8)-1, (int)(R)))
6233
6234static __inline__ __m128 __DEFAULT_FN_ATTRS128
6235_mm_scalef_ss (__m128 __A, __m128 __B)
6236{
6237 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6238 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6239 (__mmask8) -1,
6241}
6242
6243static __inline__ __m128 __DEFAULT_FN_ATTRS128
6244_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6245{
6246 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6247 (__v4sf) __B,
6248 (__v4sf) __W,
6249 (__mmask8) __U,
6251}
6252
6253#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6254 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6255 (__v4sf)(__m128)(B), \
6256 (__v4sf)(__m128)(W), \
6257 (__mmask8)(U), (int)(R)))
6258
6259static __inline__ __m128 __DEFAULT_FN_ATTRS128
6260_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6261{
6262 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6263 (__v4sf) __B,
6264 (__v4sf) _mm_setzero_ps (),
6265 (__mmask8) __U,
6267}
6268
6269#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6270 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6271 (__v4sf)(__m128)(B), \
6272 (__v4sf)_mm_setzero_ps(), \
6273 (__mmask8)(U), \
6274 (int)(R)))
6275
/* Arithmetic right shift of each 32-bit element of __A by the immediate
   count __B (VPSRAD).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srai_epi32(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
}

/* Masked form: elements with a zero bit in __U are taken from __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srai_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked form: elements with a zero bit in __U become zero.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srai_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Arithmetic right shift of each 64-bit element of __A by the immediate
   count __B (VPSRAQ).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srai_epi64(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
}

/* Masked form: elements with a zero bit in __U are taken from __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srai_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked form: elements with a zero bit in __U become zero.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srai_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
6315
/* 128-bit-lane shuffles (VSHUFF32X4/VSHUFF64X2/VSHUFI32X4/VSHUFI64X2): imm
   selects which 128-bit chunks of A and B form the result.  The mask/maskz
   forms blend against W or zero.  */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

/* Element-level shuffles within 128-bit lanes (VSHUFPD/VSHUFPS).  */
#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
6399
6400#define _mm_sqrt_round_sd(A, B, R) \
6401 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6402 (__v2df)(__m128d)(B), \
6403 (__v2df)_mm_setzero_pd(), \
6404 (__mmask8)-1, (int)(R)))
6405
6406static __inline__ __m128d __DEFAULT_FN_ATTRS128
6407_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6408{
6409 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6410 (__v2df) __B,
6411 (__v2df) __W,
6412 (__mmask8) __U,
6414}
6415
6416#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6417 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6418 (__v2df)(__m128d)(B), \
6419 (__v2df)(__m128d)(W), \
6420 (__mmask8)(U), (int)(R)))
6421
6422static __inline__ __m128d __DEFAULT_FN_ATTRS128
6423_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6424{
6425 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6426 (__v2df) __B,
6427 (__v2df) _mm_setzero_pd (),
6428 (__mmask8) __U,
6430}
6431
6432#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6433 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6434 (__v2df)(__m128d)(B), \
6435 (__v2df)_mm_setzero_pd(), \
6436 (__mmask8)(U), (int)(R)))
6437
6438#define _mm_sqrt_round_ss(A, B, R) \
6439 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6440 (__v4sf)(__m128)(B), \
6441 (__v4sf)_mm_setzero_ps(), \
6442 (__mmask8)-1, (int)(R)))
6443
6444static __inline__ __m128 __DEFAULT_FN_ATTRS128
6445_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6446{
6447 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6448 (__v4sf) __B,
6449 (__v4sf) __W,
6450 (__mmask8) __U,
6452}
6453
6454#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6455 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6456 (__v4sf)(__m128)(B), \
6457 (__v4sf)(__m128)(W), (__mmask8)(U), \
6458 (int)(R)))
6459
6460static __inline__ __m128 __DEFAULT_FN_ATTRS128
6461_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6462{
6463 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6464 (__v4sf) __B,
6465 (__v4sf) _mm_setzero_ps (),
6466 (__mmask8) __U,
6468}
6469
6470#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6471 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6472 (__v4sf)(__m128)(B), \
6473 (__v4sf)_mm_setzero_ps(), \
6474 (__mmask8)(U), (int)(R)))
6475
6476static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6478 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6479 0, 1, 2, 3, 0, 1, 2, 3,
6480 0, 1, 2, 3, 0, 1, 2, 3);
6481}
6482
6483static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6484_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
6485 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6486 (__v16sf)_mm512_broadcast_f32x4(__A),
6487 (__v16sf)__O);
6488}
6489
6490static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6492 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6493 (__v16sf)_mm512_broadcast_f32x4(__A),
6494 (__v16sf)_mm512_setzero_ps());
6495}
6496
6497static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6499 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6500 0, 1, 2, 3, 0, 1, 2, 3);
6501}
6502
6503static __inline__ __m512d __DEFAULT_FN_ATTRS512
6504_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6505{
6506 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6507 (__v8df)_mm512_broadcast_f64x4(__A),
6508 (__v8df)__O);
6509}
6510
6511static __inline__ __m512d __DEFAULT_FN_ATTRS512
6513{
6514 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6515 (__v8df)_mm512_broadcast_f64x4(__A),
6516 (__v8df)_mm512_setzero_pd());
6517}
6518
6519static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6521 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6522 0, 1, 2, 3, 0, 1, 2, 3,
6523 0, 1, 2, 3, 0, 1, 2, 3);
6524}
6525
6526static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6527_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
6528 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6529 (__v16si)_mm512_broadcast_i32x4(__A),
6530 (__v16si)__O);
6531}
6532
6533static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6535 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6536 (__v16si)_mm512_broadcast_i32x4(__A),
6537 (__v16si)_mm512_setzero_si512());
6538}
6539
6540static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6542 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6543 0, 1, 2, 3, 0, 1, 2, 3);
6544}
6545
6546static __inline__ __m512i __DEFAULT_FN_ATTRS512
6547_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6548{
6549 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6550 (__v8di)_mm512_broadcast_i64x4(__A),
6551 (__v8di)__O);
6552}
6553
6554static __inline__ __m512i __DEFAULT_FN_ATTRS512
6556{
6557 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6558 (__v8di)_mm512_broadcast_i64x4(__A),
6559 (__v8di)_mm512_setzero_si512());
6560}
6561
6562static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6563_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) {
6564 return (__m512d)__builtin_ia32_selectpd_512(__M,
6565 (__v8df) _mm512_broadcastsd_pd(__A),
6566 (__v8df) __O);
6567}
6568
6569static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6571 return (__m512d)__builtin_ia32_selectpd_512(__M,
6572 (__v8df) _mm512_broadcastsd_pd(__A),
6573 (__v8df) _mm512_setzero_pd());
6574}
6575
6576static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6577_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) {
6578 return (__m512)__builtin_ia32_selectps_512(__M,
6579 (__v16sf) _mm512_broadcastss_ps(__A),
6580 (__v16sf) __O);
6581}
6582
6583static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6585 return (__m512)__builtin_ia32_selectps_512(__M,
6586 (__v16sf) _mm512_broadcastss_ps(__A),
6587 (__v16sf) _mm512_setzero_ps());
6588}
6589
6590static __inline__ __m128i __DEFAULT_FN_ATTRS512
6592{
6593 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6594 (__v16qi) _mm_undefined_si128 (),
6595 (__mmask16) -1);
6596}
6597
6598static __inline__ __m128i __DEFAULT_FN_ATTRS512
6599_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6600{
6601 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6602 (__v16qi) __O, __M);
6603}
6604
6605static __inline__ __m128i __DEFAULT_FN_ATTRS512
6607{
6608 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6609 (__v16qi) _mm_setzero_si128 (),
6610 __M);
6611}
6612
6613static __inline__ void __DEFAULT_FN_ATTRS512
6615{
6616 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6617}
6618
6619static __inline__ __m256i __DEFAULT_FN_ATTRS512
6621{
6622 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6623 (__v16hi) _mm256_undefined_si256 (),
6624 (__mmask16) -1);
6625}
6626
6627static __inline__ __m256i __DEFAULT_FN_ATTRS512
6628_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6629{
6630 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6631 (__v16hi) __O, __M);
6632}
6633
6634static __inline__ __m256i __DEFAULT_FN_ATTRS512
6636{
6637 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6638 (__v16hi) _mm256_setzero_si256 (),
6639 __M);
6640}
6641
6642static __inline__ void __DEFAULT_FN_ATTRS512
6644{
6645 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6646}
6647
6648static __inline__ __m128i __DEFAULT_FN_ATTRS512
6650{
6651 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6652 (__v16qi) _mm_undefined_si128 (),
6653 (__mmask8) -1);
6654}
6655
6656static __inline__ __m128i __DEFAULT_FN_ATTRS512
6657_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6658{
6659 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6660 (__v16qi) __O, __M);
6661}
6662
6663static __inline__ __m128i __DEFAULT_FN_ATTRS512
6665{
6666 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6667 (__v16qi) _mm_setzero_si128 (),
6668 __M);
6669}
6670
6671static __inline__ void __DEFAULT_FN_ATTRS512
6673{
6674 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6675}
6676
6677static __inline__ __m256i __DEFAULT_FN_ATTRS512
6679{
6680 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6681 (__v8si) _mm256_undefined_si256 (),
6682 (__mmask8) -1);
6683}
6684
6685static __inline__ __m256i __DEFAULT_FN_ATTRS512
6686_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6687{
6688 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6689 (__v8si) __O, __M);
6690}
6691
6692static __inline__ __m256i __DEFAULT_FN_ATTRS512
6694{
6695 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6696 (__v8si) _mm256_setzero_si256 (),
6697 __M);
6698}
6699
6700static __inline__ void __DEFAULT_FN_ATTRS512
6702{
6703 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6704}
6705
6706static __inline__ __m128i __DEFAULT_FN_ATTRS512
6708{
6709 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6710 (__v8hi) _mm_undefined_si128 (),
6711 (__mmask8) -1);
6712}
6713
6714static __inline__ __m128i __DEFAULT_FN_ATTRS512
6715_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6716{
6717 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6718 (__v8hi) __O, __M);
6719}
6720
6721static __inline__ __m128i __DEFAULT_FN_ATTRS512
6723{
6724 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6725 (__v8hi) _mm_setzero_si128 (),
6726 __M);
6727}
6728
6729static __inline__ void __DEFAULT_FN_ATTRS512
6731{
6732 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6733}
6734
6735static __inline__ __m128i __DEFAULT_FN_ATTRS512
6737{
6738 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6739 (__v16qi) _mm_undefined_si128 (),
6740 (__mmask16) -1);
6741}
6742
6743static __inline__ __m128i __DEFAULT_FN_ATTRS512
6744_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6745{
6746 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6747 (__v16qi) __O,
6748 __M);
6749}
6750
6751static __inline__ __m128i __DEFAULT_FN_ATTRS512
6753{
6754 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6755 (__v16qi) _mm_setzero_si128 (),
6756 __M);
6757}
6758
6759static __inline__ void __DEFAULT_FN_ATTRS512
6761{
6762 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6763}
6764
6765static __inline__ __m256i __DEFAULT_FN_ATTRS512
6767{
6768 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6769 (__v16hi) _mm256_undefined_si256 (),
6770 (__mmask16) -1);
6771}
6772
6773static __inline__ __m256i __DEFAULT_FN_ATTRS512
6774_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6775{
6776 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6777 (__v16hi) __O,
6778 __M);
6779}
6780
6781static __inline__ __m256i __DEFAULT_FN_ATTRS512
6783{
6784 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6785 (__v16hi) _mm256_setzero_si256 (),
6786 __M);
6787}
6788
6789static __inline__ void __DEFAULT_FN_ATTRS512
6791{
6792 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6793}
6794
6795static __inline__ __m128i __DEFAULT_FN_ATTRS512
6797{
6798 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6799 (__v16qi) _mm_undefined_si128 (),
6800 (__mmask8) -1);
6801}
6802
6803static __inline__ __m128i __DEFAULT_FN_ATTRS512
6804_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6805{
6806 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6807 (__v16qi) __O,
6808 __M);
6809}
6810
6811static __inline__ __m128i __DEFAULT_FN_ATTRS512
6813{
6814 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6815 (__v16qi) _mm_setzero_si128 (),
6816 __M);
6817}
6818
6819static __inline__ void __DEFAULT_FN_ATTRS512
6821{
6822 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6823}
6824
6825static __inline__ __m256i __DEFAULT_FN_ATTRS512
6827{
6828 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6829 (__v8si) _mm256_undefined_si256 (),
6830 (__mmask8) -1);
6831}
6832
6833static __inline__ __m256i __DEFAULT_FN_ATTRS512
6834_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6835{
6836 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6837 (__v8si) __O, __M);
6838}
6839
6840static __inline__ __m256i __DEFAULT_FN_ATTRS512
6842{
6843 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6844 (__v8si) _mm256_setzero_si256 (),
6845 __M);
6846}
6847
6848static __inline__ void __DEFAULT_FN_ATTRS512
6850{
6851 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
6852}
6853
6854static __inline__ __m128i __DEFAULT_FN_ATTRS512
6856{
6857 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6858 (__v8hi) _mm_undefined_si128 (),
6859 (__mmask8) -1);
6860}
6861
6862static __inline__ __m128i __DEFAULT_FN_ATTRS512
6863_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6864{
6865 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6866 (__v8hi) __O, __M);
6867}
6868
6869static __inline__ __m128i __DEFAULT_FN_ATTRS512
6871{
6872 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6873 (__v8hi) _mm_setzero_si128 (),
6874 __M);
6875}
6876
6877static __inline__ void __DEFAULT_FN_ATTRS512
6879{
6880 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
6881}
6882
6883static __inline__ __m128i __DEFAULT_FN_ATTRS512
6885{
6886 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6887 (__v16qi) _mm_undefined_si128 (),
6888 (__mmask16) -1);
6889}
6890
6891static __inline__ __m128i __DEFAULT_FN_ATTRS512
6892_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6893{
6894 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6895 (__v16qi) __O, __M);
6896}
6897
6898static __inline__ __m128i __DEFAULT_FN_ATTRS512
6900{
6901 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6902 (__v16qi) _mm_setzero_si128 (),
6903 __M);
6904}
6905
6906static __inline__ void __DEFAULT_FN_ATTRS512
6908{
6909 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6910}
6911
6912static __inline__ __m256i __DEFAULT_FN_ATTRS512
6914{
6915 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6916 (__v16hi) _mm256_undefined_si256 (),
6917 (__mmask16) -1);
6918}
6919
6920static __inline__ __m256i __DEFAULT_FN_ATTRS512
6921_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6922{
6923 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6924 (__v16hi) __O, __M);
6925}
6926
6927static __inline__ __m256i __DEFAULT_FN_ATTRS512
6929{
6930 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6931 (__v16hi) _mm256_setzero_si256 (),
6932 __M);
6933}
6934
6935static __inline__ void __DEFAULT_FN_ATTRS512
6937{
6938 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
6939}
6940
6941static __inline__ __m128i __DEFAULT_FN_ATTRS512
6943{
6944 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6945 (__v16qi) _mm_undefined_si128 (),
6946 (__mmask8) -1);
6947}
6948
6949static __inline__ __m128i __DEFAULT_FN_ATTRS512
6950_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6951{
6952 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6953 (__v16qi) __O, __M);
6954}
6955
6956static __inline__ __m128i __DEFAULT_FN_ATTRS512
6958{
6959 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6960 (__v16qi) _mm_setzero_si128 (),
6961 __M);
6962}
6963
6964static __inline__ void __DEFAULT_FN_ATTRS512
6966{
6967 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6968}
6969
6970static __inline__ __m256i __DEFAULT_FN_ATTRS512
6972{
6973 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6974 (__v8si) _mm256_undefined_si256 (),
6975 (__mmask8) -1);
6976}
6977
6978static __inline__ __m256i __DEFAULT_FN_ATTRS512
6979_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6980{
6981 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6982 (__v8si) __O, __M);
6983}
6984
6985static __inline__ __m256i __DEFAULT_FN_ATTRS512
6987{
6988 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6989 (__v8si) _mm256_setzero_si256 (),
6990 __M);
6991}
6992
6993static __inline__ void __DEFAULT_FN_ATTRS512
6995{
6996 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6997}
6998
6999static __inline__ __m128i __DEFAULT_FN_ATTRS512
7001{
7002 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7003 (__v8hi) _mm_undefined_si128 (),
7004 (__mmask8) -1);
7005}
7006
7007static __inline__ __m128i __DEFAULT_FN_ATTRS512
7008_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7009{
7010 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7011 (__v8hi) __O, __M);
7012}
7013
7014static __inline__ __m128i __DEFAULT_FN_ATTRS512
7016{
7017 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7018 (__v8hi) _mm_setzero_si128 (),
7019 __M);
7020}
7021
7022static __inline__ void __DEFAULT_FN_ATTRS512
7024{
7025 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7026}
7027
/* Extract the 128-bit lane of 4 x i32 selected by imm (VEXTRACTI32X4).
   imm must be an integer constant in [0, 3]. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
      (__mmask8) - 1))

/* Merge-masked extract: result lanes with a clear bit in U come from W. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

/* Zero-masked extract: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

/* Extract the 256-bit lane of 4 x i64 selected by imm (VEXTRACTI64X4).
   imm must be an integer constant in [0, 1]. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8) - 1))

/* Merge-masked extract: result lanes with a clear bit in U come from W. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

/* Zero-masked extract: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
7057
/* Insert the 256-bit vector B into the lane of A selected by imm
   (VINSERTF64X4); imm must be an integer constant in [0, 1]. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

/* Merge-masked insert: result lanes with a clear bit in U come from W. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)(__m512d)(W)))

/* Zero-masked insert: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)_mm512_setzero_pd()))

/* Insert 4 x i64 (VINSERTI64X4); imm selects the destination 256-bit lane. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

/* Merge-masked insert: result lanes with a clear bit in U come from W. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

/* Zero-masked insert: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

/* Insert 4 x float (VINSERTF32X4); imm must be a constant in [0, 3]. */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

/* Merge-masked insert: result lanes with a clear bit in U come from W. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                (__v16sf)(__m512)(W)))

/* Zero-masked insert: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                (__v16sf)_mm512_setzero_ps()))

/* Insert 4 x i32 (VINSERTI32X4); imm must be a constant in [0, 3]. */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

/* Merge-masked insert: result lanes with a clear bit in U come from W. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

/* Zero-masked insert: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
7113
/* VGETMANTPD: extract the normalized mantissa of each double. B selects the
   normalization interval and C the sign control; the builtin receives them
   packed as (C << 2) | B. R is the rounding/SAE control for _round_ forms. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

/* Merge-masked form: result lanes with a clear bit in U come from W. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Zero-masked form: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

/* Non-round forms pass _MM_FROUND_CUR_DIRECTION (current rounding mode). */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

/* VGETMANTPS: single-precision counterpart of the macros above. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7191
/* VGETEXPPD: extract the unbiased exponent of each double as a double.
   R is the rounding/SAE control. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked form: result lanes with a clear bit in U come from W. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked form: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7206
7207static __inline__ __m512d __DEFAULT_FN_ATTRS512
7208_mm512_getexp_pd (__m512d __A)
7209{
7210 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7211 (__v8df) _mm512_undefined_pd (),
7212 (__mmask8) -1,
7214}
7215
7216static __inline__ __m512d __DEFAULT_FN_ATTRS512
7217_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7218{
7219 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7220 (__v8df) __W,
7221 (__mmask8) __U,
7223}
7224
7225static __inline__ __m512d __DEFAULT_FN_ATTRS512
7227{
7228 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7229 (__v8df) _mm512_setzero_pd (),
7230 (__mmask8) __U,
7232}
7233
/* VGETEXPPS: extract the unbiased exponent of each float as a float.
   R is the rounding/SAE control. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

/* Merge-masked form: result lanes with a clear bit in U come from W. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

/* Zero-masked form: result lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7248
7249static __inline__ __m512 __DEFAULT_FN_ATTRS512
7251{
7252 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7253 (__v16sf) _mm512_undefined_ps (),
7254 (__mmask16) -1,
7256}
7257
7258static __inline__ __m512 __DEFAULT_FN_ATTRS512
7259_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7260{
7261 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7262 (__v16sf) __W,
7263 (__mmask16) __U,
7265}
7266
7267static __inline__ __m512 __DEFAULT_FN_ATTRS512
7269{
7270 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7271 (__v16sf) _mm512_setzero_ps (),
7272 (__mmask16) __U,
7274}
7275
/* Gather intrinsics. addr is the base pointer, index the vector of offsets,
   and scale the byte multiplier (must be 1, 2, 4, or 8). Masked variants
   gather only lanes whose mask bit is set; other lanes come from v1_old. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* NOTE(review): the two i32gather_ps macros below cast index through
   (__m512) rather than (__m512i); both reduce to the same bitcast through
   __v16si, but confirm against upstream before "fixing". */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7371
/* Scatter intrinsics. addr is the base pointer, index the vector of offsets,
   v1 the data to store, and scale the byte multiplier (1, 2, 4, or 8).
   Masked variants store only lanes whose mask bit is set. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7451
7452static __inline__ __m128 __DEFAULT_FN_ATTRS128
7453_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7454{
7455 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7456 (__v4sf)__A,
7457 (__v4sf)__B,
7458 (__mmask8)__U,
7460}
7461
7462#define _mm_fmadd_round_ss(A, B, C, R) \
7463 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7464 (__v4sf)(__m128)(B), \
7465 (__v4sf)(__m128)(C), (__mmask8)-1, \
7466 (int)(R)))
7467
7468#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7469 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7470 (__v4sf)(__m128)(A), \
7471 (__v4sf)(__m128)(B), (__mmask8)(U), \
7472 (int)(R)))
7473
7474static __inline__ __m128 __DEFAULT_FN_ATTRS128
7475_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7476{
7477 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7478 (__v4sf)__B,
7479 (__v4sf)__C,
7480 (__mmask8)__U,
7482}
7483
7484#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7485 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7486 (__v4sf)(__m128)(B), \
7487 (__v4sf)(__m128)(C), (__mmask8)(U), \
7488 (int)(R)))
7489
7490static __inline__ __m128 __DEFAULT_FN_ATTRS128
7491_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7492{
7493 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7494 (__v4sf)__X,
7495 (__v4sf)__Y,
7496 (__mmask8)__U,
7498}
7499
7500#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7501 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7502 (__v4sf)(__m128)(X), \
7503 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7504 (int)(R)))
7505
7506static __inline__ __m128 __DEFAULT_FN_ATTRS128
7507_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7508{
7509 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7510 (__v4sf)__A,
7511 -(__v4sf)__B,
7512 (__mmask8)__U,
7514}
7515
7516#define _mm_fmsub_round_ss(A, B, C, R) \
7517 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7518 (__v4sf)(__m128)(B), \
7519 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7520 (int)(R)))
7521
7522#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7523 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7524 (__v4sf)(__m128)(A), \
7525 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7526 (int)(R)))
7527
7528static __inline__ __m128 __DEFAULT_FN_ATTRS128
7529_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7530{
7531 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7532 (__v4sf)__B,
7533 -(__v4sf)__C,
7534 (__mmask8)__U,
7536}
7537
7538#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7539 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7540 (__v4sf)(__m128)(B), \
7541 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7542 (int)(R)))
7543
7544static __inline__ __m128 __DEFAULT_FN_ATTRS128
7545_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7546{
7547 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7548 (__v4sf)__X,
7549 (__v4sf)__Y,
7550 (__mmask8)__U,
7552}
7553
7554#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7555 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7556 (__v4sf)(__m128)(X), \
7557 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7558 (int)(R)))
7559
7560static __inline__ __m128 __DEFAULT_FN_ATTRS128
7561_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7562{
7563 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7564 -(__v4sf)__A,
7565 (__v4sf)__B,
7566 (__mmask8)__U,
7568}
7569
7570#define _mm_fnmadd_round_ss(A, B, C, R) \
7571 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7572 -(__v4sf)(__m128)(B), \
7573 (__v4sf)(__m128)(C), (__mmask8)-1, \
7574 (int)(R)))
7575
7576#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7577 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7578 -(__v4sf)(__m128)(A), \
7579 (__v4sf)(__m128)(B), (__mmask8)(U), \
7580 (int)(R)))
7581
7582static __inline__ __m128 __DEFAULT_FN_ATTRS128
7583_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7584{
7585 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7586 -(__v4sf)__B,
7587 (__v4sf)__C,
7588 (__mmask8)__U,
7590}
7591
7592#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7593 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7594 -(__v4sf)(__m128)(B), \
7595 (__v4sf)(__m128)(C), (__mmask8)(U), \
7596 (int)(R)))
7597
7598static __inline__ __m128 __DEFAULT_FN_ATTRS128
7599_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7600{
7601 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7602 -(__v4sf)__X,
7603 (__v4sf)__Y,
7604 (__mmask8)__U,
7606}
7607
7608#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7609 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7610 -(__v4sf)(__m128)(X), \
7611 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7612 (int)(R)))
7613
7614static __inline__ __m128 __DEFAULT_FN_ATTRS128
7615_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7616{
7617 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7618 -(__v4sf)__A,
7619 -(__v4sf)__B,
7620 (__mmask8)__U,
7622}
7623
7624#define _mm_fnmsub_round_ss(A, B, C, R) \
7625 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7626 -(__v4sf)(__m128)(B), \
7627 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7628 (int)(R)))
7629
7630#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7631 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7632 -(__v4sf)(__m128)(A), \
7633 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7634 (int)(R)))
7635
7636static __inline__ __m128 __DEFAULT_FN_ATTRS128
7637_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7638{
7639 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7640 -(__v4sf)__B,
7641 -(__v4sf)__C,
7642 (__mmask8)__U,
7644}
7645
7646#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7647 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7648 -(__v4sf)(__m128)(B), \
7649 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7650 (int)(R)))
7651
7652static __inline__ __m128 __DEFAULT_FN_ATTRS128
7653_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7654{
7655 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7656 -(__v4sf)__X,
7657 (__v4sf)__Y,
7658 (__mmask8)__U,
7660}
7661
7662#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7663 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7664 -(__v4sf)(__m128)(X), \
7665 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7666 (int)(R)))
7667
7668static __inline__ __m128d __DEFAULT_FN_ATTRS128
7669_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7670{
7671 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7672 (__v2df)__A,
7673 (__v2df)__B,
7674 (__mmask8)__U,
7676}
7677
7678#define _mm_fmadd_round_sd(A, B, C, R) \
7679 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7680 (__v2df)(__m128d)(B), \
7681 (__v2df)(__m128d)(C), (__mmask8)-1, \
7682 (int)(R)))
7683
7684#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
7685 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7686 (__v2df)(__m128d)(A), \
7687 (__v2df)(__m128d)(B), (__mmask8)(U), \
7688 (int)(R)))
7689
7690static __inline__ __m128d __DEFAULT_FN_ATTRS128
7691_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7692{
7693 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7694 (__v2df)__B,
7695 (__v2df)__C,
7696 (__mmask8)__U,
7698}
7699
7700#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
7701 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7702 (__v2df)(__m128d)(B), \
7703 (__v2df)(__m128d)(C), (__mmask8)(U), \
7704 (int)(R)))
7705
7706static __inline__ __m128d __DEFAULT_FN_ATTRS128
7707_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7708{
7709 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7710 (__v2df)__X,
7711 (__v2df)__Y,
7712 (__mmask8)__U,
7714}
7715
7716#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
7717 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7718 (__v2df)(__m128d)(X), \
7719 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7720 (int)(R)))
7721
7722static __inline__ __m128d __DEFAULT_FN_ATTRS128
7723_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7724{
7725 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7726 (__v2df)__A,
7727 -(__v2df)__B,
7728 (__mmask8)__U,
7730}
7731
7732#define _mm_fmsub_round_sd(A, B, C, R) \
7733 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7734 (__v2df)(__m128d)(B), \
7735 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7736 (int)(R)))
7737
7738#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
7739 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7740 (__v2df)(__m128d)(A), \
7741 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7742 (int)(R)))
7743
7744static __inline__ __m128d __DEFAULT_FN_ATTRS128
7745_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7746{
7747 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7748 (__v2df)__B,
7749 -(__v2df)__C,
7750 (__mmask8)__U,
7752}
7753
7754#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
7755 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7756 (__v2df)(__m128d)(B), \
7757 -(__v2df)(__m128d)(C), \
7758 (__mmask8)(U), (int)(R)))
7759
7760static __inline__ __m128d __DEFAULT_FN_ATTRS128
7761_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7762{
7763 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7764 (__v2df)__X,
7765 (__v2df)__Y,
7766 (__mmask8)__U,
7768}
7769
7770#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
7771 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7772 (__v2df)(__m128d)(X), \
7773 (__v2df)(__m128d)(Y), \
7774 (__mmask8)(U), (int)(R)))
7775
7776static __inline__ __m128d __DEFAULT_FN_ATTRS128
7777_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7778{
7779 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7780 -(__v2df)__A,
7781 (__v2df)__B,
7782 (__mmask8)__U,
7784}
7785
7786#define _mm_fnmadd_round_sd(A, B, C, R) \
7787 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7788 -(__v2df)(__m128d)(B), \
7789 (__v2df)(__m128d)(C), (__mmask8)-1, \
7790 (int)(R)))
7791
7792#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
7793 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7794 -(__v2df)(__m128d)(A), \
7795 (__v2df)(__m128d)(B), (__mmask8)(U), \
7796 (int)(R)))
7797
7798static __inline__ __m128d __DEFAULT_FN_ATTRS128
7799_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7800{
7801 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7802 -(__v2df)__B,
7803 (__v2df)__C,
7804 (__mmask8)__U,
7806}
7807
7808#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
7809 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7810 -(__v2df)(__m128d)(B), \
7811 (__v2df)(__m128d)(C), (__mmask8)(U), \
7812 (int)(R)))
7813
7814static __inline__ __m128d __DEFAULT_FN_ATTRS128
7815_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7816{
7817 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7818 -(__v2df)__X,
7819 (__v2df)__Y,
7820 (__mmask8)__U,
7822}
7823
7824#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
7825 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7826 -(__v2df)(__m128d)(X), \
7827 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7828 (int)(R)))
7829
7830static __inline__ __m128d __DEFAULT_FN_ATTRS128
7831_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7832{
7833 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7834 -(__v2df)__A,
7835 -(__v2df)__B,
7836 (__mmask8)__U,
7838}
7839
7840#define _mm_fnmsub_round_sd(A, B, C, R) \
7841 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7842 -(__v2df)(__m128d)(B), \
7843 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7844 (int)(R)))
7845
7846#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
7847 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7848 -(__v2df)(__m128d)(A), \
7849 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7850 (int)(R)))
7851
7852static __inline__ __m128d __DEFAULT_FN_ATTRS128
7853_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7854{
7855 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7856 -(__v2df)__B,
7857 -(__v2df)__C,
7858 (__mmask8)__U,
7860}
7861
7862#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
7863 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7864 -(__v2df)(__m128d)(B), \
7865 -(__v2df)(__m128d)(C), \
7866 (__mmask8)(U), \
7867 (int)(R)))
7868
7869static __inline__ __m128d __DEFAULT_FN_ATTRS128
7870_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7871{
7872 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7873 -(__v2df)__X,
7874 (__v2df)__Y,
7875 (__mmask8)__U,
7877}
7878
7879#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
7880 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7881 -(__v2df)(__m128d)(X), \
7882 (__v2df)(__m128d)(Y), \
7883 (__mmask8)(U), (int)(R)))
7884
/* Permute 128-bit lanes' 64-bit elements of a 512-bit vector by the 8-bit
 * immediate C (two bits select the source element within each 256-bit half).
 * Mask/maskz variants blend the permuted result with W or zero. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
7910
7911static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7912_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
7913 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
7914}
7915
7916static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7917_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X,
7918 __m512d __Y) {
7919 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7920 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7921 (__v8df)__W);
7922}
7923
7924static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7925_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
7926 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7927 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7928 (__v8df)_mm512_setzero_pd());
7929}
7930
7931static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7932_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
7933 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
7934}
7935
7936static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7937_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
7938 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7939 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
7940 (__v8di)_mm512_setzero_si512());
7941}
7942
7943static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7944_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X,
7945 __m512i __Y) {
7946 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7947 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
7948 (__v8di)__W);
7949}
7950
7951static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7952_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
7953 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
7954}
7955
7956static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7957_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
7958 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
7959 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
7960 (__v16sf)__W);
7961}
7962
7963static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7964_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
7965 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
7966 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
7967 (__v16sf)_mm512_setzero_ps());
7968}
7969
7970static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7971_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
7972 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
7973}
7974
7975#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
7976
7977static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7979 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7980 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
7981 (__v16si)_mm512_setzero_si512());
7982}
7983
7984static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7985_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X,
7986 __m512i __Y) {
7987 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7988 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
7989 (__v16si)__W);
7990}
7991
7992#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
7993
7994static __inline__ __mmask16
7996 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
7997}
7998
8001 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8002}
8003
8006 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8007}
8008
8009static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
8011 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8012}
8013
8014static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
8016 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8017}
8018
8019static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8021 return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8022}
8023
8024static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8026 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8027}
8028
8029static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8030_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8031 *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8032 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8033}
8034
8037 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8038}
8039
8042 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8043}
8044
8047 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8048}
8049
8050#define _kand_mask16 _mm512_kand
8051#define _kandn_mask16 _mm512_kandn
8052#define _knot_mask16 _mm512_knot
8053#define _kor_mask16 _mm512_kor
8054#define _kxnor_mask16 _mm512_kxnor
8055#define _kxor_mask16 _mm512_kxor
8056
8057#define _kshiftli_mask16(A, I) \
8058 ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))
8059
8060#define _kshiftri_mask16(A, I) \
8061 ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8062
8063static __inline__ unsigned int
8065 return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8066}
8067
8069_cvtu32_mask16(unsigned int __A) {
8070 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8071}
8072
8073static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8075 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8076}
8077
8078static __inline__ void __DEFAULT_FN_ATTRS
8080 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8081}
8082
8083static __inline__ void __DEFAULT_FN_ATTRS512
8084_mm512_stream_si512 (void * __P, __m512i __A)
8085{
8086 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8087 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8088}
8089
8090static __inline__ __m512i __DEFAULT_FN_ATTRS512
8092{
8093 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8094 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8095}
8096
8097static __inline__ void __DEFAULT_FN_ATTRS512
8098_mm512_stream_pd (void *__P, __m512d __A)
8099{
8100 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8101 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8102}
8103
8104static __inline__ void __DEFAULT_FN_ATTRS512
8105_mm512_stream_ps (void *__P, __m512 __A)
8106{
8107 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8108 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8109}
8110
8111static __inline__ __m512d __DEFAULT_FN_ATTRS512
8112_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8113{
8114 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8115 (__v8df) __W,
8116 (__mmask8) __U);
8117}
8118
8119static __inline__ __m512d __DEFAULT_FN_ATTRS512
8121{
8122 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8123 (__v8df)
8125 (__mmask8) __U);
8126}
8127
8128static __inline__ __m512i __DEFAULT_FN_ATTRS512
8129_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8130{
8131 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8132 (__v8di) __W,
8133 (__mmask8) __U);
8134}
8135
8136static __inline__ __m512i __DEFAULT_FN_ATTRS512
8138{
8139 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8140 (__v8di)
8142 (__mmask8) __U);
8143}
8144
8145static __inline__ __m512 __DEFAULT_FN_ATTRS512
8146_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8147{
8148 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8149 (__v16sf) __W,
8150 (__mmask16) __U);
8151}
8152
8153static __inline__ __m512 __DEFAULT_FN_ATTRS512
8155{
8156 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8157 (__v16sf)
8159 (__mmask16) __U);
8160}
8161
8162static __inline__ __m512i __DEFAULT_FN_ATTRS512
8163_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8164{
8165 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8166 (__v16si) __W,
8167 (__mmask16) __U);
8168}
8169
8170static __inline__ __m512i __DEFAULT_FN_ATTRS512
8172{
8173 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8174 (__v16si)
8176 (__mmask16) __U);
8177}
8178
/* Scalar compares producing a 1-bit result in a mask register.  P is the
 * comparison predicate (_CMP_*); the *_round_* forms take an explicit
 * rounding/SAE argument R, the others use the current direction. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8222
8223/* Bit Test */
8224
8225static __inline __mmask16 __DEFAULT_FN_ATTRS512
8226_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8227{
8230}
8231
8232static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8233_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8234{
8235 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8237}
8238
8239static __inline __mmask8 __DEFAULT_FN_ATTRS512
8240_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8241{
8242 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8244}
8245
8246static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8247_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8248{
8249 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8251}
8252
8253static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8254_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8255{
8256 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8258}
8259
8260static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8261_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8262{
8263 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8265}
8266
8267static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8268_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8269{
8270 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8272}
8273
8274static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8275_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8276{
8277 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8279}
8280
8281static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8283{
8284 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8285 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8286}
8287
8288static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8289_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8290 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8291 (__v16sf)_mm512_movehdup_ps(__A),
8292 (__v16sf)__W);
8293}
8294
8295static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8297 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8298 (__v16sf)_mm512_movehdup_ps(__A),
8299 (__v16sf)_mm512_setzero_ps());
8300}
8301
8302static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8304{
8305 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8306 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8307}
8308
8309static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8310_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8311 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8312 (__v16sf)_mm512_moveldup_ps(__A),
8313 (__v16sf)__W);
8314}
8315
8316static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8318 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8319 (__v16sf)_mm512_moveldup_ps(__A),
8320 (__v16sf)_mm512_setzero_ps());
8321}
8322
8323static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8324_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
8325 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8326}
8327
8328static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8329_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
8330 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8331 _mm_setzero_ps());
8332}
8333
8334static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8335_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
8336 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8337}
8338
8339static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8340_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
8341 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8342 _mm_setzero_pd());
8343}
8344
8345static __inline__ void __DEFAULT_FN_ATTRS128
8346_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8347{
8348 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8349}
8350
8351static __inline__ void __DEFAULT_FN_ATTRS128
8352_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8353{
8354 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8355}
8356
8357static __inline__ __m128 __DEFAULT_FN_ATTRS128
8358_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8359{
8360 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8361 (__v4sf)_mm_setzero_ps(),
8362 0, 4, 4, 4);
8363
8364 return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
8365}
8366
8367static __inline__ __m128 __DEFAULT_FN_ATTRS128
8368_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8369{
8370 return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
8371 (__v4sf) _mm_setzero_ps(),
8372 __U & 1);
8373}
8374
8375static __inline__ __m128d __DEFAULT_FN_ATTRS128
8376_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8377{
8378 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8379 (__v2df)_mm_setzero_pd(),
8380 0, 2);
8381
8382 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
8383}
8384
8385static __inline__ __m128d __DEFAULT_FN_ATTRS128
8386_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8387{
8388 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
8389 (__v2df) _mm_setzero_pd(),
8390 __U & 1);
8391}
8392
/* Shuffle 32-bit elements within each 128-bit lane by immediate I. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8405
8406static __inline__ __m512d __DEFAULT_FN_ATTRS512
8407_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8408{
8409 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8410 (__v8df) __W,
8411 (__mmask8) __U);
8412}
8413
8414static __inline__ __m512d __DEFAULT_FN_ATTRS512
8416{
8417 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8418 (__v8df) _mm512_setzero_pd (),
8419 (__mmask8) __U);
8420}
8421
8422static __inline__ __m512i __DEFAULT_FN_ATTRS512
8423_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8424{
8425 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8426 (__v8di) __W,
8427 (__mmask8) __U);
8428}
8429
8430static __inline__ __m512i __DEFAULT_FN_ATTRS512
8432{
8433 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8434 (__v8di) _mm512_setzero_si512 (),
8435 (__mmask8) __U);
8436}
8437
8438static __inline__ __m512d __DEFAULT_FN_ATTRS512
8439_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8440{
8441 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8442 (__v8df) __W,
8443 (__mmask8) __U);
8444}
8445
8446static __inline__ __m512d __DEFAULT_FN_ATTRS512
8448{
8449 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8450 (__v8df) _mm512_setzero_pd(),
8451 (__mmask8) __U);
8452}
8453
8454static __inline__ __m512i __DEFAULT_FN_ATTRS512
8455_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8456{
8457 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8458 (__v8di) __W,
8459 (__mmask8) __U);
8460}
8461
8462static __inline__ __m512i __DEFAULT_FN_ATTRS512
8464{
8465 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8466 (__v8di) _mm512_setzero_si512(),
8467 (__mmask8) __U);
8468}
8469
8470static __inline__ __m512 __DEFAULT_FN_ATTRS512
8471_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8472{
8473 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8474 (__v16sf) __W,
8475 (__mmask16) __U);
8476}
8477
8478static __inline__ __m512 __DEFAULT_FN_ATTRS512
8480{
8481 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8482 (__v16sf) _mm512_setzero_ps(),
8483 (__mmask16) __U);
8484}
8485
8486static __inline__ __m512i __DEFAULT_FN_ATTRS512
8487_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8488{
8489 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8490 (__v16si) __W,
8491 (__mmask16) __U);
8492}
8493
8494static __inline__ __m512i __DEFAULT_FN_ATTRS512
8496{
8497 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8498 (__v16si) _mm512_setzero_si512(),
8499 (__mmask16) __U);
8500}
8501
8502static __inline__ __m512 __DEFAULT_FN_ATTRS512
8503_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8504{
8505 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8506 (__v16sf) __W,
8507 (__mmask16) __U);
8508}
8509
8510static __inline__ __m512 __DEFAULT_FN_ATTRS512
8512{
8513 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8514 (__v16sf) _mm512_setzero_ps(),
8515 (__mmask16) __U);
8516}
8517
8518static __inline__ __m512i __DEFAULT_FN_ATTRS512
8519_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8520{
8521 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8522 (__v16si) __W,
8523 (__mmask16) __U);
8524}
8525
8526static __inline__ __m512i __DEFAULT_FN_ATTRS512
8528{
8529 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8530 (__v16si) _mm512_setzero_si512(),
8531 (__mmask16) __U);
8532}
8533
8534#define _mm512_cvt_roundps_pd(A, R) \
8535 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8536 (__v8df)_mm512_undefined_pd(), \
8537 (__mmask8)-1, (int)(R)))
8538
8539#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
8540 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8541 (__v8df)(__m512d)(W), \
8542 (__mmask8)(U), (int)(R)))
8543
8544#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
8545 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8546 (__v8df)_mm512_setzero_pd(), \
8547 (__mmask8)(U), (int)(R)))
8548
8549static __inline__ __m512d
8551 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8552}
8553
8554static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8555_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) {
8556 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8557 (__v8df)_mm512_cvtps_pd(__A),
8558 (__v8df)__W);
8559}
8560
8561static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8563 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8564 (__v8df)_mm512_cvtps_pd(__A),
8565 (__v8df)_mm512_setzero_pd());
8566}
8567
8568static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8570 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8571}
8572
8573static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8574_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) {
8575 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8576}
8577
8578static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8579_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {
8580 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8581 (__v8df)__W);
8582}
8583
8584static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8586 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8587 (__v8df)_mm512_setzero_pd());
8588}
8589
8590static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8591_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8592 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8593 (__v16sf)__W);
8594}
8595
8596static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8598 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8599 (__v16sf)_mm512_setzero_ps());
8600}
8601
8602static __inline__ void __DEFAULT_FN_ATTRS512
8604{
8605 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8606 (__mmask8) __U);
8607}
8608
8609static __inline__ void __DEFAULT_FN_ATTRS512
8611{
8612 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8613 (__mmask8) __U);
8614}
8615
8616static __inline__ void __DEFAULT_FN_ATTRS512
8618{
8619 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8620 (__mmask16) __U);
8621}
8622
8623static __inline__ void __DEFAULT_FN_ATTRS512
8625{
8626 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8627 (__mmask16) __U);
8628}
8629
8630#define _mm_cvt_roundsd_ss(A, B, R) \
8631 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8632 (__v2df)(__m128d)(B), \
8633 (__v4sf)_mm_undefined_ps(), \
8634 (__mmask8)-1, (int)(R)))
8635
8636#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
8637 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8638 (__v2df)(__m128d)(B), \
8639 (__v4sf)(__m128)(W), \
8640 (__mmask8)(U), (int)(R)))
8641
8642#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
8643 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8644 (__v2df)(__m128d)(B), \
8645 (__v4sf)_mm_setzero_ps(), \
8646 (__mmask8)(U), (int)(R)))
8647
8648static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8649_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) {
8650 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8651 (__v2df)__B,
8652 (__v4sf)__W,
8654}
8655
8656static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8657_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
8658 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8659 (__v2df)__B,
8660 (__v4sf)_mm_setzero_ps(),
8662}
8663
8664#define _mm_cvtss_i32 _mm_cvtss_si32
8665#define _mm_cvtsd_i32 _mm_cvtsd_si32
8666#define _mm_cvti32_sd _mm_cvtsi32_sd
8667#define _mm_cvti32_ss _mm_cvtsi32_ss
8668#ifdef __x86_64__
8669#define _mm_cvtss_i64 _mm_cvtss_si64
8670#define _mm_cvtsd_i64 _mm_cvtsd_si64
8671#define _mm_cvti64_sd _mm_cvtsi64_sd
8672#define _mm_cvti64_ss _mm_cvtsi64_ss
8673#endif
8674
/* Signed integer -> scalar float/double conversions with explicit rounding.
 * The _roundi* and _roundsi* spellings are synonyms; the 64-bit forms exist
 * only on x86-64. */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
8700
8701#define _mm_cvt_roundss_sd(A, B, R) \
8702 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8703 (__v4sf)(__m128)(B), \
8704 (__v2df)_mm_undefined_pd(), \
8705 (__mmask8)-1, (int)(R)))
8706
8707#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
8708 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8709 (__v4sf)(__m128)(B), \
8710 (__v2df)(__m128d)(W), \
8711 (__mmask8)(U), (int)(R)))
8712
8713#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
8714 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8715 (__v4sf)(__m128)(B), \
8716 (__v2df)_mm_setzero_pd(), \
8717 (__mmask8)(U), (int)(R)))
8718
8719static __inline__ __m128d __DEFAULT_FN_ATTRS128
8720_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
8721{
8722 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8723 (__v4sf)__B,
8724 (__v2df)__W,
8726}
8727
8728static __inline__ __m128d __DEFAULT_FN_ATTRS128
8729_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
8730{
8731 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8732 (__v4sf)__B,
8733 (__v2df)_mm_setzero_pd(),
8735}
8736
8737static __inline__ __m128d __DEFAULT_FN_ATTRS128
8738_mm_cvtu32_sd (__m128d __A, unsigned __B)
8739{
8740 __A[0] = __B;
8741 return __A;
8742}
8743
8744#ifdef __x86_64__
8745#define _mm_cvt_roundu64_sd(A, B, R) \
8746 ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
8747 (unsigned long long)(B), (int)(R)))
8748
8749static __inline__ __m128d __DEFAULT_FN_ATTRS128
8750_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
8751{
8752 __A[0] = __B;
8753 return __A;
8754}
8755#endif
8756
/// Converts the unsigned 32-bit integer \a B to single precision under
/// rounding control \a R; upper result elements come from \a A.
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))
8760
8761static __inline__ __m128 __DEFAULT_FN_ATTRS128
8762_mm_cvtu32_ss (__m128 __A, unsigned __B)
8763{
8764 __A[0] = __B;
8765 return __A;
8766}
8767
8768#ifdef __x86_64__
/// Converts the unsigned 64-bit integer \a B to single precision under
/// rounding control \a R; upper result elements come from \a A.
#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))
8772
8773static __inline__ __m128 __DEFAULT_FN_ATTRS128
8774_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
8775{
8776 __A[0] = __B;
8777 return __A;
8778}
8779#endif
8780
8781static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8782_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) {
8783 return (__m512i) __builtin_ia32_selectd_512(__M,
8784 (__v16si) _mm512_set1_epi32(__A),
8785 (__v16si) __O);
8786}
8787
8788static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8789_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) {
8790 return (__m512i) __builtin_ia32_selectq_512(__M,
8791 (__v8di) _mm512_set1_epi64(__A),
8792 (__v8di) __O);
8793}
8794
8796 char __e63, char __e62, char __e61, char __e60, char __e59, char __e58,
8797 char __e57, char __e56, char __e55, char __e54, char __e53, char __e52,
8798 char __e51, char __e50, char __e49, char __e48, char __e47, char __e46,
8799 char __e45, char __e44, char __e43, char __e42, char __e41, char __e40,
8800 char __e39, char __e38, char __e37, char __e36, char __e35, char __e34,
8801 char __e33, char __e32, char __e31, char __e30, char __e29, char __e28,
8802 char __e27, char __e26, char __e25, char __e24, char __e23, char __e22,
8803 char __e21, char __e20, char __e19, char __e18, char __e17, char __e16,
8804 char __e15, char __e14, char __e13, char __e12, char __e11, char __e10,
8805 char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3,
8806 char __e2, char __e1, char __e0) {
8807
8808 return __extension__ (__m512i)(__v64qi)
8809 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8810 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8811 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8812 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
8813 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
8814 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
8815 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
8816 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
8817}
8818
8820 short __e31, short __e30, short __e29, short __e28, short __e27,
8821 short __e26, short __e25, short __e24, short __e23, short __e22,
8822 short __e21, short __e20, short __e19, short __e18, short __e17,
8823 short __e16, short __e15, short __e14, short __e13, short __e12,
8824 short __e11, short __e10, short __e9, short __e8, short __e7, short __e6,
8825 short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) {
8826 return __extension__ (__m512i)(__v32hi)
8827 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8828 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8829 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8830 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
8831}
8832
8834 int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H,
8835 int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) {
8836 return __extension__ (__m512i)(__v16si)
8837 { __P, __O, __N, __M, __L, __K, __J, __I,
8838 __H, __G, __F, __E, __D, __C, __B, __A };
8839}
8840
8842 int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8,
8843 int e9, int e10, int e11, int e12, int e13, int e14, int e15) {
8844 return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,
8845 e3, e2, e1, e0);
8846}
8847
/// Constructs a 512-bit vector of [8 x i64]; \a __A is the highest lane and
/// \a __H the lowest (initializer order is reversed relative to arguments).
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_epi64(long long __A, long long __B, long long __C, long long __D,
                 long long __E, long long __F, long long __G, long long __H) {
  return __extension__ (__m512i) (__v8di)
   { __H, __G, __F, __E, __D, __C, __B, __A };
}
8854
8855static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8856_mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3,
8857 long long e4, long long e5, long long e6, long long e7) {
8858 return _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
8859}
8860
/// Constructs a 512-bit vector of [8 x double]; \a __A is the highest lane
/// and \a __H the lowest (initializer order is reversed relative to
/// arguments).
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_pd(double __A, double __B, double __C, double __D, double __E,
              double __F, double __G, double __H) {
  return __extension__ (__m512d)
   { __H, __G, __F, __E, __D, __C, __B, __A };
}
8867
8868static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8869_mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5,
8870 double e6, double e7) {
8871 return _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0);
8872}
8873
/// Constructs a 512-bit vector of [16 x float]; \a __A is the highest lane
/// and \a __P the lowest (initializer order is reversed relative to
/// arguments).
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F,
              float __G, float __H, float __I, float __J, float __K, float __L,
              float __M, float __N, float __O, float __P) {
  return __extension__ (__m512)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}
8882
8883static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8884_mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5,
8885 float e6, float e7, float e8, float e9, float e10, float e11,
8886 float e12, float e13, float e14, float e15) {
8887 return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
8888 e2, e1, e0);
8889}
8890
/// Computes |x| per f32 lane by clearing the sign bit (AND with 0x7FFFFFFF).
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_abs_ps(__m512 __A) {
  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}

/// Masked |x| per f32 lane; masked-off lanes keep the element from \a __W.
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) {
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}

/// Computes |x| per f64 lane by clearing the sign bit (AND with
/// 0x7FFFFFFFFFFFFFFF).
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_abs_pd(__m512d __A) {
  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
}

/// Masked |x| per f64 lane; masked-off lanes keep the element from \a __W.
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) {
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}
8910
8911/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
8912 * outputs. This class of vector operation forms the basis of many scientific
8913 * computations. In vector-reduction arithmetic, the evaluation order is
8914 * independent of the order of the input elements of V.
8915
8916 * For floating-point intrinsics:
8917 * 1. When using fadd/fmul intrinsics, the order of operations within the
8918 * vector is unspecified (associative math).
8919 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
8920 * produce unspecified results.
8921
8922 * Used bisection method. At each step, we partition the vector with previous
8923 * step in half, and the operation is performed on its two halves.
8924 * This takes log2(n) steps where n is the number of elements in the vector.
8925 */
8926
8927static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8929 return __builtin_reduce_add((__v8di)__W);
8930}
8931
8932static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8934 return __builtin_reduce_mul((__v8di)__W);
8935}
8936
8937static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8939 return __builtin_reduce_and((__v8di)__W);
8940}
8941
8942static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8944 return __builtin_reduce_or((__v8di)__W);
8945}
8946
8947static __inline__ long long __DEFAULT_FN_ATTRS512
8949 __W = _mm512_maskz_mov_epi64(__M, __W);
8950 return __builtin_reduce_add((__v8di)__W);
8951}
8952
8953static __inline__ long long __DEFAULT_FN_ATTRS512
8955 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
8956 return __builtin_reduce_mul((__v8di)__W);
8957}
8958
8959static __inline__ long long __DEFAULT_FN_ATTRS512
8961 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
8962 return __builtin_reduce_and((__v8di)__W);
8963}
8964
8965static __inline__ long long __DEFAULT_FN_ATTRS512
8967 __W = _mm512_maskz_mov_epi64(__M, __W);
8968 return __builtin_reduce_or((__v8di)__W);
8969}
8970
8971// -0.0 is used to ignore the start value since it is the neutral value of
8972// floating point addition. For more information, please refer to
8973// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
/// Reduces the eight f64 lanes of \a __W by addition; evaluation order of
/// the lanes is unspecified (see the reduction note above).
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
}

/// Reduces the eight f64 lanes of \a __W by multiplication; 1.0 is the
/// neutral start value.
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
}
8981
8982static __inline__ double __DEFAULT_FN_ATTRS512
8984 __W = _mm512_maskz_mov_pd(__M, __W);
8985 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8986}
8987
8988static __inline__ double __DEFAULT_FN_ATTRS512
8990 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
8991 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
8992}
8993
8994static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
8996 return __builtin_reduce_add((__v16si)__W);
8997}
8998
8999static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9001 return __builtin_reduce_mul((__v16si)__W);
9002}
9003
9004static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9006 return __builtin_reduce_and((__v16si)__W);
9007}
9008
9009static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9011 return __builtin_reduce_or((__v16si)__W);
9012}
9013
9014static __inline__ int __DEFAULT_FN_ATTRS512
9016 __W = _mm512_maskz_mov_epi32(__M, __W);
9017 return __builtin_reduce_add((__v16si)__W);
9018}
9019
9020static __inline__ int __DEFAULT_FN_ATTRS512
9022 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9023 return __builtin_reduce_mul((__v16si)__W);
9024}
9025
9026static __inline__ int __DEFAULT_FN_ATTRS512
9028 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
9029 return __builtin_reduce_and((__v16si)__W);
9030}
9031
9032static __inline__ int __DEFAULT_FN_ATTRS512
9034 __W = _mm512_maskz_mov_epi32(__M, __W);
9035 return __builtin_reduce_or((__v16si)__W);
9036}
9037
9038static __inline__ float __DEFAULT_FN_ATTRS512
9040 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9041}
9042
9043static __inline__ float __DEFAULT_FN_ATTRS512
9045 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9046}
9047
9048static __inline__ float __DEFAULT_FN_ATTRS512
9050 __W = _mm512_maskz_mov_ps(__M, __W);
9051 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9052}
9053
9054static __inline__ float __DEFAULT_FN_ATTRS512
9056 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9057 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9058}
9059
9060static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9062 return __builtin_reduce_max((__v8di)__V);
9063}
9064
9065static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9067 return __builtin_reduce_max((__v8du)__V);
9068}
9069
9070static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9072 return __builtin_reduce_min((__v8di)__V);
9073}
9074
9075static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9077 return __builtin_reduce_min((__v8du)__V);
9078}
9079
9080static __inline__ long long __DEFAULT_FN_ATTRS512
9082 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9083 return __builtin_reduce_max((__v8di)__V);
9084}
9085
9086static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9088 __V = _mm512_maskz_mov_epi64(__M, __V);
9089 return __builtin_reduce_max((__v8du)__V);
9090}
9091
9092static __inline__ long long __DEFAULT_FN_ATTRS512
9094 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9095 return __builtin_reduce_min((__v8di)__V);
9096}
9097
9098static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9100 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
9101 return __builtin_reduce_min((__v8du)__V);
9102}
9103static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9105 return __builtin_reduce_max((__v16si)__V);
9106}
9107
9108static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9110 return __builtin_reduce_max((__v16su)__V);
9111}
9112
9113static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9115 return __builtin_reduce_min((__v16si)__V);
9116}
9117
9118static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9120 return __builtin_reduce_min((__v16su)__V);
9121}
9122
9123static __inline__ int __DEFAULT_FN_ATTRS512
9125 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9126 return __builtin_reduce_max((__v16si)__V);
9127}
9128
9129static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9131 __V = _mm512_maskz_mov_epi32(__M, __V);
9132 return __builtin_reduce_max((__v16su)__V);
9133}
9134
9135static __inline__ int __DEFAULT_FN_ATTRS512
9137 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9138 return __builtin_reduce_min((__v16si)__V);
9139}
9140
9141static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9143 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
9144 return __builtin_reduce_min((__v16su)__V);
9145}
9146
9147static __inline__ double __DEFAULT_FN_ATTRS512
9149 return __builtin_ia32_reduce_fmax_pd512(__V);
9150}
9151
9152static __inline__ double __DEFAULT_FN_ATTRS512
9154 return __builtin_ia32_reduce_fmin_pd512(__V);
9155}
9156
9157static __inline__ double __DEFAULT_FN_ATTRS512
9159 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9160 return __builtin_ia32_reduce_fmax_pd512(__V);
9161}
9162
9163static __inline__ double __DEFAULT_FN_ATTRS512
9165 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9166 return __builtin_ia32_reduce_fmin_pd512(__V);
9167}
9168
9169static __inline__ float __DEFAULT_FN_ATTRS512
9171 return __builtin_ia32_reduce_fmax_ps512(__V);
9172}
9173
9174static __inline__ float __DEFAULT_FN_ATTRS512
9176 return __builtin_ia32_reduce_fmin_ps512(__V);
9177}
9178
9179static __inline__ float __DEFAULT_FN_ATTRS512
9181 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9182 return __builtin_ia32_reduce_fmax_ps512(__V);
9183}
9184
9185static __inline__ float __DEFAULT_FN_ATTRS512
9187 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9188 return __builtin_ia32_reduce_fmin_ps512(__V);
9189}
9190
9191/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9192/// 32-bit signed integer value.
9193///
9194/// \headerfile <x86intrin.h>
9195///
9196/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9197///
9198/// \param __A
9199/// A vector of [16 x i32]. The least significant 32 bits are moved to the
9200/// destination.
9201/// \returns A 32-bit signed integer containing the moved value.
9202static __inline__ int __DEFAULT_FN_ATTRS512
9204 __v16si __b = (__v16si)__A;
9205 return __b[0];
9206}
9207
9208/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
9209/// locations starting at location \a base_addr at packed 32-bit integer indices
9210/// stored in the lower half of \a vindex scaled by \a scale them in dst.
9211///
9212/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9213///
9214/// \code{.operation}
9215/// FOR j := 0 to 7
9216/// i := j*64
9217/// m := j*32
9218/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9219/// dst[i+63:i] := MEM[addr+63:addr]
9220/// ENDFOR
9221/// dst[MAX:512] := 0
9222/// \endcode
/// \a scale must be 1, 2, 4, or 8 (per Intel's gather encoding).
#define _mm512_i32logather_pd(vindex, base_addr, scale) \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9225
9226/// Loads 8 double-precision (64-bit) floating-point elements from memory
9227/// starting at location \a base_addr at packed 32-bit integer indices stored in
9228/// the lower half of \a vindex scaled by \a scale into dst using writemask
9229/// \a mask (elements are copied from \a src when the corresponding mask bit is
9230/// not set).
9231///
9232/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9233///
9234/// \code{.operation}
9235/// FOR j := 0 to 7
9236/// i := j*64
9237/// m := j*32
9238/// IF mask[j]
9239/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9240/// dst[i+63:i] := MEM[addr+63:addr]
9241/// ELSE
9242/// dst[i+63:i] := src[i+63:i]
9243/// FI
9244/// ENDFOR
9245/// dst[MAX:512] := 0
9246/// \endcode
/// \a scale must be 1, 2, 4, or 8 (per Intel's gather encoding).
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \
                           (base_addr), (scale))
9250
9251/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9252/// at packed 32-bit integer indices stored in the lower half of \a vindex
9253/// scaled by \a scale and stores them in dst.
9254///
9255/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9256///
9257/// \code{.operation}
9258/// FOR j := 0 to 7
9259/// i := j*64
9260/// m := j*32
9261/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9262/// dst[i+63:i] := MEM[addr+63:addr]
9263/// ENDFOR
9264/// dst[MAX:512] := 0
9265/// \endcode
/// \a scale must be 1, 2, 4, or 8 (per Intel's gather encoding).
#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9268
9269/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9270/// at packed 32-bit integer indices stored in the lower half of \a vindex
9271/// scaled by \a scale and stores them in dst using writemask \a mask (elements
9272/// are copied from \a src when the corresponding mask bit is not set).
9273///
9274/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9275///
9276/// \code{.operation}
9277/// FOR j := 0 to 7
9278/// i := j*64
9279/// m := j*32
9280/// IF mask[j]
9281/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9282/// dst[i+63:i] := MEM[addr+63:addr]
9283/// ELSE
9284/// dst[i+63:i] := src[i+63:i]
9285/// FI
9286/// ENDFOR
9287/// dst[MAX:512] := 0
9288/// \endcode
/// \a scale must be 1, 2, 4, or 8 (per Intel's gather encoding).
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \
                              (base_addr), (scale))
9292
9293/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9294/// and to memory locations starting at location \a base_addr at packed 32-bit
9295/// integer indices stored in \a vindex scaled by \a scale.
9296///
9297/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9298///
9299/// \code{.operation}
9300/// FOR j := 0 to 7
9301/// i := j*64
9302/// m := j*32
9303/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9304/// MEM[addr+63:addr] := v1[i+63:i]
9305/// ENDFOR
9306/// \endcode
/// \a scale must be 1, 2, 4, or 8 (per Intel's scatter encoding).
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
9309
9310/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9311/// to memory locations starting at location \a base_addr at packed 32-bit
9312/// integer indices stored in \a vindex scaled by \a scale. Only those elements
9313/// whose corresponding mask bit is set in writemask \a mask are written to
9314/// memory.
9315///
9316/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9317///
9318/// \code{.operation}
9319/// FOR j := 0 to 7
9320/// i := j*64
9321/// m := j*32
9322/// IF mask[j]
9323/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9324/// MEM[addr+63:addr] := a[i+63:i]
9325/// FI
9326/// ENDFOR
9327/// \endcode
/// \a scale must be 1, 2, 4, or 8 (per Intel's scatter encoding).
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_pd((base_addr), (mask), \
                            _mm512_castsi512_si256(vindex), (v1), (scale))
9331
9332/// Stores 8 packed 64-bit integer elements located in \a v1 and stores them in
9333/// memory locations starting at location \a base_addr at packed 32-bit integer
9334/// indices stored in \a vindex scaled by \a scale.
9335///
9336/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9337///
9338/// \code{.operation}
9339/// FOR j := 0 to 7
9340/// i := j*64
9341/// m := j*32
9342/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9343/// MEM[addr+63:addr] := a[i+63:i]
9344/// ENDFOR
9345/// \endcode
/// \a scale must be 1, 2, 4, or 8 (per Intel's scatter encoding).
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_epi64((base_addr), \
                          _mm512_castsi512_si256(vindex), (v1), (scale))
9349
9350/// Stores 8 packed 64-bit integer elements located in a and stores them in
9351/// memory locations starting at location \a base_addr at packed 32-bit integer
9352/// indices stored in \a vindex scaled by scale using writemask \a mask (elements
9353/// whose corresponding mask bit is not set are not written to memory).
9354///
9355/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9356///
9357/// \code{.operation}
9358/// FOR j := 0 to 7
9359/// i := j*64
9360/// m := j*32
9361/// IF mask[j]
9362/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9363/// MEM[addr+63:addr] := a[i+63:i]
9364/// FI
9365/// ENDFOR
9366/// \endcode
/// \a scale must be 1, 2, 4, or 8 (per Intel's scatter encoding).
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_epi64((base_addr), (mask), \
                               _mm512_castsi512_si256(vindex), (v1), (scale))
9370
9371#undef __DEFAULT_FN_ATTRS512
9372#undef __DEFAULT_FN_ATTRS128
9373#undef __DEFAULT_FN_ATTRS
9374#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
9375#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
9376#undef __DEFAULT_FN_ATTRS_CONSTEXPR
9377
9378#endif /* __AVX512FINTRIN_H */
#define __L(__X)
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
Definition altivec.h:578
static __inline__ uint32_t volatile uint32_t * __p
Definition arm_acle.h:57
return __v
Definition arm_acle.h:88
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _cvtu32_mask16(unsigned int __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8, int e9, int e10, int e11, int e12, int e13, int e14, int e15)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si256(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7, float e8, float e9, float e10, float e11, float e12, float e13, float e14, float e15)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
_MM_MANTISSA_NORM_ENUM
@ _MM_MANT_NORM_p5_1
@ _MM_MANT_NORM_p5_2
@ _MM_MANT_NORM_1_2
@ _MM_MANT_NORM_p75_1p5
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m128 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps128(__m512 __a)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5, double e6, double e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps256(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3, long long e4, long long e5, long long e6, long long e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m128i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si128(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_int2mask(int __a)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m256d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd256(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
_MM_CMPINT_ENUM
@ _MM_CMPINT_NE
@ _MM_CMPINT_NLT
@ _MM_CMPINT_LE
@ _MM_CMPINT_EQ
@ _MM_CMPINT_LT
@ _MM_CMPINT_UNUSED
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_knot(__mmask16 __M)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
unsigned char __mmask8
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
@ _MM_TERNLOG_A
@ _MM_TERNLOG_B
@ _MM_TERNLOG_C
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
_MM_MANTISSA_SIGN_ENUM
@ _MM_MANT_SIGN_zero
@ _MM_MANT_SIGN_src
@ _MM_MANT_SIGN_nan
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi32(int e0, int e1, int e2, int e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32(__m512i __A, __m512i __B)
_MM_PERM_ENUM
@ _MM_PERM_BBCA
@ _MM_PERM_BCBD
@ _MM_PERM_DAAB
@ _MM_PERM_CBBD
@ _MM_PERM_DCCC
@ _MM_PERM_CDBB
@ _MM_PERM_DDDD
@ _MM_PERM_CCCC
@ _MM_PERM_CADA
@ _MM_PERM_BACD
@ _MM_PERM_CCAD
@ _MM_PERM_ABDB
@ _MM_PERM_BBBD
@ _MM_PERM_DCAB
@ _MM_PERM_BABC
@ _MM_PERM_AACD
@ _MM_PERM_BBAB
@ _MM_PERM_DCDB
@ _MM_PERM_BACC
@ _MM_PERM_ABDA
@ _MM_PERM_ACCC
@ _MM_PERM_ADAC
@ _MM_PERM_CCCD
@ _MM_PERM_CADD
@ _MM_PERM_ACCB
@ _MM_PERM_BBDB
@ _MM_PERM_ABBB
@ _MM_PERM_BACB
@ _MM_PERM_CDCA
@ _MM_PERM_ACBC
@ _MM_PERM_ADCB
@ _MM_PERM_BBBC
@ _MM_PERM_DBBA
@ _MM_PERM_BBBB
@ _MM_PERM_DDDB
@ _MM_PERM_CAAA
@ _MM_PERM_ADBB
@ _MM_PERM_ACDB
@ _MM_PERM_DCAD
@ _MM_PERM_DBBC
@ _MM_PERM_BAAB
@ _MM_PERM_BDDD
@ _MM_PERM_BBAD
@ _MM_PERM_DDBA
@ _MM_PERM_CDCD
@ _MM_PERM_CCCA
@ _MM_PERM_DBBB
@ _MM_PERM_DAAD
@ _MM_PERM_DCBA
@ _MM_PERM_CCBC
@ _MM_PERM_ADDD
@ _MM_PERM_DBAC
@ _MM_PERM_ABAB
@ _MM_PERM_CBDB
@ _MM_PERM_CDBC
@ _MM_PERM_AABC
@ _MM_PERM_DABD
@ _MM_PERM_CBBA
@ _MM_PERM_CBAA
@ _MM_PERM_BDDB
@ _MM_PERM_CABC
@ _MM_PERM_BDBD
@ _MM_PERM_BCAD
@ _MM_PERM_ACBA
@ _MM_PERM_ADBA
@ _MM_PERM_ADBC
@ _MM_PERM_DBCB
@ _MM_PERM_CBDC
@ _MM_PERM_CBAD
@ _MM_PERM_ABCC
@ _MM_PERM_AAAD
@ _MM_PERM_CBAC
@ _MM_PERM_CCDA
@ _MM_PERM_CDAC
@ _MM_PERM_BADD
@ _MM_PERM_DAAC
@ _MM_PERM_BCCC
@ _MM_PERM_DBBD
@ _MM_PERM_DDAC
@ _MM_PERM_DACD
@ _MM_PERM_BAAC
@ _MM_PERM_ACCA
@ _MM_PERM_ABDD
@ _MM_PERM_BBCC
@ _MM_PERM_DAAA
@ _MM_PERM_CAAB
@ _MM_PERM_BCDB
@ _MM_PERM_ACBB
@ _MM_PERM_CDAB
@ _MM_PERM_DBDB
@ _MM_PERM_AABB
@ _MM_PERM_DBDA
@ _MM_PERM_BCBA
@ _MM_PERM_CBAB
@ _MM_PERM_DCDC
@ _MM_PERM_BBCB
@ _MM_PERM_CDCB
@ _MM_PERM_AACA
@ _MM_PERM_ACBD
@ _MM_PERM_AAAB
@ _MM_PERM_DCCB
@ _MM_PERM_ADDB
@ _MM_PERM_AAAA
@ _MM_PERM_AACC
@ _MM_PERM_BDDC
@ _MM_PERM_CBBC
@ _MM_PERM_DDCC
@ _MM_PERM_CABD
@ _MM_PERM_AADC
@ _MM_PERM_BCBC
@ _MM_PERM_BCCA
@ _MM_PERM_CCBD
@ _MM_PERM_CBBB
@ _MM_PERM_CDBA
@ _MM_PERM_CACD
@ _MM_PERM_BDAD
@ _MM_PERM_ADCA
@ _MM_PERM_AAAC
@ _MM_PERM_BDDA
@ _MM_PERM_CCAC
@ _MM_PERM_ACDC
@ _MM_PERM_DBCA
@ _MM_PERM_DBAA
@ _MM_PERM_AABD
@ _MM_PERM_CDCC
@ _MM_PERM_DCAA
@ _MM_PERM_DDDC
@ _MM_PERM_CDDB
@ _MM_PERM_AABA
@ _MM_PERM_DDBB
@ _MM_PERM_CDDA
@ _MM_PERM_AADD
@ _MM_PERM_BADC
@ _MM_PERM_BDBA
@ _MM_PERM_DBDD
@ _MM_PERM_BDAC
@ _MM_PERM_DBDC
@ _MM_PERM_BBBA
@ _MM_PERM_DDBC
@ _MM_PERM_BAAA
@ _MM_PERM_BDCC
@ _MM_PERM_DDAB
@ _MM_PERM_BCCB
@ _MM_PERM_BCCD
@ _MM_PERM_ADBD
@ _MM_PERM_ADCC
@ _MM_PERM_CCBB
@ _MM_PERM_CDAA
@ _MM_PERM_BBDA
@ _MM_PERM_CACC
@ _MM_PERM_DCBB
@ _MM_PERM_DABA
@ _MM_PERM_BADB
@ _MM_PERM_ABCA
@ _MM_PERM_CBCC
@ _MM_PERM_ABAD
@ _MM_PERM_BDBC
@ _MM_PERM_DDDA
@ _MM_PERM_ADAB
@ _MM_PERM_CADB
@ _MM_PERM_ADAA
@ _MM_PERM_ACAC
@ _MM_PERM_DADD
@ _MM_PERM_BABD
@ _MM_PERM_ACCD
@ _MM_PERM_CCAA
@ _MM_PERM_AADA
@ _MM_PERM_BDCA
@ _MM_PERM_CDDD
@ _MM_PERM_ABBD
@ _MM_PERM_ACAA
@ _MM_PERM_ACDD
@ _MM_PERM_DABB
@ _MM_PERM_CCCB
@ _MM_PERM_AADB
@ _MM_PERM_DBAD
@ _MM_PERM_BBDD
@ _MM_PERM_BCDC
@ _MM_PERM_CABA
@ _MM_PERM_BBAA
@ _MM_PERM_ADAD
@ _MM_PERM_BADA
@ _MM_PERM_DCDA
@ _MM_PERM_ABBA
@ _MM_PERM_ACAB
@ _MM_PERM_CCDD
@ _MM_PERM_CADC
@ _MM_PERM_DDCB
@ _MM_PERM_BABB
@ _MM_PERM_CCDB
@ _MM_PERM_DDAD
@ _MM_PERM_DBCC
@ _MM_PERM_BCBB
@ _MM_PERM_ADDC
@ _MM_PERM_CCBA
@ _MM_PERM_ABCD
@ _MM_PERM_BCAB
@ _MM_PERM_DCBC
@ _MM_PERM_BCDD
@ _MM_PERM_CCDC
@ _MM_PERM_ABAC
@ _MM_PERM_CBCB
@ _MM_PERM_CCAB
@ _MM_PERM_DDCD
@ _MM_PERM_DACA
@ _MM_PERM_ACAD
@ _MM_PERM_BABA
@ _MM_PERM_CBCD
@ _MM_PERM_CAAD
@ _MM_PERM_DCDD
@ _MM_PERM_BDBB
@ _MM_PERM_BCAA
@ _MM_PERM_ABDC
@ _MM_PERM_BBCD
@ _MM_PERM_CAAC
@ _MM_PERM_BBAC
@ _MM_PERM_CBCA
@ _MM_PERM_DCAC
@ _MM_PERM_ABAA
@ _MM_PERM_CACB
@ _MM_PERM_BBDC
@ _MM_PERM_CDAD
@ _MM_PERM_ADCD
@ _MM_PERM_DADB
@ _MM_PERM_DBCD
@ _MM_PERM_DACC
@ _MM_PERM_DACB
@ _MM_PERM_DCBD
@ _MM_PERM_CACA
@ _MM_PERM_ABBC
@ _MM_PERM_DCCA
@ _MM_PERM_DABC
@ _MM_PERM_CBDD
@ _MM_PERM_DDBD
@ _MM_PERM_DDCA
@ _MM_PERM_BDCD
@ _MM_PERM_CDBD
@ _MM_PERM_ABCB
@ _MM_PERM_CDDC
@ _MM_PERM_AACB
@ _MM_PERM_DDAA
@ _MM_PERM_ADDA
@ _MM_PERM_DADA
@ _MM_PERM_BCDA
@ _MM_PERM_BDAB
@ _MM_PERM_BAAD
@ _MM_PERM_DBAB
@ _MM_PERM_DCCD
@ _MM_PERM_CABB
@ _MM_PERM_BDAA
@ _MM_PERM_BDCB
@ _MM_PERM_ACDA
@ _MM_PERM_DADC
@ _MM_PERM_CBDA
@ _MM_PERM_BCAC
@ _MM_PERM_BACA
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_pd(double e0, double e1, double e2, double e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m128d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd128(__m512d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kmov(__mmask16 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_si512(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_ps(float e0, float e1, float e2, float e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_ps(__m512d __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3620
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4291
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4279
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4303
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition emmintrin.h:120
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition emmintrin.h:199
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition emmintrin.h:80
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:1887
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition emmintrin.h:159
static __inline__ void short __D
Definition immintrin.h:342
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition bmi2intrin.h:19
#define _MM_FROUND_FLOOR
Definition smmintrin.h:41
#define _MM_FROUND_CEIL
Definition smmintrin.h:42
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition xmmintrin.h:2801
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:160
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
Definition xmmintrin.h:119
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:79
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2018
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
Definition xmmintrin.h:200