/* Extracted from clang 23.0.0git generated documentation for avx512fintrin.h. */
1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
16typedef char __v64qi __attribute__((__vector_size__(64)));
17typedef short __v32hi __attribute__((__vector_size__(64)));
18typedef double __v8df __attribute__((__vector_size__(64)));
19typedef float __v16sf __attribute__((__vector_size__(64)));
20typedef long long __v8di __attribute__((__vector_size__(64)));
21typedef int __v16si __attribute__((__vector_size__(64)));
22
23/* Unsigned types */
24typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
25typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
26typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
27typedef unsigned int __v16su __attribute__((__vector_size__(64)));
28
29/* We need an explicitly signed variant for char. Note that this shouldn't
30 * appear in the interface though. */
31typedef signed char __v64qs __attribute__((__vector_size__(64)));
32
33typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
34typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
35typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
36
37typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
38typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
39typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
40
41typedef unsigned char __mmask8;
42typedef unsigned short __mmask16;
43
44/* Rounding mode macros. */
45#define _MM_FROUND_TO_NEAREST_INT 0x00
46#define _MM_FROUND_TO_NEG_INF 0x01
47#define _MM_FROUND_TO_POS_INF 0x02
48#define _MM_FROUND_TO_ZERO 0x03
49#define _MM_FROUND_CUR_DIRECTION 0x04
50
51/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* Predicate value 3 is reserved */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
63
64typedef enum
65{
153
/* Normalization intervals for the getmant (extract mantissa) intrinsics. */
typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Sign control for the getmant (extract mantissa) intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,      /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,     /* sign = 0             */
  _MM_MANT_SIGN_nan       /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
169/* Define the default attributes for the functions in this file. */
170#define __DEFAULT_FN_ATTRS512 \
171 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
172 __min_vector_width__(512)))
173#define __DEFAULT_FN_ATTRS128 \
174 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
175 __min_vector_width__(128)))
176#define __DEFAULT_FN_ATTRS \
177 __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
178
179#if defined(__cplusplus) && (__cplusplus >= 201103L)
180#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
181#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
182#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
183#else
184#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
185#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
186#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
187#endif
188
189/* Create vectors with repeated elements */
190
191static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
193 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
194}
195
196#define _mm512_setzero_epi32 _mm512_setzero_si512
197
198static __inline__ __m512d __DEFAULT_FN_ATTRS512
200{
201 return (__m512d)__builtin_ia32_undef512();
202}
203
204static __inline__ __m512 __DEFAULT_FN_ATTRS512
206{
207 return (__m512)__builtin_ia32_undef512();
208}
209
210static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) {
211 return (__m512)__builtin_ia32_undef512();
212}
213
214static __inline__ __m512i __DEFAULT_FN_ATTRS512
216{
217 return (__m512i)__builtin_ia32_undef512();
218}
219
220static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
222 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
224}
225
/* Broadcasts the low 32-bit element of __A; lanes whose bit in __M is clear
 * keep the corresponding element of __O instead. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) {
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}
232
233static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
235 return (__m512i)__builtin_ia32_selectd_512(__M,
236 (__v16si) _mm512_broadcastd_epi32(__A),
237 (__v16si) _mm512_setzero_si512());
238}
239
240static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
242 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
243 0, 0, 0, 0, 0, 0, 0, 0);
244}
245
/* Broadcasts the low 64-bit element of __A; lanes whose bit in __M is clear
 * keep the corresponding element of __O instead. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
  return (__m512i)__builtin_ia32_selectq_512(
      __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O);
}
251
252static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
254 return (__m512i)__builtin_ia32_selectq_512(__M,
255 (__v8di) _mm512_broadcastq_epi64(__A),
256 (__v8di) _mm512_setzero_si512());
257}
258
260 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
261 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
262}
263
264#define _mm512_setzero _mm512_setzero_ps
265
266static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
268 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
269}
270
271static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
273{
274 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
275 __w, __w, __w, __w, __w, __w, __w, __w };
276}
277
/* Broadcasts double __w to all 8 lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set1_pd(double __w)
{
  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}
283
284static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
286{
287 return __extension__ (__m512i)(__v64qi){
288 __w, __w, __w, __w, __w, __w, __w, __w,
289 __w, __w, __w, __w, __w, __w, __w, __w,
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w,
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w };
296}
297
298static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
300{
301 return __extension__ (__m512i)(__v32hi){
302 __w, __w, __w, __w, __w, __w, __w, __w,
303 __w, __w, __w, __w, __w, __w, __w, __w,
304 __w, __w, __w, __w, __w, __w, __w, __w,
305 __w, __w, __w, __w, __w, __w, __w, __w };
306}
307
308static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
310{
311 return __extension__ (__m512i)(__v16si){
312 __s, __s, __s, __s, __s, __s, __s, __s,
313 __s, __s, __s, __s, __s, __s, __s, __s };
314}
315
316static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
318 return (__m512i)__builtin_ia32_selectd_512(__M,
319 (__v16si)_mm512_set1_epi32(__A),
320 (__v16si)_mm512_setzero_si512());
321}
322
/* Broadcasts 64-bit value __d to all 8 qword lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set1_epi64(long long __d)
{
  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}
328
329static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
331 return (__m512i)__builtin_ia32_selectq_512(__M,
332 (__v8di)_mm512_set1_epi64(__A),
333 (__v8di)_mm512_setzero_si512());
334}
335
336static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
338 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
340}
341
/* Repeats the 4-element pattern (__D, __C, __B, __A) across all 16 dword
 * lanes; element 0 gets __D, element 3 gets __A, and so on. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_epi32(int __A, int __B, int __C, int __D) {
  return __extension__ (__m512i)(__v16si)
    { __D, __C, __B, __A, __D, __C, __B, __A,
      __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeats the 4-element pattern (__D, __C, __B, __A) across the 8 qword lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D) {
  return __extension__ (__m512i) (__v8di)
    { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeats the 4-element pattern (__D, __C, __B, __A) across the 8 double lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_pd(double __A, double __B, double __C, double __D) {
  return __extension__ (__m512d)
    { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeats the 4-element pattern (__D, __C, __B, __A) across the 16 float lanes. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set4_ps(float __A, float __B, float __C, float __D) {
  return __extension__ (__m512)
    { __D, __C, __B, __A, __D, __C, __B, __A,
      __D, __C, __B, __A, __D, __C, __B, __A };
}

/* setr4 variants: same as set4 with the argument order reversed. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_epi32(int e0, int e1, int e2, int e3) {
  return _mm512_set4_epi32(e3, e2, e1, e0);
}

static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) {
  return _mm512_set4_epi64(e3, e2, e1, e0);
}

static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_pd(double e0, double e1, double e2, double e3) {
  return _mm512_set4_pd(e3, e2, e1, e0);
}

static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setr4_ps(float e0, float e1, float e2, float e3) {
  return _mm512_set4_ps(e3, e2, e1, e0);
}
387
388static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
390 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
391 0, 0, 0, 0, 0, 0, 0, 0);
392}
393
394/* Cast between vector types */
395
396static __inline __m512d __DEFAULT_FN_ATTRS512
398{
399 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
400 1, 2, 3, 4, 5, 6, 7);
401}
402
403static __inline __m512 __DEFAULT_FN_ATTRS512
405{
406 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
407 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
408}
409
410static __inline __m128d __DEFAULT_FN_ATTRS512_CONSTEXPR
412{
413 return __builtin_shufflevector(__a, __a, 0, 1);
414}
415
416static __inline __m256d __DEFAULT_FN_ATTRS512_CONSTEXPR
418{
419 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
420}
421
422static __inline __m128 __DEFAULT_FN_ATTRS512_CONSTEXPR
424{
425 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
426}
427
428static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
430 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
431}
432
/* Reinterprets the bits of [8 x double] as [16 x float]; no conversion. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}
438
439static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
441{
442 return (__m512i) (__A);
443}
444
445static __inline__ __m512d __DEFAULT_FN_ATTRS512
447{
448 __m256d __B = __builtin_nondeterministic_value(__B);
449 return __builtin_shufflevector(
450 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
451 __B, 0, 1, 2, 3, 4, 5, 6, 7);
452}
453
454static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
456{
457 return (__m512d) (__A);
458}
459
460static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
462{
463 return (__m512i) (__A);
464}
465
466static __inline__ __m512 __DEFAULT_FN_ATTRS512
468{
469 __m256 __B = __builtin_nondeterministic_value(__B);
470 return __builtin_shufflevector(
471 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
472 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
473}
474
475static __inline__ __m512i __DEFAULT_FN_ATTRS512
477{
478 __m256i __B = __builtin_nondeterministic_value(__B);
479 return __builtin_shufflevector(
480 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
481 __B, 0, 1, 2, 3, 4, 5, 6, 7);
482}
483
484static __inline__ __m512i __DEFAULT_FN_ATTRS512
486{
487 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
488}
489
490static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
492{
493 return (__m512) (__A);
494}
495
496static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
498{
499 return (__m512d) (__A);
500}
501
502static __inline __m128i __DEFAULT_FN_ATTRS512_CONSTEXPR
504{
505 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
506}
507
508static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR
510 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
511}
512
515 return (__mmask16)__a;
516}
517
518static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
520 return (int)__a;
521}
522
523/// Constructs a 512-bit floating-point vector of [8 x double] from a
524/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
525/// contain the value of the source vector. The upper 384 bits are set
526/// to zero.
527///
528/// \headerfile <x86intrin.h>
529///
530/// This intrinsic has no corresponding instruction.
531///
532/// \param __a
533/// A 128-bit vector of [2 x double].
534/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
535/// contain the value of the parameter. The upper 384 bits are set to zero.
536static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
538 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
539}
540
541/// Constructs a 512-bit floating-point vector of [8 x double] from a
542/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
543/// contain the value of the source vector. The upper 256 bits are set
544/// to zero.
545///
546/// \headerfile <x86intrin.h>
547///
548/// This intrinsic has no corresponding instruction.
549///
550/// \param __a
551/// A 256-bit vector of [4 x double].
552/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
553/// contain the value of the parameter. The upper 256 bits are set to zero.
554static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
556 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
557}
558
559/// Constructs a 512-bit floating-point vector of [16 x float] from a
560/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
561/// the value of the source vector. The upper 384 bits are set to zero.
562///
563/// \headerfile <x86intrin.h>
564///
565/// This intrinsic has no corresponding instruction.
566///
567/// \param __a
568/// A 128-bit vector of [4 x float].
569/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
570/// contain the value of the parameter. The upper 384 bits are set to zero.
571static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
573 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
574}
575
576/// Constructs a 512-bit floating-point vector of [16 x float] from a
577/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
578/// the value of the source vector. The upper 256 bits are set to zero.
579///
580/// \headerfile <x86intrin.h>
581///
582/// This intrinsic has no corresponding instruction.
583///
584/// \param __a
585/// A 256-bit vector of [8 x float].
586/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
587/// contain the value of the parameter. The upper 256 bits are set to zero.
588static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
590 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
591}
592
593/// Constructs a 512-bit integer vector from a 128-bit integer vector.
594/// The lower 128 bits contain the value of the source vector. The upper
595/// 384 bits are set to zero.
596///
597/// \headerfile <x86intrin.h>
598///
599/// This intrinsic has no corresponding instruction.
600///
601/// \param __a
602/// A 128-bit integer vector.
603/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
604/// the parameter. The upper 384 bits are set to zero.
605static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
607 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
608}
609
610/// Constructs a 512-bit integer vector from a 256-bit integer vector.
611/// The lower 256 bits contain the value of the source vector. The upper
612/// 256 bits are set to zero.
613///
614/// \headerfile <x86intrin.h>
615///
616/// This intrinsic has no corresponding instruction.
617///
618/// \param __a
619/// A 256-bit integer vector.
620/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
621/// the parameter. The upper 256 bits are set to zero.
622static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
624 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
625}
626
627/* Bitwise operators */
/* Bitwise AND of 32-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}

/* Masked AND: lanes whose bit in __k is clear keep the element of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si) _mm512_and_epi32(__a, __b),
                                             (__v16si) __src);
}
640
641static __inline__ __m512i __DEFAULT_FN_ATTRS512
643{
645 __k, __a, __b);
646}
647
/* Bitwise AND of 64-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Masked AND: lanes whose bit in __k is clear keep the element of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) {
  return (__m512i)__builtin_ia32_selectq_512(
      (__mmask8)__k, (__v8di)_mm512_and_epi64(__a, __b), (__v8di)__src);
}
659
660static __inline__ __m512i __DEFAULT_FN_ATTRS512
662{
664 __k, __a, __b);
665}
666
/* Bitwise (~__A) & __B over the full 512 bits. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}

/* (~__A) & __B on 32-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)__A & (__v16su)__B);
}

/* Masked ANDNOT: lanes whose bit in __U is clear keep the element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_andnot_epi32(__A, __B),
                                             (__v16si)__W);
}
686
687static __inline__ __m512i __DEFAULT_FN_ATTRS512
688_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
689{
691 __U, __A, __B);
692}
693
/* (~__A) & __B on 64-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}

/* Masked ANDNOT: lanes whose bit in __U is clear keep the element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_andnot_epi64(__A, __B),
                                             (__v8di)__W);
}
707
708static __inline__ __m512i __DEFAULT_FN_ATTRS512
709_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
710{
712 __U, __A, __B);
713}
714
/* Bitwise OR of 32-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}

/* Masked OR: lanes whose bit in __k is clear keep the element of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}
728
729static __inline__ __m512i __DEFAULT_FN_ATTRS512
731{
732 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
733}
734
/* Bitwise OR of 64-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Masked OR: lanes whose bit in __k is clear keep the element of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Masked OR with zero-masking: lanes whose bit in __k is clear become 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}

/* Bitwise XOR of 32-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}

/* Masked XOR: lanes whose bit in __k is clear keep the element of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_xor_epi32(__a, __b),
                                             (__v16si)__src);
}
768
769static __inline__ __m512i __DEFAULT_FN_ATTRS512
771{
772 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
773}
774
/* Bitwise XOR of 64-bit elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}

/* Masked XOR: lanes whose bit in __k is clear keep the element of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}
788
789static __inline__ __m512i __DEFAULT_FN_ATTRS512
791{
792 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
793}
794
/* Whole-register bitwise AND (element width irrelevant). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Whole-register bitwise OR. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Whole-register bitwise XOR. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
812
813/* Arithmetic */
814
/* Lane-wise addition of [8 x double]. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_pd(__m512d __a, __m512d __b) {
  return (__m512d)((__v8df)__a + (__v8df)__b);
}

/* Lane-wise addition of [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a + (__v16sf)__b);
}

/* Lane-wise multiplication of [8 x double]. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mul_pd(__m512d __a, __m512d __b) {
  return (__m512d)((__v8df)__a * (__v8df)__b);
}

/* Lane-wise multiplication of [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mul_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a * (__v16sf)__b);
}

/* Lane-wise subtraction of [8 x double]. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_pd(__m512d __a, __m512d __b) {
  return (__m512d)((__v8df)__a - (__v8df)__b);
}

/* Lane-wise subtraction of [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a - (__v16sf)__b);
}
844
/* Lane-wise 64-bit addition (unsigned arithmetic; wraps on overflow). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_epi64(__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

/* Masked 64-bit add: lanes whose bit in __U is clear keep the element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Masked 64-bit add with zero-masking. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Lane-wise 64-bit subtraction. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi64(__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

/* Masked 64-bit subtract: clear-mask lanes keep the element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Masked 64-bit subtract with zero-masking. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
882
/* Lane-wise 32-bit addition (unsigned arithmetic; wraps on overflow). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_epi32(__m512i __A, __m512i __B) {
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

/* Masked 32-bit add: lanes whose bit in __U is clear keep the element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Masked 32-bit add with zero-masking. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Lane-wise 32-bit subtraction. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi32(__m512i __A, __m512i __B) {
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

/* Masked 32-bit subtract: clear-mask lanes keep the element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Masked 32-bit subtract with zero-masking. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
920
921#define _mm512_max_round_pd(A, B, R) \
922 ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
923 (__v8df)(__m512d)(B), (int)(R)))
924
925#define _mm512_mask_max_round_pd(W, U, A, B, R) \
926 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
927 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
928 (__v8df)(W)))
929
930#define _mm512_maskz_max_round_pd(U, A, B, R) \
931 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
932 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
933 (__v8df)_mm512_setzero_pd()))
934
935static __inline__ __m512d
937 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
939}
940
/* Masked max: lanes whose bit in __U is clear keep the element of __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)__W);
}

/* Masked max with zero-masking. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
954
955#define _mm512_max_round_ps(A, B, R) \
956 ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
957 (__v16sf)(__m512)(B), (int)(R)))
958
959#define _mm512_mask_max_round_ps(W, U, A, B, R) \
960 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
961 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
962 (__v16sf)(W)))
963
964#define _mm512_maskz_max_round_ps(U, A, B, R) \
965 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
966 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
967 (__v16sf)_mm512_setzero_ps()))
968
969static __inline__ __m512
971 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
973}
974
/* Masked max: lanes whose bit in __U is clear keep the element of __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Masked max with zero-masking. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
988
989static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
990_mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
991 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
992 (__v4sf) __B,
993 (__v4sf) __W,
994 (__mmask8) __U,
996}
997
998static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
999_mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1000 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1001 (__v4sf) __B,
1002 (__v4sf) _mm_setzero_ps (),
1003 (__mmask8) __U,
1005}
1006
1007#define _mm_max_round_ss(A, B, R) \
1008 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1009 (__v4sf)(__m128)(B), \
1010 (__v4sf)_mm_setzero_ps(), \
1011 (__mmask8)-1, (int)(R)))
1012
1013#define _mm_mask_max_round_ss(W, U, A, B, R) \
1014 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1015 (__v4sf)(__m128)(B), \
1016 (__v4sf)(__m128)(W), (__mmask8)(U), \
1017 (int)(R)))
1018
1019#define _mm_maskz_max_round_ss(U, A, B, R) \
1020 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1021 (__v4sf)(__m128)(B), \
1022 (__v4sf)_mm_setzero_ps(), \
1023 (__mmask8)(U), (int)(R)))
1024
1025static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1026_mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1027 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1028 (__v2df) __B,
1029 (__v2df) __W,
1030 (__mmask8) __U,
1032}
1033
1034static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1035_mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1036 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1037 (__v2df) __B,
1038 (__v2df) _mm_setzero_pd (),
1039 (__mmask8) __U,
1041}
1042
1043#define _mm_max_round_sd(A, B, R) \
1044 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1045 (__v2df)(__m128d)(B), \
1046 (__v2df)_mm_setzero_pd(), \
1047 (__mmask8)-1, (int)(R)))
1048
1049#define _mm_mask_max_round_sd(W, U, A, B, R) \
1050 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1051 (__v2df)(__m128d)(B), \
1052 (__v2df)(__m128d)(W), \
1053 (__mmask8)(U), (int)(R)))
1054
1055#define _mm_maskz_max_round_sd(U, A, B, R) \
1056 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1057 (__v2df)(__m128d)(B), \
1058 (__v2df)_mm_setzero_pd(), \
1059 (__mmask8)(U), (int)(R)))
1060
1061static __inline __m512i
1063 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1064}
1065
/* Masked signed 32-bit max: clear-mask lanes keep the element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Masked signed 32-bit max with zero-masking. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Per-lane maximum of unsigned 32-bit elements. */
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_max_epu32(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
}

/* Masked unsigned 32-bit max: clear-mask lanes keep the element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epu32(__A, __B),
                                             (__v16si)__W);
}

/* Masked unsigned 32-bit max with zero-masking. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epu32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
1098
1099static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1100_mm512_max_epi64(__m512i __A, __m512i __B) {
1101 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1102}
1103
1104static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1105_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1106 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1107 (__v8di)_mm512_max_epi64(__A, __B),
1108 (__v8di)__W);
1109}
1110
1111static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1112_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1113 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1114 (__v8di)_mm512_max_epi64(__A, __B),
1115 (__v8di)_mm512_setzero_si512());
1116}
1117
1118static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1119_mm512_max_epu64(__m512i __A, __m512i __B) {
1120 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1121}
1122
1123static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1124_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1125 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1126 (__v8di)_mm512_max_epu64(__A, __B),
1127 (__v8di)__W);
1128}
1129
1130static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1131_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1132 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1133 (__v8di)_mm512_max_epu64(__A, __B),
1134 (__v8di)_mm512_setzero_si512());
1135}
1136
1137#define _mm512_min_round_pd(A, B, R) \
1138 ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1139 (__v8df)(__m512d)(B), (int)(R)))
1140
1141#define _mm512_mask_min_round_pd(W, U, A, B, R) \
1142 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1143 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1144 (__v8df)(W)))
1145
1146#define _mm512_maskz_min_round_pd(U, A, B, R) \
1147 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1148 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1149 (__v8df)_mm512_setzero_pd()))
1150
1151static __inline__ __m512d
1153 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1155}
1156
1157static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1158_mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1159 return (__m512d)__builtin_ia32_selectpd_512(__U,
1160 (__v8df)_mm512_min_pd(__A, __B),
1161 (__v8df)__W);
1162}
1163
1164static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1165_mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1166 return (__m512d)__builtin_ia32_selectpd_512(__U,
1167 (__v8df)_mm512_min_pd(__A, __B),
1168 (__v8df)_mm512_setzero_pd());
1169}
1170
1171#define _mm512_min_round_ps(A, B, R) \
1172 ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1173 (__v16sf)(__m512)(B), (int)(R)))
1174
1175#define _mm512_mask_min_round_ps(W, U, A, B, R) \
1176 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1177 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1178 (__v16sf)(W)))
1179
1180#define _mm512_maskz_min_round_ps(U, A, B, R) \
1181 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1182 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1183 (__v16sf)_mm512_setzero_ps()))
1184
1185static __inline__ __m512
1187 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1189}
1190
1191static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1192_mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1193 return (__m512)__builtin_ia32_selectps_512(__U,
1194 (__v16sf)_mm512_min_ps(__A, __B),
1195 (__v16sf)__W);
1196}
1197
1198static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1199_mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1200 return (__m512)__builtin_ia32_selectps_512(__U,
1201 (__v16sf)_mm512_min_ps(__A, __B),
1202 (__v16sf)_mm512_setzero_ps());
1203}
1204
1205static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1206_mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1207 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1208 (__v4sf) __B,
1209 (__v4sf) __W,
1210 (__mmask8) __U,
1212}
1213
1214static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1215_mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1216 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1217 (__v4sf) __B,
1218 (__v4sf) _mm_setzero_ps (),
1219 (__mmask8) __U,
1221}
1222
1223#define _mm_min_round_ss(A, B, R) \
1224 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1225 (__v4sf)(__m128)(B), \
1226 (__v4sf)_mm_setzero_ps(), \
1227 (__mmask8)-1, (int)(R)))
1228
1229#define _mm_mask_min_round_ss(W, U, A, B, R) \
1230 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1231 (__v4sf)(__m128)(B), \
1232 (__v4sf)(__m128)(W), (__mmask8)(U), \
1233 (int)(R)))
1234
1235#define _mm_maskz_min_round_ss(U, A, B, R) \
1236 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1237 (__v4sf)(__m128)(B), \
1238 (__v4sf)_mm_setzero_ps(), \
1239 (__mmask8)(U), (int)(R)))
1240
1241static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1242_mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1243 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1244 (__v2df) __B,
1245 (__v2df) __W,
1246 (__mmask8) __U,
1248}
1249
1250static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1251_mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1252 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1253 (__v2df) __B,
1254 (__v2df) _mm_setzero_pd (),
1255 (__mmask8) __U,
1257}
1258
1259#define _mm_min_round_sd(A, B, R) \
1260 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1261 (__v2df)(__m128d)(B), \
1262 (__v2df)_mm_setzero_pd(), \
1263 (__mmask8)-1, (int)(R)))
1264
1265#define _mm_mask_min_round_sd(W, U, A, B, R) \
1266 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1267 (__v2df)(__m128d)(B), \
1268 (__v2df)(__m128d)(W), \
1269 (__mmask8)(U), (int)(R)))
1270
1271#define _mm_maskz_min_round_sd(U, A, B, R) \
1272 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1273 (__v2df)(__m128d)(B), \
1274 (__v2df)_mm_setzero_pd(), \
1275 (__mmask8)(U), (int)(R)))
1276
1277static __inline __m512i
1279 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1280}
1281
1282static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1283_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1284 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1285 (__v16si)_mm512_min_epi32(__A, __B),
1286 (__v16si)__W);
1287}
1288
1289static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1290_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1291 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1292 (__v16si)_mm512_min_epi32(__A, __B),
1293 (__v16si)_mm512_setzero_si512());
1294}
1295
1296static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1297_mm512_min_epu32(__m512i __A, __m512i __B) {
1298 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1299}
1300
1301static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1302_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1303 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1304 (__v16si)_mm512_min_epu32(__A, __B),
1305 (__v16si)__W);
1306}
1307
1308static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1309_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1310 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1311 (__v16si)_mm512_min_epu32(__A, __B),
1312 (__v16si)_mm512_setzero_si512());
1313}
1314
1315static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1316_mm512_min_epi64(__m512i __A, __m512i __B) {
1317 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1318}
1319
1320static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1321_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1322 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1323 (__v8di)_mm512_min_epi64(__A, __B),
1324 (__v8di)__W);
1325}
1326
1327static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1328_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1329 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1330 (__v8di)_mm512_min_epi64(__A, __B),
1331 (__v8di)_mm512_setzero_si512());
1332}
1333
1334static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1335_mm512_min_epu64(__m512i __A, __m512i __B) {
1336 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1337}
1338
1339static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1340_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1341 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1342 (__v8di)_mm512_min_epu64(__A, __B),
1343 (__v8di)__W);
1344}
1345
1346static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1347_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1348 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1349 (__v8di)_mm512_min_epu64(__A, __B),
1350 (__v8di)_mm512_setzero_si512());
1351}
1352
1353static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1354_mm512_mul_epi32(__m512i __X, __m512i __Y) {
1355 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1356}
1357
1358static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1359_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
1360 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1361 (__v8di)_mm512_mul_epi32(__X, __Y),
1362 (__v8di)__W);
1363}
1364
1365static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1366_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) {
1367 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1368 (__v8di)_mm512_mul_epi32(__X, __Y),
1369 (__v8di)_mm512_setzero_si512 ());
1370}
1371
1372static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1373_mm512_mul_epu32(__m512i __X, __m512i __Y) {
1374 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1375}
1376
1377static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1378_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
1379 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1380 (__v8di)_mm512_mul_epu32(__X, __Y),
1381 (__v8di)__W);
1382}
1383
1384static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1385_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) {
1386 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1387 (__v8di)_mm512_mul_epu32(__X, __Y),
1388 (__v8di)_mm512_setzero_si512 ());
1389}
1390
1391static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1392_mm512_mullo_epi32(__m512i __A, __m512i __B) {
1393 return (__m512i) ((__v16su) __A * (__v16su) __B);
1394}
1395
1396static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1397_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1398 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1399 (__v16si)_mm512_mullo_epi32(__A, __B),
1400 (__v16si)_mm512_setzero_si512());
1401}
1402
1403static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1404_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1405 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1406 (__v16si)_mm512_mullo_epi32(__A, __B),
1407 (__v16si)__W);
1408}
1409
1410static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1411_mm512_mullox_epi64(__m512i __A, __m512i __B) {
1412 return (__m512i) ((__v8du) __A * (__v8du) __B);
1413}
1414
1415static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1416_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1417 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1418 (__v8di)_mm512_mullox_epi64(__A, __B),
1419 (__v8di)__W);
1420}
1421
1422#define _mm512_sqrt_round_pd(A, R) \
1423 ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1424
1425#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1426 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1427 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1428 (__v8df)(__m512d)(W)))
1429
1430#define _mm512_maskz_sqrt_round_pd(U, A, R) \
1431 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1432 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1433 (__v8df)_mm512_setzero_pd()))
1434
1435static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) {
1436 return (__m512d)__builtin_elementwise_sqrt((__v8df)__A);
1437}
1438
1439static __inline__ __m512d __DEFAULT_FN_ATTRS512
1440_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) {
1441 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
1442 (__v8df)__W);
1443}
1444
1445static __inline__ __m512d __DEFAULT_FN_ATTRS512
1447 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
1448 (__v8df)_mm512_setzero_pd());
1449}
1450
1451#define _mm512_sqrt_round_ps(A, R) \
1452 ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1453
1454#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1455 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1456 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1457 (__v16sf)(__m512)(W)))
1458
1459#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1460 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1461 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1462 (__v16sf)_mm512_setzero_ps()))
1463
1464static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) {
1465 return (__m512)__builtin_elementwise_sqrt((__v16sf)__A);
1466}
1467
1468static __inline__ __m512 __DEFAULT_FN_ATTRS512
1469_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) {
1470 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
1471 (__v16sf)__W);
1472}
1473
1474static __inline__ __m512 __DEFAULT_FN_ATTRS512
1476 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
1477 (__v16sf)_mm512_setzero_ps());
1478}
1479
1480static __inline__ __m512d __DEFAULT_FN_ATTRS512
1482{
1483 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1484 (__v8df)
1486 (__mmask8) -1);}
1487
1488static __inline__ __m512d __DEFAULT_FN_ATTRS512
1489_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1490{
1491 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1492 (__v8df) __W,
1493 (__mmask8) __U);
1494}
1495
1496static __inline__ __m512d __DEFAULT_FN_ATTRS512
1498{
1499 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1500 (__v8df)
1502 (__mmask8) __U);
1503}
1504
1505static __inline__ __m512 __DEFAULT_FN_ATTRS512
1507{
1508 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1509 (__v16sf)
1511 (__mmask16) -1);
1512}
1513
1514static __inline__ __m512 __DEFAULT_FN_ATTRS512
1515_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1516{
1517 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1518 (__v16sf) __W,
1519 (__mmask16) __U);
1520}
1521
1522static __inline__ __m512 __DEFAULT_FN_ATTRS512
1524{
1525 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1526 (__v16sf)
1528 (__mmask16) __U);
1529}
1530
1531static __inline__ __m128 __DEFAULT_FN_ATTRS128
1532_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1533{
1534 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1535 (__v4sf) __B,
1536 (__v4sf)
1537 _mm_setzero_ps (),
1538 (__mmask8) -1);
1539}
1540
1541static __inline__ __m128 __DEFAULT_FN_ATTRS128
1542_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1543{
1544 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1545 (__v4sf) __B,
1546 (__v4sf) __W,
1547 (__mmask8) __U);
1548}
1549
1550static __inline__ __m128 __DEFAULT_FN_ATTRS128
1551_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1552{
1553 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1554 (__v4sf) __B,
1555 (__v4sf) _mm_setzero_ps (),
1556 (__mmask8) __U);
1557}
1558
1559static __inline__ __m128d __DEFAULT_FN_ATTRS128
1560_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1561{
1562 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1563 (__v2df) __B,
1564 (__v2df)
1565 _mm_setzero_pd (),
1566 (__mmask8) -1);
1567}
1568
1569static __inline__ __m128d __DEFAULT_FN_ATTRS128
1570_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1571{
1572 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1573 (__v2df) __B,
1574 (__v2df) __W,
1575 (__mmask8) __U);
1576}
1577
1578static __inline__ __m128d __DEFAULT_FN_ATTRS128
1579_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1580{
1581 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1582 (__v2df) __B,
1583 (__v2df) _mm_setzero_pd (),
1584 (__mmask8) __U);
1585}
1586
1587static __inline__ __m512d __DEFAULT_FN_ATTRS512
1589{
1590 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1591 (__v8df)
1593 (__mmask8) -1);
1594}
1595
1596static __inline__ __m512d __DEFAULT_FN_ATTRS512
1597_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1598{
1599 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1600 (__v8df) __W,
1601 (__mmask8) __U);
1602}
1603
1604static __inline__ __m512d __DEFAULT_FN_ATTRS512
1606{
1607 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1608 (__v8df)
1610 (__mmask8) __U);
1611}
1612
1613static __inline__ __m512 __DEFAULT_FN_ATTRS512
1615{
1616 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1617 (__v16sf)
1619 (__mmask16) -1);
1620}
1621
1622static __inline__ __m512 __DEFAULT_FN_ATTRS512
1623_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1624{
1625 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1626 (__v16sf) __W,
1627 (__mmask16) __U);
1628}
1629
1630static __inline__ __m512 __DEFAULT_FN_ATTRS512
1632{
1633 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1634 (__v16sf)
1636 (__mmask16) __U);
1637}
1638
1639static __inline__ __m128 __DEFAULT_FN_ATTRS128
1640_mm_rcp14_ss(__m128 __A, __m128 __B)
1641{
1642 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1643 (__v4sf) __B,
1644 (__v4sf)
1645 _mm_setzero_ps (),
1646 (__mmask8) -1);
1647}
1648
1649static __inline__ __m128 __DEFAULT_FN_ATTRS128
1650_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1651{
1652 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1653 (__v4sf) __B,
1654 (__v4sf) __W,
1655 (__mmask8) __U);
1656}
1657
1658static __inline__ __m128 __DEFAULT_FN_ATTRS128
1659_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1660{
1661 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1662 (__v4sf) __B,
1663 (__v4sf) _mm_setzero_ps (),
1664 (__mmask8) __U);
1665}
1666
1667static __inline__ __m128d __DEFAULT_FN_ATTRS128
1668_mm_rcp14_sd(__m128d __A, __m128d __B)
1669{
1670 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1671 (__v2df) __B,
1672 (__v2df)
1673 _mm_setzero_pd (),
1674 (__mmask8) -1);
1675}
1676
1677static __inline__ __m128d __DEFAULT_FN_ATTRS128
1678_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1679{
1680 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1681 (__v2df) __B,
1682 (__v2df) __W,
1683 (__mmask8) __U);
1684}
1685
1686static __inline__ __m128d __DEFAULT_FN_ATTRS128
1687_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1688{
1689 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1690 (__v2df) __B,
1691 (__v2df) _mm_setzero_pd (),
1692 (__mmask8) __U);
1693}
1694
1695static __inline __m512 __DEFAULT_FN_ATTRS512
1697{
1698 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1700 (__v16sf) __A, (unsigned short)-1,
1702}
1703
1704static __inline__ __m512 __DEFAULT_FN_ATTRS512
1705_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1706{
1707 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1709 (__v16sf) __W, __U,
1711}
1712
1713static __inline __m512d __DEFAULT_FN_ATTRS512
1715{
1716 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1718 (__v8df) __A, (unsigned char)-1,
1720}
1721
1722static __inline__ __m512d __DEFAULT_FN_ATTRS512
1723_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1724{
1725 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1727 (__v8df) __W, __U,
1729}
1730
1731static __inline__ __m512 __DEFAULT_FN_ATTRS512
1732_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1733{
1734 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1736 (__v16sf) __W, __U,
1738}
1739
1740static __inline __m512 __DEFAULT_FN_ATTRS512
1742{
1743 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1745 (__v16sf) __A, (unsigned short)-1,
1747}
1748
1749static __inline __m512d __DEFAULT_FN_ATTRS512
1750_mm512_ceil_pd(__m512d __A)
1751{
1752 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1754 (__v8df) __A, (unsigned char)-1,
1756}
1757
1758static __inline__ __m512d __DEFAULT_FN_ATTRS512
1759_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1760{
1761 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1763 (__v8df) __W, __U,
1765}
1766
1767static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1768_mm512_abs_epi64(__m512i __A) {
1769 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1770}
1771
1772static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1773_mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
1774 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1775 (__v8di)_mm512_abs_epi64(__A),
1776 (__v8di)__W);
1777}
1778
1779static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1781 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1782 (__v8di)_mm512_abs_epi64(__A),
1783 (__v8di)_mm512_setzero_si512());
1784}
1785
1786static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1787_mm512_abs_epi32(__m512i __A) {
1788 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1789}
1790
1791static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1792_mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
1793 return (__m512i)__builtin_ia32_selectd_512(__U,
1794 (__v16si)_mm512_abs_epi32(__A),
1795 (__v16si)__W);
1796}
1797
1798static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1800 return (__m512i)__builtin_ia32_selectd_512(__U,
1801 (__v16si)_mm512_abs_epi32(__A),
1802 (__v16si)_mm512_setzero_si512());
1803}
1804
1805static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1806_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1807 __A = _mm_add_ss(__A, __B);
1808 return __builtin_ia32_selectss_128(__U, __A, __W);
1809}
1810
1811static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1812_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1813 __A = _mm_add_ss(__A, __B);
1814 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1815}
1816
1817#define _mm_add_round_ss(A, B, R) \
1818 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1819 (__v4sf)(__m128)(B), \
1820 (__v4sf)_mm_setzero_ps(), \
1821 (__mmask8)-1, (int)(R)))
1822
1823#define _mm_mask_add_round_ss(W, U, A, B, R) \
1824 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1825 (__v4sf)(__m128)(B), \
1826 (__v4sf)(__m128)(W), (__mmask8)(U), \
1827 (int)(R)))
1828
1829#define _mm_maskz_add_round_ss(U, A, B, R) \
1830 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1831 (__v4sf)(__m128)(B), \
1832 (__v4sf)_mm_setzero_ps(), \
1833 (__mmask8)(U), (int)(R)))
1834
1835static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1836_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1837 __A = _mm_add_sd(__A, __B);
1838 return __builtin_ia32_selectsd_128(__U, __A, __W);
1839}
1840
1841static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1842_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1843 __A = _mm_add_sd(__A, __B);
1844 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1845}
1846#define _mm_add_round_sd(A, B, R) \
1847 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1848 (__v2df)(__m128d)(B), \
1849 (__v2df)_mm_setzero_pd(), \
1850 (__mmask8)-1, (int)(R)))
1851
1852#define _mm_mask_add_round_sd(W, U, A, B, R) \
1853 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1854 (__v2df)(__m128d)(B), \
1855 (__v2df)(__m128d)(W), \
1856 (__mmask8)(U), (int)(R)))
1857
1858#define _mm_maskz_add_round_sd(U, A, B, R) \
1859 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1860 (__v2df)(__m128d)(B), \
1861 (__v2df)_mm_setzero_pd(), \
1862 (__mmask8)(U), (int)(R)))
1863
1864static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1865_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1866 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1867 (__v8df)_mm512_add_pd(__A, __B),
1868 (__v8df)__W);
1869}
1870
1871static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1872_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1873 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1874 (__v8df)_mm512_add_pd(__A, __B),
1875 (__v8df)_mm512_setzero_pd());
1876}
1877
1878static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1879_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1880 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1881 (__v16sf)_mm512_add_ps(__A, __B),
1882 (__v16sf)__W);
1883}
1884
1885static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1886_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1887 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1888 (__v16sf)_mm512_add_ps(__A, __B),
1889 (__v16sf)_mm512_setzero_ps());
1890}
1891
1892#define _mm512_add_round_pd(A, B, R) \
1893 ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1894 (__v8df)(__m512d)(B), (int)(R)))
1895
1896#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1897 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1898 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1899 (__v8df)(__m512d)(W)))
1900
1901#define _mm512_maskz_add_round_pd(U, A, B, R) \
1902 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1903 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1904 (__v8df)_mm512_setzero_pd()))
1905
1906#define _mm512_add_round_ps(A, B, R) \
1907 ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
1908 (__v16sf)(__m512)(B), (int)(R)))
1909
1910#define _mm512_mask_add_round_ps(W, U, A, B, R) \
1911 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1912 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1913 (__v16sf)(__m512)(W)))
1914
1915#define _mm512_maskz_add_round_ps(U, A, B, R) \
1916 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1917 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1918 (__v16sf)_mm512_setzero_ps()))
1919
1920static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1921_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1922 __A = _mm_sub_ss(__A, __B);
1923 return __builtin_ia32_selectss_128(__U, __A, __W);
1924}
1925
1926static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1927_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1928 __A = _mm_sub_ss(__A, __B);
1929 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1930}
1931#define _mm_sub_round_ss(A, B, R) \
1932 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1933 (__v4sf)(__m128)(B), \
1934 (__v4sf)_mm_setzero_ps(), \
1935 (__mmask8)-1, (int)(R)))
1936
1937#define _mm_mask_sub_round_ss(W, U, A, B, R) \
1938 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1939 (__v4sf)(__m128)(B), \
1940 (__v4sf)(__m128)(W), (__mmask8)(U), \
1941 (int)(R)))
1942
1943#define _mm_maskz_sub_round_ss(U, A, B, R) \
1944 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
1945 (__v4sf)(__m128)(B), \
1946 (__v4sf)_mm_setzero_ps(), \
1947 (__mmask8)(U), (int)(R)))
1948
1949static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1950_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1951 __A = _mm_sub_sd(__A, __B);
1952 return __builtin_ia32_selectsd_128(__U, __A, __W);
1953}
1954
1955static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1956_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1957 __A = _mm_sub_sd(__A, __B);
1958 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1959}
1960
1961#define _mm_sub_round_sd(A, B, R) \
1962 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1963 (__v2df)(__m128d)(B), \
1964 (__v2df)_mm_setzero_pd(), \
1965 (__mmask8)-1, (int)(R)))
1966
1967#define _mm_mask_sub_round_sd(W, U, A, B, R) \
1968 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1969 (__v2df)(__m128d)(B), \
1970 (__v2df)(__m128d)(W), \
1971 (__mmask8)(U), (int)(R)))
1972
1973#define _mm_maskz_sub_round_sd(U, A, B, R) \
1974 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
1975 (__v2df)(__m128d)(B), \
1976 (__v2df)_mm_setzero_pd(), \
1977 (__mmask8)(U), (int)(R)))
1978
1979static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1980_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1981 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1982 (__v8df)_mm512_sub_pd(__A, __B),
1983 (__v8df)__W);
1984}
1985
1986static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1987_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1988 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1989 (__v8df)_mm512_sub_pd(__A, __B),
1990 (__v8df)_mm512_setzero_pd());
1991}
1992
1993static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1994_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1995 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1996 (__v16sf)_mm512_sub_ps(__A, __B),
1997 (__v16sf)__W);
1998}
1999
2000static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2001_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2002 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2003 (__v16sf)_mm512_sub_ps(__A, __B),
2004 (__v16sf)_mm512_setzero_ps());
2005}
2006
/* 512-bit packed FP subtraction with explicit rounding control; R is one of
   the _MM_FROUND_* constants.  Mask/maskz forms merge from W or zero. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2034
/* Masked scalar single-precision multiply: lane 0 is __A*__B when bit 0 of
   __U is set, otherwise lane 0 of __W (or zero for the maskz form); upper
   lanes are passed through from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
/* Scalar single-precision multiply with explicit rounding control (R). */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2063
/* Masked scalar double-precision multiply: lane 0 is __A*__B when bit 0 of
   __U is set, otherwise lane 0 of __W (or zero for the maskz form); the
   upper lane is passed through from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
2075
/* Scalar double-precision multiply with explicit rounding control (R). */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2093
/* Masked 512-bit packed FP multiply: result elements whose mask bit is 0
   are taken from __W (merge-masking) or zeroed (zero-masking). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2121
/* 512-bit packed FP multiply with explicit rounding control (R). */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2149
/* Masked scalar single-precision divide: lane 0 is __A/__B when bit 0 of
   __U is set, otherwise lane 0 of __W (or zero for the maskz form); upper
   lanes are passed through from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  __A = _mm_div_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
  __A = _mm_div_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
2161
/* Scalar single-precision divide with explicit rounding control (R). */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2179
/* Masked scalar double-precision divide: lane 0 is __A/__B when bit 0 of
   __U is set, otherwise lane 0 of __W (or zero for the maskz form); the
   upper lane is passed through from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  __A = _mm_div_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
  __A = _mm_div_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
2191
/* Scalar double-precision divide with explicit rounding control (R). */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2209
2210static __inline __m512d
2212 return (__m512d)((__v8df)__a/(__v8df)__b);
2213}
2214
/* Masked 512-bit packed FP divide: result elements whose mask bit is 0 are
   taken from __W (merge-masking) or zeroed (zero-masking). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Elementwise divide of two 512-bit packed single-precision vectors. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_div_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a/(__v16sf)__b);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2247
/* 512-bit packed FP divide with explicit rounding control (R). */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2275
/* Round packed FP elements to a specified number of fraction bits; the
   immediate (B/imm) encodes the fraction-bit count and rounding mode.
   "_round" variants additionally take an explicit SAE/rounding operand R. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))

#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2339
/* Packed double-precision fused multiply-add family with explicit rounding
   control.  fmsub/fnmadd/fnmsub are expressed by negating the appropriate
   operand of the fmadd builtin. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2422
/* Packed double-precision FMA family using the default rounding mode.
   mask  : elements with a 0 mask bit come from the first operand __A.
   mask3 : elements with a 0 mask bit come from the addend __C.
   maskz : elements with a 0 mask bit are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
                                            (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
                                            -(__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
                                            (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
                                            -(__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__A);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C),
      (__v8df)_mm512_setzero_pd());
}
2522
/* Packed single-precision fused multiply-add family with explicit rounding
   control.  fmsub/fnmadd/fnmsub are expressed by negating the appropriate
   operand of the fmadd builtin. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2605
/* Packed single-precision FMA family using the default rounding mode.
   mask  : elements with a 0 mask bit come from the first operand __A.
   mask3 : elements with a 0 mask bit come from the addend __C.
   maskz : elements with a 0 mask bit are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
                                           (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
                                           -(__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
                                           (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
                                           -(__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
  return (__m512)__builtin_ia32_selectps_512(
      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C),
      (__v16sf)_mm512_setzero_ps());
}
2705
/* Packed double-precision fused multiply alternating add/subtract with
   explicit rounding control: fmaddsub subtracts C in even elements and adds
   it in odd ones; fmsubadd is the opposite (expressed by negating C). */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2753
2754
2755static __inline__ __m512d __DEFAULT_FN_ATTRS512
2756_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2757{
2758 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2759 (__v8df) __B,
2760 (__v8df) __C,
2761 (__mmask8) -1,
2763}
2764
2765static __inline__ __m512d __DEFAULT_FN_ATTRS512
2766_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2767{
2768 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2769 (__v8df) __B,
2770 (__v8df) __C,
2771 (__mmask8) __U,
2773}
2774
2775static __inline__ __m512d __DEFAULT_FN_ATTRS512
2776_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2777{
2778 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2779 (__v8df) __B,
2780 (__v8df) __C,
2781 (__mmask8) __U,
2783}
2784
2785static __inline__ __m512d __DEFAULT_FN_ATTRS512
2786_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2787{
2788 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2789 (__v8df) __B,
2790 (__v8df) __C,
2791 (__mmask8) __U,
2793}
2794
2795static __inline__ __m512d __DEFAULT_FN_ATTRS512
2796_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2797{
2798 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2799 (__v8df) __B,
2800 -(__v8df) __C,
2801 (__mmask8) -1,
2803}
2804
2805static __inline__ __m512d __DEFAULT_FN_ATTRS512
2806_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2807{
2808 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2809 (__v8df) __B,
2810 -(__v8df) __C,
2811 (__mmask8) __U,
2813}
2814
2815static __inline__ __m512d __DEFAULT_FN_ATTRS512
2816_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2817{
2818 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2819 (__v8df) __B,
2820 -(__v8df) __C,
2821 (__mmask8) __U,
2823}
2824
/* Packed single-precision fused multiply alternating add/subtract with
   explicit rounding control; fmsubadd is expressed by negating C. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
2872
2873
2874static __inline__ __m512 __DEFAULT_FN_ATTRS512
2875_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2876{
2877 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2878 (__v16sf) __B,
2879 (__v16sf) __C,
2880 (__mmask16) -1,
2882}
2883
2884static __inline__ __m512 __DEFAULT_FN_ATTRS512
2885_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2886{
2887 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2888 (__v16sf) __B,
2889 (__v16sf) __C,
2890 (__mmask16) __U,
2892}
2893
2894static __inline__ __m512 __DEFAULT_FN_ATTRS512
2895_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2896{
2897 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2898 (__v16sf) __B,
2899 (__v16sf) __C,
2900 (__mmask16) __U,
2902}
2903
2904static __inline__ __m512 __DEFAULT_FN_ATTRS512
2905_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2906{
2907 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2908 (__v16sf) __B,
2909 (__v16sf) __C,
2910 (__mmask16) __U,
2912}
2913
2914static __inline__ __m512 __DEFAULT_FN_ATTRS512
2915_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2916{
2917 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2918 (__v16sf) __B,
2919 -(__v16sf) __C,
2920 (__mmask16) -1,
2922}
2923
2924static __inline__ __m512 __DEFAULT_FN_ATTRS512
2925_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2926{
2927 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2928 (__v16sf) __B,
2929 -(__v16sf) __C,
2930 (__mmask16) __U,
2932}
2933
2934static __inline__ __m512 __DEFAULT_FN_ATTRS512
2935_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2936{
2937 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2938 (__v16sf) __B,
2939 -(__v16sf) __C,
2940 (__mmask16) __U,
2942}
2943
2944#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
2945 ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
2946 (__v8df)(__m512d)(B), \
2947 (__v8df)(__m512d)(C), \
2948 (__mmask8)(U), (int)(R)))
2949
2950#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
2951 ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
2952 (__v16sf)(__m512)(B), \
2953 (__v16sf)(__m512)(C), \
2954 (__mmask16)(U), (int)(R)))
2955
2956#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
2957 ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
2958 (__v8df)(__m512d)(B), \
2959 (__v8df)(__m512d)(C), \
2960 (__mmask8)(U), (int)(R)))
2961
2962
2963static __inline__ __m512d __DEFAULT_FN_ATTRS512
2964_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2965{
2966 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2967 (__v8df) __B,
2968 (__v8df) __C,
2969 (__mmask8) __U,
2971}
2972
2973#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
2974 ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
2975 (__v16sf)(__m512)(B), \
2976 (__v16sf)(__m512)(C), \
2977 (__mmask16)(U), (int)(R)))
2978
2979
2980static __inline__ __m512 __DEFAULT_FN_ATTRS512
2981_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2982{
2983 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2984 (__v16sf) __B,
2985 (__v16sf) __C,
2986 (__mmask16) __U,
2988}
2989
2990#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
2991 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2992 -(__v8df)(__m512d)(B), \
2993 (__v8df)(__m512d)(C), \
2994 (__mmask8)(U), (int)(R)))
2995
2996#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
2997 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2998 -(__v16sf)(__m512)(B), \
2999 (__v16sf)(__m512)(C), \
3000 (__mmask16)(U), (int)(R)))
3001
3002#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3003 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3004 -(__v8df)(__m512d)(B), \
3005 -(__v8df)(__m512d)(C), \
3006 (__mmask8)(U), (int)(R)))
3007
3008
3009#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3010 ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
3011 (__v8df)(__m512d)(B), \
3012 (__v8df)(__m512d)(C), \
3013 (__mmask8)(U), (int)(R)))
3014
3015#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3016 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3017 -(__v16sf)(__m512)(B), \
3018 -(__v16sf)(__m512)(C), \
3019 (__mmask16)(U), (int)(R)))
3020
3021
3022#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3023 ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3024 (__v16sf)(__m512)(B), \
3025 (__v16sf)(__m512)(C), \
3026 (__mmask16)(U), (int)(R)))
3027
3028/* Vector permutations */
3029
3030static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3031_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
3032 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3033 (__v16si) __B);
3034}
3035
3036static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3037_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3038 __m512i __B) {
3039 return (__m512i)__builtin_ia32_selectd_512(__U,
3040 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3041 (__v16si)__A);
3042}
3043
3044static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3045_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3046 __m512i __B) {
3047 return (__m512i)__builtin_ia32_selectd_512(__U,
3048 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3049 (__v16si)__I);
3050}
3051
3052static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3053_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3054 __m512i __B) {
3055 return (__m512i)__builtin_ia32_selectd_512(__U,
3056 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3057 (__v16si)_mm512_setzero_si512());
3058}
3059
3060static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3061_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) {
3062 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3063 (__v8di) __B);
3064}
3065
3066static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3067_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3068 __m512i __B) {
3069 return (__m512i)__builtin_ia32_selectq_512(__U,
3070 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3071 (__v8di)__A);
3072}
3073
3074static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3075_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3076 __m512i __B) {
3077 return (__m512i)__builtin_ia32_selectq_512(__U,
3078 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3079 (__v8di)__I);
3080}
3081
3082static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3083_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3084 __m512i __B) {
3085 return (__m512i)__builtin_ia32_selectq_512(__U,
3086 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3087 (__v8di)_mm512_setzero_si512());
3088}
3089
/* Concatenate A:B and shift right by I elements (valign). */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
/* Vector Extract */

/* Extract the I-th 256-bit (f64x4) or 128-bit (f32x4) sub-vector. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8) - 1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8) - 1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3148
3149/* Vector Blend */
3150
3151static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3152_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
3153 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3154 (__v8df) __W,
3155 (__v8df) __A);
3156}
3157
3158static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3159_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
3160 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3161 (__v16sf) __W,
3162 (__v16sf) __A);
3163}
3164
3165static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3166_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
3167 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3168 (__v8di) __W,
3169 (__v8di) __A);
3170}
3171
3172static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3173_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
3174 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3175 (__v16si) __W,
3176 (__v16si) __A);
3177}
3178
/* Compare */

/* Vector float compares producing a k-mask; the named variants expand
 * to _mm512_cmp_*_mask with the matching _CMP_* predicate. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3290
3291/* Conversion */
3292
3293#define _mm512_cvtt_roundps_epu32(A, R) \
3294 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3295 (__v16si)_mm512_undefined_epi32(), \
3296 (__mmask16)-1, (int)(R)))
3297
3298#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3299 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3300 (__v16si)(__m512i)(W), \
3301 (__mmask16)(U), (int)(R)))
3302
3303#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3304 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3305 (__v16si)_mm512_setzero_si512(), \
3306 (__mmask16)(U), (int)(R)))
3307
3308
3309static __inline __m512i __DEFAULT_FN_ATTRS512
3311{
3312 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3313 (__v16si)
3315 (__mmask16) -1,
3317}
3318
3319static __inline__ __m512i __DEFAULT_FN_ATTRS512
3320_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3321{
3322 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3323 (__v16si) __W,
3324 (__mmask16) __U,
3326}
3327
3328static __inline__ __m512i __DEFAULT_FN_ATTRS512
3330{
3331 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3332 (__v16si) _mm512_setzero_si512 (),
3333 (__mmask16) __U,
3335}
3336
3337#define _mm512_cvt_roundepi32_ps(A, R) \
3338 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3339 (__v16sf)_mm512_setzero_ps(), \
3340 (__mmask16)-1, (int)(R)))
3341
3342#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
3343 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3344 (__v16sf)(__m512)(W), \
3345 (__mmask16)(U), (int)(R)))
3346
3347#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
3348 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3349 (__v16sf)_mm512_setzero_ps(), \
3350 (__mmask16)(U), (int)(R)))
3351
3352#define _mm512_cvt_roundepu32_ps(A, R) \
3353 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3354 (__v16sf)_mm512_setzero_ps(), \
3355 (__mmask16)-1, (int)(R)))
3356
3357#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
3358 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3359 (__v16sf)(__m512)(W), \
3360 (__mmask16)(U), (int)(R)))
3361
3362#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
3363 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3364 (__v16sf)_mm512_setzero_ps(), \
3365 (__mmask16)(U), (int)(R)))
3366
3367static __inline__ __m512
3369 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3370}
3371
3372static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3373_mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3374 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3375 (__v16sf)_mm512_cvtepu32_ps(__A),
3376 (__v16sf)__W);
3377}
3378
3379static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3381 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3382 (__v16sf)_mm512_cvtepu32_ps(__A),
3383 (__v16sf)_mm512_setzero_ps());
3384}
3385
3386static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3388 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3389}
3390
3391static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3392_mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3393 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3394 (__v8df)_mm512_cvtepi32_pd(__A),
3395 (__v8df)__W);
3396}
3397
3398static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3400 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3401 (__v8df)_mm512_cvtepi32_pd(__A),
3402 (__v8df)_mm512_setzero_pd());
3403}
3404
3405static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3407 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3408}
3409
3410static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3411_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3412 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3413}
3414
3415static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3417 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3418}
3419
3420static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3421_mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3422 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3423 (__v16sf)_mm512_cvtepi32_ps(__A),
3424 (__v16sf)__W);
3425}
3426
3427static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3429 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3430 (__v16sf)_mm512_cvtepi32_ps(__A),
3431 (__v16sf)_mm512_setzero_ps());
3432}
3433
3434static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3436 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3437}
3438
3439static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3440_mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3441 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3442 (__v8df)_mm512_cvtepu32_pd(__A),
3443 (__v8df)__W);
3444}
3445
3446static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3448 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3449 (__v8df)_mm512_cvtepu32_pd(__A),
3450 (__v8df)_mm512_setzero_pd());
3451}
3452
3453static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3455 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3456}
3457
3458static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3459_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3460 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3461}
3462
3463#define _mm512_cvt_roundpd_ps(A, R) \
3464 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3465 (__v8sf)_mm256_setzero_ps(), \
3466 (__mmask8)-1, (int)(R)))
3467
3468#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3469 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3470 (__v8sf)(__m256)(W), (__mmask8)(U), \
3471 (int)(R)))
3472
3473#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3474 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3475 (__v8sf)_mm256_setzero_ps(), \
3476 (__mmask8)(U), (int)(R)))
3477
3478static __inline__ __m256
3480 return (__m256)__builtin_ia32_cvtpd2ps512_mask(
3481 (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
3483}
3484
3485static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
3486_mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A) {
3487 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3488 (__v8sf) __W,
3489 (__mmask8) __U,
3491}
3492
3493static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
3495 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3496 (__v8sf) _mm256_setzero_ps (),
3497 (__mmask8) __U,
3499}
3500
3501static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3502_mm512_cvtpd_pslo(__m512d __A) {
3503 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3504 (__v8sf) _mm256_setzero_ps (),
3505 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3506}
3507
3508static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3509_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
3510 return (__m512) __builtin_shufflevector (
3512 __U, __A),
3513 (__v8sf) _mm256_setzero_ps (),
3514 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3515}
3516
3517#define _mm512_cvt_roundps_ph(A, I) \
3518 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3519 (__v16hi)_mm256_undefined_si256(), \
3520 (__mmask16)-1))
3521
3522#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
3523 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3524 (__v16hi)(__m256i)(U), \
3525 (__mmask16)(W)))
3526
3527#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
3528 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3529 (__v16hi)_mm256_setzero_si256(), \
3530 (__mmask16)(W)))
3531
3532#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
3533#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
3534#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3535
3536#define _mm512_cvt_roundph_ps(A, R) \
3537 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3538 (__v16sf)_mm512_undefined_ps(), \
3539 (__mmask16)-1, (int)(R)))
3540
3541#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
3542 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3543 (__v16sf)(__m512)(W), \
3544 (__mmask16)(U), (int)(R)))
3545
3546#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
3547 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3548 (__v16sf)_mm512_setzero_ps(), \
3549 (__mmask16)(U), (int)(R)))
3550
3551
3552static __inline __m512 __DEFAULT_FN_ATTRS512
3554{
3555 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3556 (__v16sf)
3558 (__mmask16) -1,
3560}
3561
3562static __inline__ __m512 __DEFAULT_FN_ATTRS512
3563_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3564{
3565 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3566 (__v16sf) __W,
3567 (__mmask16) __U,
3569}
3570
3571static __inline__ __m512 __DEFAULT_FN_ATTRS512
3573{
3574 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3575 (__v16sf) _mm512_setzero_ps (),
3576 (__mmask16) __U,
3578}
3579
3580#define _mm512_cvtt_roundpd_epi32(A, R) \
3581 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3582 (__v8si)_mm256_setzero_si256(), \
3583 (__mmask8)-1, (int)(R)))
3584
3585#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3586 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3587 (__v8si)(__m256i)(W), \
3588 (__mmask8)(U), (int)(R)))
3589
3590#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3591 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3592 (__v8si)_mm256_setzero_si256(), \
3593 (__mmask8)(U), (int)(R)))
3594
3595static __inline __m256i __DEFAULT_FN_ATTRS512
3597{
3598 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3599 (__v8si)_mm256_setzero_si256(),
3600 (__mmask8) -1,
3602}
3603
3604static __inline__ __m256i __DEFAULT_FN_ATTRS512
3605_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3606{
3607 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3608 (__v8si) __W,
3609 (__mmask8) __U,
3611}
3612
3613static __inline__ __m256i __DEFAULT_FN_ATTRS512
3615{
3616 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3617 (__v8si) _mm256_setzero_si256 (),
3618 (__mmask8) __U,
3620}
3621
3622#define _mm512_cvtt_roundps_epi32(A, R) \
3623 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3624 (__v16si)_mm512_setzero_si512(), \
3625 (__mmask16)-1, (int)(R)))
3626
3627#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3628 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3629 (__v16si)(__m512i)(W), \
3630 (__mmask16)(U), (int)(R)))
3631
3632#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3633 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3634 (__v16si)_mm512_setzero_si512(), \
3635 (__mmask16)(U), (int)(R)))
3636
3637static __inline __m512i __DEFAULT_FN_ATTRS512
3639{
3640 return (__m512i)
3641 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3642 (__v16si) _mm512_setzero_si512 (),
3644}
3645
3646static __inline__ __m512i __DEFAULT_FN_ATTRS512
3647_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3648{
3649 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3650 (__v16si) __W,
3651 (__mmask16) __U,
3653}
3654
3655static __inline__ __m512i __DEFAULT_FN_ATTRS512
3657{
3658 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3659 (__v16si) _mm512_setzero_si512 (),
3660 (__mmask16) __U,
3662}
3663
3664#define _mm512_cvt_roundps_epi32(A, R) \
3665 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3666 (__v16si)_mm512_setzero_si512(), \
3667 (__mmask16)-1, (int)(R)))
3668
3669#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
3670 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3671 (__v16si)(__m512i)(W), \
3672 (__mmask16)(U), (int)(R)))
3673
3674#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
3675 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3676 (__v16si)_mm512_setzero_si512(), \
3677 (__mmask16)(U), (int)(R)))
3678
3679static __inline__ __m512i __DEFAULT_FN_ATTRS512
3681{
3682 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3683 (__v16si) _mm512_undefined_epi32 (),
3684 (__mmask16) -1,
3686}
3687
3688static __inline__ __m512i __DEFAULT_FN_ATTRS512
3689_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3690{
3691 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3692 (__v16si) __W,
3693 (__mmask16) __U,
3695}
3696
3697static __inline__ __m512i __DEFAULT_FN_ATTRS512
3699{
3700 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3701 (__v16si)
3703 (__mmask16) __U,
3705}
3706
3707#define _mm512_cvt_roundpd_epi32(A, R) \
3708 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3709 (__v8si)_mm256_setzero_si256(), \
3710 (__mmask8)-1, (int)(R)))
3711
3712#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
3713 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3714 (__v8si)(__m256i)(W), \
3715 (__mmask8)(U), (int)(R)))
3716
3717#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
3718 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3719 (__v8si)_mm256_setzero_si256(), \
3720 (__mmask8)(U), (int)(R)))
3721
3722static __inline__ __m256i __DEFAULT_FN_ATTRS512
3724{
3725 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3726 (__v8si)
3728 (__mmask8) -1,
3730}
3731
3732static __inline__ __m256i __DEFAULT_FN_ATTRS512
3733_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3734{
3735 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3736 (__v8si) __W,
3737 (__mmask8) __U,
3739}
3740
3741static __inline__ __m256i __DEFAULT_FN_ATTRS512
3743{
3744 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3745 (__v8si)
3747 (__mmask8) __U,
3749}
3750
3751#define _mm512_cvt_roundps_epu32(A, R) \
3752 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
3753 (__v16si)_mm512_setzero_si512(), \
3754 (__mmask16)-1, (int)(R)))
3755
3756#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
3757 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
3758 (__v16si)(__m512i)(W), \
3759 (__mmask16)(U), (int)(R)))
3760
3761#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
3762 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
3763 (__v16si)_mm512_setzero_si512(), \
3764 (__mmask16)(U), (int)(R)))
3765
3766static __inline__ __m512i __DEFAULT_FN_ATTRS512
3768{
3769 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
3770 (__v16si)\
3772 (__mmask16) -1,\
3774}
3775
3776static __inline__ __m512i __DEFAULT_FN_ATTRS512
3777_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3778{
3779 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3780 (__v16si) __W,
3781 (__mmask16) __U,
3783}
3784
3785static __inline__ __m512i __DEFAULT_FN_ATTRS512
3787{
3788 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3789 (__v16si)
3791 (__mmask16) __U ,
3793}
3794
3795#define _mm512_cvt_roundpd_epu32(A, R) \
3796 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
3797 (__v8si)_mm256_setzero_si256(), \
3798 (__mmask8)-1, (int)(R)))
3799
3800#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
3801 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
3802 (__v8si)(__m256i)(W), \
3803 (__mmask8)(U), (int)(R)))
3804
3805#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
3806 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
3807 (__v8si)_mm256_setzero_si256(), \
3808 (__mmask8)(U), (int)(R)))
3809
3810static __inline__ __m256i __DEFAULT_FN_ATTRS512
3812{
3813 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3814 (__v8si)
3816 (__mmask8) -1,
3818}
3819
3820static __inline__ __m256i __DEFAULT_FN_ATTRS512
3821_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
3822{
3823 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3824 (__v8si) __W,
3825 (__mmask8) __U,
3827}
3828
3829static __inline__ __m256i __DEFAULT_FN_ATTRS512
3831{
3832 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3833 (__v8si)
3835 (__mmask8) __U,
3837}
3838
3839static __inline__ double __DEFAULT_FN_ATTRS512
3841{
3842 return __a[0];
3843}
3844
3845static __inline__ float __DEFAULT_FN_ATTRS512
3847{
3848 return __a[0];
3849}
3850
3851/* Unpack and Interleave */
3852
3853static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3854_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
3855 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
3856 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
3857}
3858
3859static __inline__ __m512d __DEFAULT_FN_ATTRS512
3860_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
3861{
3862 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3863 (__v8df)_mm512_unpackhi_pd(__A, __B),
3864 (__v8df)__W);
3865}
3866
3867static __inline__ __m512d __DEFAULT_FN_ATTRS512
3868_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
3869{
3870 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3871 (__v8df)_mm512_unpackhi_pd(__A, __B),
3872 (__v8df)_mm512_setzero_pd());
3873}
3874
3875static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3876_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
3877 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
3878 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
3879}
3880
/* Merge-masked unpacklo_pd: lanes enabled in __U take the unpack result,
   disabled lanes pass through from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpacklo_pd(__A, __B),
                                              (__v8df)__W);
}
3888
/* Zero-masked unpacklo_pd: disabled lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_unpacklo_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
3896
3897static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3898_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
3899 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
3900 2, 18, 3, 19,
3901 2+4, 18+4, 3+4, 19+4,
3902 2+8, 18+8, 3+8, 19+8,
3903 2+12, 18+12, 3+12, 19+12);
3904}
3905
/* Merge-masked unpackhi_ps: lanes enabled in __U take the unpack result,
   disabled lanes pass through from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                             (__v16sf)__W);
}
3913
/* Zero-masked unpackhi_ps: disabled lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
3921
3922static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3923_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
3924 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
3925 0, 16, 1, 17,
3926 0+4, 16+4, 1+4, 17+4,
3927 0+8, 16+8, 1+8, 17+8,
3928 0+12, 16+12, 1+12, 17+12);
3929}
3930
/* Merge-masked unpacklo_ps: lanes enabled in __U take the unpack result,
   disabled lanes pass through from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                             (__v16sf)__W);
}
3938
/* Zero-masked unpacklo_ps: disabled lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                             (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
3946
3947static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3948_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
3949 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3950 2, 18, 3, 19,
3951 2+4, 18+4, 3+4, 19+4,
3952 2+8, 18+8, 3+8, 19+8,
3953 2+12, 18+12, 3+12, 19+12);
3954}
3955
/* Merge-masked unpackhi_epi32: lanes enabled in __U take the unpack result,
   disabled lanes pass through from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                             (__v16si)__W);
}
3963
/* Zero-masked unpackhi_epi32: disabled lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
3971
3972static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3973_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
3974 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3975 0, 16, 1, 17,
3976 0+4, 16+4, 1+4, 17+4,
3977 0+8, 16+8, 1+8, 17+8,
3978 0+12, 16+12, 1+12, 17+12);
3979}
3980
/* Merge-masked unpacklo_epi32: lanes enabled in __U take the unpack result,
   disabled lanes pass through from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                             (__v16si)__W);
}
3988
/* Zero-masked unpacklo_epi32: disabled lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                             (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
3996
3997static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3998_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
3999 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4000 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4001}
4002
/* Merge-masked unpackhi_epi64: lanes enabled in __U take the unpack result,
   disabled lanes pass through from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                             (__v8di)__W);
}
4010
/* Zero-masked unpackhi_epi64: disabled lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4018
4019static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4020_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
4021 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4022 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4023}
4024
/* Merge-masked unpacklo_epi64: lanes enabled in __U take the unpack result,
   disabled lanes pass through from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                             (__v8di)__W);
}
4032
/* Zero-masked unpacklo_epi64: disabled lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                             (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4040
4041
4042/* SIMD load ops */
4043
4044static __inline __m512i __DEFAULT_FN_ATTRS512
4046{
4047 struct __loadu_si512 {
4048 __m512i_u __v;
4049 } __attribute__((__packed__, __may_alias__));
4050 return ((const struct __loadu_si512*)__P)->__v;
4051}
4052
4053static __inline __m512i __DEFAULT_FN_ATTRS512
4055{
4056 struct __loadu_epi32 {
4057 __m512i_u __v;
4058 } __attribute__((__packed__, __may_alias__));
4059 return ((const struct __loadu_epi32*)__P)->__v;
4060}
4061
/* Merge-masked unaligned load of 32-bit ints: enabled lanes read from __P,
   disabled lanes pass through from __W. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                                                     (__v16si) __W,
                                                     (__mmask16) __U);
}
4069
4070
4071static __inline __m512i __DEFAULT_FN_ATTRS512
4073{
4074 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4075 (__v16si)
4077 (__mmask16) __U);
4078}
4079
4080static __inline __m512i __DEFAULT_FN_ATTRS512
4082{
4083 struct __loadu_epi64 {
4084 __m512i_u __v;
4085 } __attribute__((__packed__, __may_alias__));
4086 return ((const struct __loadu_epi64*)__P)->__v;
4087}
4088
/* Merge-masked unaligned load of 64-bit ints: enabled lanes read from __P,
   disabled lanes pass through from __W. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
                                                     (__v8di) __W,
                                                     (__mmask8) __U);
}
4096
4097static __inline __m512i __DEFAULT_FN_ATTRS512
4099{
4100 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4101 (__v8di)
4103 (__mmask8) __U);
4104}
4105
/* Merge-masked unaligned load of floats: enabled lanes read from __P,
   disabled lanes pass through from __W. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
                                                  (__v16sf) __W,
                                                  (__mmask16) __U);
}
4113
4114static __inline __m512 __DEFAULT_FN_ATTRS512
4116{
4117 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4118 (__v16sf)
4120 (__mmask16) __U);
4121}
4122
/* Merge-masked unaligned load of doubles: enabled lanes read from __P,
   disabled lanes pass through from __W. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
                                                   (__v8df) __W,
                                                   (__mmask8) __U);
}
4130
4131static __inline __m512d __DEFAULT_FN_ATTRS512
4133{
4134 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4135 (__v8df)
4137 (__mmask8) __U);
4138}
4139
4140static __inline __m512d __DEFAULT_FN_ATTRS512
4142{
4143 struct __loadu_pd {
4144 __m512d_u __v;
4145 } __attribute__((__packed__, __may_alias__));
4146 return ((const struct __loadu_pd*)__p)->__v;
4147}
4148
4149static __inline __m512 __DEFAULT_FN_ATTRS512
4151{
4152 struct __loadu_ps {
4153 __m512_u __v;
4154 } __attribute__((__packed__, __may_alias__));
4155 return ((const struct __loadu_ps*)__p)->__v;
4156}
4157
4158static __inline __m512 __DEFAULT_FN_ATTRS512
4160{
4161 return *(const __m512*)__p;
4162}
4163
/* Merge-masked aligned load of floats: enabled lanes read from __P,
   disabled lanes pass through from __W. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                                                  (__v16sf) __W,
                                                  (__mmask16) __U);
}
4171
4172static __inline __m512 __DEFAULT_FN_ATTRS512
4174{
4175 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4176 (__v16sf)
4178 (__mmask16) __U);
4179}
4180
4181static __inline __m512d __DEFAULT_FN_ATTRS512
4183{
4184 return *(const __m512d*)__p;
4185}
4186
/* Merge-masked aligned load of doubles: enabled lanes read from __P,
   disabled lanes pass through from __W. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                                                   (__v8df) __W,
                                                   (__mmask8) __U);
}
4194
4195static __inline __m512d __DEFAULT_FN_ATTRS512
4197{
4198 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4199 (__v8df)
4201 (__mmask8) __U);
4202}
4203
4204static __inline __m512i __DEFAULT_FN_ATTRS512
4206{
4207 return *(const __m512i *) __P;
4208}
4209
4210static __inline __m512i __DEFAULT_FN_ATTRS512
4212{
4213 return *(const __m512i *) __P;
4214}
4215
4216static __inline __m512i __DEFAULT_FN_ATTRS512
4218{
4219 return *(const __m512i *) __P;
4220}
4221
4222/* SIMD store ops */
4223
/* Unaligned store of 8 packed 64-bit integers via a packed/may_alias
   wrapper struct (alignment-1, aliasing-safe). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
  struct __storeu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi64*)__P)->__v = __A;
}
4232
/* Masked unaligned store of 64-bit ints: only lanes enabled in __U are
   written to memory. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}
4239
/* Unaligned 512-bit integer store via a packed/may_alias wrapper struct. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  struct __storeu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si512*)__P)->__v = __A;
}
4248
/* Unaligned store of 16 packed 32-bit integers via a packed/may_alias
   wrapper struct. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
  struct __storeu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi32*)__P)->__v = __A;
}
4257
4258static __inline void __DEFAULT_FN_ATTRS512
4260{
4261 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4262 (__mmask16) __U);
4263}
4264
/* Masked unaligned store of doubles: only lanes enabled in __U are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}
4270
/* Unaligned store of 8 doubles via a packed/may_alias wrapper struct. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_pd(void *__P, __m512d __A)
{
  struct __storeu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pd*)__P)->__v = __A;
}
4279
/* Masked unaligned store of floats: only lanes enabled in __U are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}
4286
/* Unaligned store of 16 floats via a packed/may_alias wrapper struct. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_ps(void *__P, __m512 __A)
{
  struct __storeu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ps*)__P)->__v = __A;
}
4295
/* Masked aligned store of doubles: only lanes enabled in __U are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}
4301
/* Aligned (64-byte) store of 8 doubles. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}
4307
/* Masked aligned store of floats: only lanes enabled in __U are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}
4314
/* Aligned (64-byte) store of 16 floats. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}
4320
/* Aligned (64-byte) 512-bit integer store. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4326
/* Aligned store of 16 packed 32-bit integers. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4332
/* Aligned store of 8 packed 64-bit integers. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4338
4339/* Mask ops */
4340
4343 return __builtin_ia32_knothi(__M);
4344}
4345
4346/* Integer compare */
4347
/* Convenience aliases for signed 32-bit comparisons: each forwards to
   _mm512_[mask_]cmp_epi32_mask with the matching _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
4372
/* Convenience aliases for unsigned 32-bit comparisons (ucmpd builtin). */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
4397
/* Convenience aliases for signed 64-bit comparisons. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
4422
/* Convenience aliases for unsigned 64-bit comparisons (ucmpq builtin). */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4447
4448static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4450 /* This function always performs a signed extension, but __v16qi is a char
4451 which may be signed or unsigned, so use __v16qs. */
4452 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4453}
4454
/* Merge-masked sign extension byte -> 32-bit: disabled lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)__W);
}
4461
4462static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4464 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4465 (__v16si)_mm512_cvtepi8_epi32(__A),
4466 (__v16si)_mm512_setzero_si512());
4467}
4468
4469static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4471 /* This function always performs a signed extension, but __v16qi is a char
4472 which may be signed or unsigned, so use __v16qs. */
4473 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4474}
4475
/* Merge-masked sign extension byte -> 64-bit: disabled lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)__W);
}
4482
4483static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4485 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4486 (__v8di)_mm512_cvtepi8_epi64(__A),
4487 (__v8di)_mm512_setzero_si512 ());
4488}
4489
4490static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4492 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4493}
4494
/* Merge-masked sign extension 32 -> 64-bit: disabled lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)__W);
}
4501
4502static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4504 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4505 (__v8di)_mm512_cvtepi32_epi64(__X),
4506 (__v8di)_mm512_setzero_si512());
4507}
4508
4509static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4511 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4512}
4513
/* Merge-masked sign extension 16 -> 32-bit: disabled lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi16_epi32(__A),
                                             (__v16si)__W);
}
4520
4521static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4523 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4524 (__v16si)_mm512_cvtepi16_epi32(__A),
4525 (__v16si)_mm512_setzero_si512 ());
4526}
4527
4528static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4530 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4531}
4532
/* Merge-masked sign extension 16 -> 64-bit: disabled lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)__W);
}
4539
4540static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4542 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4543 (__v8di)_mm512_cvtepi16_epi64(__A),
4544 (__v8di)_mm512_setzero_si512());
4545}
4546
4547static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4549 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4550}
4551
/* Merge-masked zero extension byte -> 32-bit: disabled lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)__W);
}
4558
4559static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4561 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4562 (__v16si)_mm512_cvtepu8_epi32(__A),
4563 (__v16si)_mm512_setzero_si512());
4564}
4565
4566static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4568 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4569}
4570
/* Merge-masked zero extension byte -> 64-bit: disabled lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)__W);
}
4577
4578static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4580 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4581 (__v8di)_mm512_cvtepu8_epi64(__A),
4582 (__v8di)_mm512_setzero_si512());
4583}
4584
4585static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4587 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4588}
4589
/* Merge-masked zero extension 32 -> 64-bit: disabled lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)__W);
}
4596
4597static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4599 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4600 (__v8di)_mm512_cvtepu32_epi64(__X),
4601 (__v8di)_mm512_setzero_si512());
4602}
4603
4604static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4606 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4607}
4608
/* Merge-masked zero extension 16 -> 32-bit: disabled lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu16_epi32(__A),
                                             (__v16si)__W);
}
4615
4616static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4618 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4619 (__v16si)_mm512_cvtepu16_epi32(__A),
4620 (__v16si)_mm512_setzero_si512());
4621}
4622
4623static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4625 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4626}
4627
/* Merge-masked zero extension 16 -> 64-bit: disabled lanes from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)__W);
}
4634
4635static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4637 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4638 (__v8di)_mm512_cvtepu16_epi64(__A),
4639 (__v8di)_mm512_setzero_si512());
4640}
4641
/* Per-lane variable rotate right of 32-bit elements: funnel-shift-right with
   both data operands equal to __A implements rotate by __B (mod 32). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
}
4647
/* Merge-masked rorv_epi32: disabled lanes pass through from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rorv_epi32(__A, __B),
                                             (__v16si)__W);
}
4655
/* Zero-masked rorv_epi32: disabled lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rorv_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
4663
/* Per-lane variable rotate right of 64-bit elements (funnel-shift idiom). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
}
4669
/* Merge-masked rorv_epi64: disabled lanes pass through from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rorv_epi64(__A, __B),
                                             (__v8di)__W);
}
4677
/* Zero-masked rorv_epi64: disabled lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rorv_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4685
4686
4687
/* General integer compare-to-mask macros: `p` selects the _MM_CMPINT_*
   predicate; unmasked forms use an all-ones write mask, masked forms AND the
   result with `m`. Macros (not functions) because `p` must be an immediate. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))
4727
/* Rotate-left-by-immediate macros; masked forms blend with __W or zero via
   the select builtins. Macros because the rotate count is an immediate. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
4753
/* Per-lane variable rotate left of 32-bit elements: funnel-shift-left with
   both data operands equal to __A implements rotate by __B (mod 32). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
}
4759
/* Merge-masked rolv_epi32: disabled lanes pass through from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rolv_epi32(__A, __B),
                                             (__v16si)__W);
}
4767
/* Zero-masked rolv_epi32: disabled lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_rolv_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
4775
/* Per-lane variable rotate left of 64-bit elements (funnel-shift idiom). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
}
4781
/* Merge-masked rolv_epi64: disabled lanes pass through from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rolv_epi64(__A, __B),
                                             (__v8di)__W);
}
4789
/* Zero-masked rolv_epi64: disabled lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                             (__v8di)_mm512_rolv_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
4797
/* Rotate-right-by-immediate macros; masked forms blend with W or zero. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
4823
/* Shift all 32-bit elements left by the scalar count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_slli_epi32(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
}
4828
/* Merge-masked slli_epi32: disabled lanes pass through from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)__W);
}
4836
/* Zero-masked slli_epi32: disabled lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_slli_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
4843
4844static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4845_mm512_slli_epi64(__m512i __A, unsigned int __B) {
4846 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
4847}
4848
4849static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4850_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
4851 unsigned int __B) {
4852 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4853 (__v8di)_mm512_slli_epi64(__A, __B),
4854 (__v8di)__W);
4855}
4856
4857static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4858_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4859 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4860 (__v8di)_mm512_slli_epi64(__A, __B),
4861 (__v8di)_mm512_setzero_si512());
4862}
4863
4864static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4865_mm512_srli_epi32(__m512i __A, unsigned int __B) {
4866 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
4867}
4868
4869static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4870_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
4871 unsigned int __B) {
4872 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4873 (__v16si)_mm512_srli_epi32(__A, __B),
4874 (__v16si)__W);
4875}
4876
4877static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4878_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
4879 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4880 (__v16si)_mm512_srli_epi32(__A, __B),
4881 (__v16si)_mm512_setzero_si512());
4882}
4883
4884static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4885_mm512_srli_epi64(__m512i __A, unsigned int __B) {
4886 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
4887}
4888
4889static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4890_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
4891 unsigned int __B) {
4892 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4893 (__v8di)_mm512_srli_epi64(__A, __B),
4894 (__v8di)__W);
4895}
4896
4897static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4898_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4899 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4900 (__v8di)_mm512_srli_epi64(__A, __B),
4901 (__v8di)_mm512_setzero_si512());
4902}
4903
4904static __inline__ __m512i __DEFAULT_FN_ATTRS512
4905_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4906{
4907 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4908 (__v16si) __W,
4909 (__mmask16) __U);
4910}
4911
4912static __inline__ __m512i __DEFAULT_FN_ATTRS512
4914{
4915 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4916 (__v16si)
4918 (__mmask16) __U);
4919}
4920
4921static __inline__ void __DEFAULT_FN_ATTRS512
4922_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
4923{
4924 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
4925 (__mmask16) __U);
4926}
4927
4928static __inline__ __m512i __DEFAULT_FN_ATTRS512
4929_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
4930{
4931 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4932 (__v16si) __A,
4933 (__v16si) __W);
4934}
4935
4936static __inline__ __m512i __DEFAULT_FN_ATTRS512
4938{
4939 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4940 (__v16si) __A,
4941 (__v16si) _mm512_setzero_si512 ());
4942}
4943
4944static __inline__ __m512i __DEFAULT_FN_ATTRS512
4945_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
4946{
4947 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4948 (__v8di) __A,
4949 (__v8di) __W);
4950}
4951
4952static __inline__ __m512i __DEFAULT_FN_ATTRS512
4954{
4955 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4956 (__v8di) __A,
4957 (__v8di) _mm512_setzero_si512 ());
4958}
4959
4960static __inline__ __m512i __DEFAULT_FN_ATTRS512
4961_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4962{
4963 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4964 (__v8di) __W,
4965 (__mmask8) __U);
4966}
4967
4968static __inline__ __m512i __DEFAULT_FN_ATTRS512
4970{
4971 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4972 (__v8di)
4974 (__mmask8) __U);
4975}
4976
4977static __inline__ void __DEFAULT_FN_ATTRS512
4978_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
4979{
4980 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
4981 (__mmask8) __U);
4982}
4983
4984static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
4986{
4987 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
4988 0, 0, 2, 2, 4, 4, 6, 6);
4989}
4990
4991static __inline__ __m512d __DEFAULT_FN_ATTRS512
4992_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
4993{
4994 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
4995 (__v8df)_mm512_movedup_pd(__A),
4996 (__v8df)__W);
4997}
4998
4999static __inline__ __m512d __DEFAULT_FN_ATTRS512
5001{
5002 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5003 (__v8df)_mm512_movedup_pd(__A),
5004 (__v8df)_mm512_setzero_pd());
5005}
5006
/* vfixupimmpd/vfixupimmps: fix up special values (NaN, Inf, zero, ...) of the
   A/B inputs according to the table operand C and immediate imm.  The _round_
   variants take an explicit rounding/SAE control R; the plain forms use the
   current direction. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
5086
/* Scalar fixupimm on the low double/float lane; upper lanes pass through
   from A. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
5164
5165#define _mm_getexp_round_sd(A, B, R) \
5166 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5167 (__v2df)(__m128d)(B), \
5168 (__v2df)_mm_setzero_pd(), \
5169 (__mmask8)-1, (int)(R)))
5170
5171
5172static __inline__ __m128d __DEFAULT_FN_ATTRS128
5173_mm_getexp_sd (__m128d __A, __m128d __B)
5174{
5175 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5176 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5177}
5178
5179static __inline__ __m128d __DEFAULT_FN_ATTRS128
5180_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5181{
5182 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5183 (__v2df) __B,
5184 (__v2df) __W,
5185 (__mmask8) __U,
5187}
5188
5189#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5190 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5191 (__v2df)(__m128d)(B), \
5192 (__v2df)(__m128d)(W), \
5193 (__mmask8)(U), (int)(R)))
5194
5195static __inline__ __m128d __DEFAULT_FN_ATTRS128
5196_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5197{
5198 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5199 (__v2df) __B,
5200 (__v2df) _mm_setzero_pd (),
5201 (__mmask8) __U,
5203}
5204
5205#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5206 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5207 (__v2df)(__m128d)(B), \
5208 (__v2df)_mm_setzero_pd(), \
5209 (__mmask8)(U), (int)(R)))
5210
5211#define _mm_getexp_round_ss(A, B, R) \
5212 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5213 (__v4sf)(__m128)(B), \
5214 (__v4sf)_mm_setzero_ps(), \
5215 (__mmask8)-1, (int)(R)))
5216
5217static __inline__ __m128 __DEFAULT_FN_ATTRS128
5218_mm_getexp_ss (__m128 __A, __m128 __B)
5219{
5220 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5221 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5222}
5223
5224static __inline__ __m128 __DEFAULT_FN_ATTRS128
5225_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5226{
5227 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5228 (__v4sf) __B,
5229 (__v4sf) __W,
5230 (__mmask8) __U,
5232}
5233
5234#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5235 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5236 (__v4sf)(__m128)(B), \
5237 (__v4sf)(__m128)(W), \
5238 (__mmask8)(U), (int)(R)))
5239
5240static __inline__ __m128 __DEFAULT_FN_ATTRS128
5241_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5242{
5243 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5244 (__v4sf) __B,
5245 (__v4sf) _mm_setzero_ps (),
5246 (__mmask8) __U,
5248}
5249
5250#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5251 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5252 (__v4sf)(__m128)(B), \
5253 (__v4sf)_mm_setzero_ps(), \
5254 (__mmask8)(U), (int)(R)))
5255
/* vgetmantsd/vgetmantss: extract the normalized mantissa of the low element
   of B; C selects the interval, D the sign control ((D<<2)|C forms the
   4-bit immediate).  Upper lanes come from A. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5345
5346static __inline__ __mmask16
5350
/* Compare the low elements of A and B with predicate P under SAE control R;
   yields the comparison result as an int. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))

#ifdef __x86_64__
/* Convert the low double to a signed 64-bit int with explicit rounding R. */
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5363
5364static __inline__ __m512i
5366 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5367}
5368
5369static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5370_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5371 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5372 (__v16si)_mm512_sll_epi32(__A, __B),
5373 (__v16si)__W);
5374}
5375
5376static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5377_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5378 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5379 (__v16si)_mm512_sll_epi32(__A, __B),
5380 (__v16si)_mm512_setzero_si512());
5381}
5382
5383static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5384_mm512_sll_epi64(__m512i __A, __m128i __B) {
5385 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5386}
5387
5388static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5389_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5390 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5391 (__v8di)_mm512_sll_epi64(__A, __B),
5392 (__v8di)__W);
5393}
5394
5395static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5396_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5397 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5398 (__v8di)_mm512_sll_epi64(__A, __B),
5399 (__v8di)_mm512_setzero_si512());
5400}
5401
5402static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5403_mm512_sllv_epi32(__m512i __X, __m512i __Y) {
5404 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5405}
5406
5407static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5408_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5409 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5410 (__v16si)_mm512_sllv_epi32(__X, __Y),
5411 (__v16si)__W);
5412}
5413
5414static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5415_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5416 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5417 (__v16si)_mm512_sllv_epi32(__X, __Y),
5418 (__v16si)_mm512_setzero_si512());
5419}
5420
5421static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5422_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5423{
5424 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5425}
5426
5427static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5428_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5429{
5430 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5431 (__v8di)_mm512_sllv_epi64(__X, __Y),
5432 (__v8di)__W);
5433}
5434
5435static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5436_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5437{
5438 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5439 (__v8di)_mm512_sllv_epi64(__X, __Y),
5440 (__v8di)_mm512_setzero_si512());
5441}
5442
5443static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5444_mm512_sra_epi32(__m512i __A, __m128i __B) {
5445 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5446}
5447
5448static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5449_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5450 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5451 (__v16si)_mm512_sra_epi32(__A, __B),
5452 (__v16si)__W);
5453}
5454
5455static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5456_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5457 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5458 (__v16si)_mm512_sra_epi32(__A, __B),
5459 (__v16si)_mm512_setzero_si512());
5460}
5461
5462static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5463_mm512_sra_epi64(__m512i __A, __m128i __B) {
5464 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5465}
5466
5467static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5468_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5469 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5470 (__v8di)_mm512_sra_epi64(__A, __B),
5471 (__v8di)__W);
5472}
5473
5474static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5475_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5476 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5477 (__v8di)_mm512_sra_epi64(__A, __B),
5478 (__v8di)_mm512_setzero_si512());
5479}
5480
5481static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5482_mm512_srav_epi32(__m512i __X, __m512i __Y) {
5483 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5484}
5485
5486static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5487_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5488 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5489 (__v16si)_mm512_srav_epi32(__X, __Y),
5490 (__v16si)__W);
5491}
5492
5493static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5494_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5495 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5496 (__v16si)_mm512_srav_epi32(__X, __Y),
5497 (__v16si)_mm512_setzero_si512());
5498}
5499
5500static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5501_mm512_srav_epi64(__m512i __X, __m512i __Y)
5502{
5503 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5504}
5505
5506static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5507_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5508{
5509 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5510 (__v8di)_mm512_srav_epi64(__X, __Y),
5511 (__v8di)__W);
5512}
5513
5514static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5515_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5516{
5517 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5518 (__v8di)_mm512_srav_epi64(__X, __Y),
5519 (__v8di)_mm512_setzero_si512());
5520}
5521
5522static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5523_mm512_srl_epi32(__m512i __A, __m128i __B) {
5524 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5525}
5526
5527static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5528_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5529 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5530 (__v16si)_mm512_srl_epi32(__A, __B),
5531 (__v16si)__W);
5532}
5533
5534static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5535_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5536 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5537 (__v16si)_mm512_srl_epi32(__A, __B),
5538 (__v16si)_mm512_setzero_si512());
5539}
5540
5541static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5542_mm512_srl_epi64(__m512i __A, __m128i __B) {
5543 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5544}
5545
5546static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5547_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5548 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5549 (__v8di)_mm512_srl_epi64(__A, __B),
5550 (__v8di)__W);
5551}
5552
5553static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5554_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5555 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5556 (__v8di)_mm512_srl_epi64(__A, __B),
5557 (__v8di)_mm512_setzero_si512());
5558}
5559
5560static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5561_mm512_srlv_epi32(__m512i __X, __m512i __Y) {
5562 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5563}
5564
5565static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5566_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5567 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5568 (__v16si)_mm512_srlv_epi32(__X, __Y),
5569 (__v16si)__W);
5570}
5571
5572static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5573_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5574 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5575 (__v16si)_mm512_srlv_epi32(__X, __Y),
5576 (__v16si)_mm512_setzero_si512());
5577}
5578
5579static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5580_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5581{
5582 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5583}
5584
5585static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5586_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5587{
5588 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5589 (__v8di)_mm512_srlv_epi64(__X, __Y),
5590 (__v8di)__W);
5591}
5592
5593static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5594_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5595{
5596 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5597 (__v8di)_mm512_srlv_epi64(__X, __Y),
5598 (__v8di)_mm512_setzero_si512());
5599}
5600
/// \enum _MM_TERNLOG_ENUM
/// A helper to represent the ternary logic operations among vector \a A,
/// \a B and \a C. The representation is passed to \a imm.
typedef enum {
  _MM_TERNLOG_A = 0xF0,
  _MM_TERNLOG_B = 0xCC,
  _MM_TERNLOG_C = 0xAA
} _MM_TERNLOG_ENUM;

#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
5639
5640#ifdef __x86_64__
5641#define _mm_cvt_roundsd_i64(A, R) \
5642 ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5643#endif
5644
5645#define _mm_cvt_roundsd_si32(A, R) \
5646 ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5647
5648#define _mm_cvt_roundsd_i32(A, R) \
5649 ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5650
5651#define _mm_cvt_roundsd_u32(A, R) \
5652 ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5653
5654static __inline__ unsigned __DEFAULT_FN_ATTRS128
5655_mm_cvtsd_u32 (__m128d __A)
5656{
5657 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5659}
5660
5661#ifdef __x86_64__
5662#define _mm_cvt_roundsd_u64(A, R) \
5663 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5664 (int)(R)))
5665
5666static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5667_mm_cvtsd_u64 (__m128d __A)
5668{
5669 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5670 __A,
5672}
5673#endif
5674
5675#define _mm_cvt_roundss_si32(A, R) \
5676 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5677
5678#define _mm_cvt_roundss_i32(A, R) \
5679 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5680
5681#ifdef __x86_64__
5682#define _mm_cvt_roundss_si64(A, R) \
5683 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5684
5685#define _mm_cvt_roundss_i64(A, R) \
5686 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5687#endif
5688
5689#define _mm_cvt_roundss_u32(A, R) \
5690 ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5691
5692static __inline__ unsigned __DEFAULT_FN_ATTRS128
5693_mm_cvtss_u32 (__m128 __A)
5694{
5695 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5697}
5698
5699#ifdef __x86_64__
5700#define _mm_cvt_roundss_u64(A, R) \
5701 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
5702 (int)(R)))
5703
5704static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5705_mm_cvtss_u64 (__m128 __A)
5706{
5707 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
5708 __A,
5710}
5711#endif
5712
5713#define _mm_cvtt_roundsd_i32(A, R) \
5714 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5715
5716#define _mm_cvtt_roundsd_si32(A, R) \
5717 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5718
5719static __inline__ int __DEFAULT_FN_ATTRS128
5720_mm_cvttsd_i32 (__m128d __A)
5721{
5722 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
5724}
5725
5726#ifdef __x86_64__
5727#define _mm_cvtt_roundsd_si64(A, R) \
5728 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5729
5730#define _mm_cvtt_roundsd_i64(A, R) \
5731 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5732
5733static __inline__ long long __DEFAULT_FN_ATTRS128
5734_mm_cvttsd_i64 (__m128d __A)
5735{
5736 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
5738}
5739#endif
5740
5741#define _mm_cvtt_roundsd_u32(A, R) \
5742 ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5743
5744static __inline__ unsigned __DEFAULT_FN_ATTRS128
5745_mm_cvttsd_u32 (__m128d __A)
5746{
5747 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
5749}
5750
5751#ifdef __x86_64__
5752#define _mm_cvtt_roundsd_u64(A, R) \
5753 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
5754 (int)(R)))
5755
5756static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5757_mm_cvttsd_u64 (__m128d __A)
5758{
5759 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
5760 __A,
5762}
5763#endif
5764
5765#define _mm_cvtt_roundss_i32(A, R) \
5766 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5767
5768#define _mm_cvtt_roundss_si32(A, R) \
5769 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
5770
5771static __inline__ int __DEFAULT_FN_ATTRS128
5772_mm_cvttss_i32 (__m128 __A)
5773{
5774 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
5776}
5777
5778#ifdef __x86_64__
5779#define _mm_cvtt_roundss_i64(A, R) \
5780 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5781
5782#define _mm_cvtt_roundss_si64(A, R) \
5783 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
5784
5785static __inline__ long long __DEFAULT_FN_ATTRS128
5786_mm_cvttss_i64 (__m128 __A)
5787{
5788 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
5790}
5791#endif
5792
5793#define _mm_cvtt_roundss_u32(A, R) \
5794 ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
5795
5796static __inline__ unsigned __DEFAULT_FN_ATTRS128
5797_mm_cvttss_u32 (__m128 __A)
5798{
5799 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
5801}
5802
5803#ifdef __x86_64__
5804#define _mm_cvtt_roundss_u64(A, R) \
5805 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
5806 (int)(R)))
5807
5808static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5809_mm_cvttss_u64 (__m128 __A)
5810{
5811 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
5812 __A,
5814}
5815#endif
5816
/* In-lane permute of double/single elements selected by the immediate C,
   with merge-masking (mask_) and zero-masking (maskz_) forms. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
5842
5843static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5844_mm512_permutevar_pd(__m512d __A, __m512i __C) {
5845 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
5846}
5847
5848static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5849_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
5850 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5851 (__v8df)_mm512_permutevar_pd(__A, __C),
5852 (__v8df)__W);
5853}
5854
5855static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5856_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
5857 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5858 (__v8df)_mm512_permutevar_pd(__A, __C),
5859 (__v8df)_mm512_setzero_pd());
5860}
5861
5862static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5863_mm512_permutevar_ps(__m512 __A, __m512i __C) {
5864 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
5865}
5866
5867static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5868_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
5869 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
5870 (__v16sf)_mm512_permutevar_ps(__A, __C),
5871 (__v16sf)__W);
5872}
5873
5874static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5875_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
5876 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
5877 (__v16sf)_mm512_permutevar_ps(__A, __C),
5878 (__v16sf)_mm512_setzero_ps());
5879}
5880
5881static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5882_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) {
5883 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
5884 (__v8df)__B);
5885}
5886
5887static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5888_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I,
5889 __m512d __B) {
5890 return (__m512d)__builtin_ia32_selectpd_512(__U,
5891 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5892 (__v8df)__A);
5893}
5894
5895static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5896_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
5897 __m512d __B) {
5898 return (__m512d)__builtin_ia32_selectpd_512(__U,
5899 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5900 (__v8df)(__m512d)__I);
5901}
5902
5903static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5904_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
5905 __m512d __B) {
5906 return (__m512d)__builtin_ia32_selectpd_512(__U,
5907 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5908 (__v8df)_mm512_setzero_pd());
5909}
5910
5911static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5912_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) {
5913 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
5914 (__v16sf) __B);
5915}
5916
5917static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5918_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I,
5919 __m512 __B) {
5920 return (__m512)__builtin_ia32_selectps_512(__U,
5921 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
5922 (__v16sf)__A);
5923}
5924
5925static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5926_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U,
5927 __m512 __B) {
5928 return (__m512)__builtin_ia32_selectps_512(__U,
5929 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
5930 (__v16sf)(__m512)__I);
5931}
5932
5933static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5934_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I,
5935 __m512 __B) {
5936 return (__m512)__builtin_ia32_selectps_512(__U,
5937 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
5938 (__v16sf)_mm512_setzero_ps());
5939}
5940
5941#define _mm512_cvtt_roundpd_epu32(A, R) \
5942 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
5943 (__v8si)_mm256_undefined_si256(), \
5944 (__mmask8)-1, (int)(R)))
5945
5946#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
5947 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
5948 (__v8si)(__m256i)(W), \
5949 (__mmask8)(U), (int)(R)))
5950
5951#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
5952 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
5953 (__v8si)_mm256_setzero_si256(), \
5954 (__mmask8)(U), (int)(R)))
5955
5956static __inline__ __m256i __DEFAULT_FN_ATTRS512
5958{
5959 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5960 (__v8si)
5962 (__mmask8) -1,
5964}
5965
5966static __inline__ __m256i __DEFAULT_FN_ATTRS512
5967_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
5968{
5969 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5970 (__v8si) __W,
5971 (__mmask8) __U,
5973}
5974
5975static __inline__ __m256i __DEFAULT_FN_ATTRS512
5977{
5978 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5979 (__v8si)
5981 (__mmask8) __U,
5983}
5984
/* Round the low sd/ss element of B to the precision given by imm, passing
   the upper elements of A through.  _round_ forms take an explicit
   rounding/SAE immediate. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6068
6069#define _mm512_scalef_round_pd(A, B, R) \
6070 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6071 (__v8df)(__m512d)(B), \
6072 (__v8df)_mm512_undefined_pd(), \
6073 (__mmask8)-1, (int)(R)))
6074
6075#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6076 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6077 (__v8df)(__m512d)(B), \
6078 (__v8df)(__m512d)(W), \
6079 (__mmask8)(U), (int)(R)))
6080
6081#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6082 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6083 (__v8df)(__m512d)(B), \
6084 (__v8df)_mm512_setzero_pd(), \
6085 (__mmask8)(U), (int)(R)))
6086
6087static __inline__ __m512d __DEFAULT_FN_ATTRS512
6088_mm512_scalef_pd (__m512d __A, __m512d __B)
6089{
6090 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6091 (__v8df) __B,
6092 (__v8df)
6094 (__mmask8) -1,
6096}
6097
6098static __inline__ __m512d __DEFAULT_FN_ATTRS512
6099_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6100{
6101 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6102 (__v8df) __B,
6103 (__v8df) __W,
6104 (__mmask8) __U,
6106}
6107
6108static __inline__ __m512d __DEFAULT_FN_ATTRS512
6109_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6110{
6111 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6112 (__v8df) __B,
6113 (__v8df)
6115 (__mmask8) __U,
6117}
6118
6119#define _mm512_scalef_round_ps(A, B, R) \
6120 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6121 (__v16sf)(__m512)(B), \
6122 (__v16sf)_mm512_undefined_ps(), \
6123 (__mmask16)-1, (int)(R)))
6124
6125#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6126 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6127 (__v16sf)(__m512)(B), \
6128 (__v16sf)(__m512)(W), \
6129 (__mmask16)(U), (int)(R)))
6130
6131#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6132 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6133 (__v16sf)(__m512)(B), \
6134 (__v16sf)_mm512_setzero_ps(), \
6135 (__mmask16)(U), (int)(R)))
6136
6137static __inline__ __m512 __DEFAULT_FN_ATTRS512
6138_mm512_scalef_ps (__m512 __A, __m512 __B)
6139{
6140 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6141 (__v16sf) __B,
6142 (__v16sf)
6144 (__mmask16) -1,
6146}
6147
6148static __inline__ __m512 __DEFAULT_FN_ATTRS512
6149_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6150{
6151 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6152 (__v16sf) __B,
6153 (__v16sf) __W,
6154 (__mmask16) __U,
6156}
6157
6158static __inline__ __m512 __DEFAULT_FN_ATTRS512
6159_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6160{
6161 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6162 (__v16sf) __B,
6163 (__v16sf)
6165 (__mmask16) __U,
6167}
6168
6169#define _mm_scalef_round_sd(A, B, R) \
6170 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6171 (__v2df)(__m128d)(B), \
6172 (__v2df)_mm_setzero_pd(), \
6173 (__mmask8)-1, (int)(R)))
6174
6175static __inline__ __m128d __DEFAULT_FN_ATTRS128
6176_mm_scalef_sd (__m128d __A, __m128d __B)
6177{
6178 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6179 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6180 (__mmask8) -1,
6182}
6183
6184static __inline__ __m128d __DEFAULT_FN_ATTRS128
6185_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6186{
6187 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6188 (__v2df) __B,
6189 (__v2df) __W,
6190 (__mmask8) __U,
6192}
6193
6194#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6195 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6196 (__v2df)(__m128d)(B), \
6197 (__v2df)(__m128d)(W), \
6198 (__mmask8)(U), (int)(R)))
6199
6200static __inline__ __m128d __DEFAULT_FN_ATTRS128
6201_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6202{
6203 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6204 (__v2df) __B,
6205 (__v2df) _mm_setzero_pd (),
6206 (__mmask8) __U,
6208}
6209
6210#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6211 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6212 (__v2df)(__m128d)(B), \
6213 (__v2df)_mm_setzero_pd(), \
6214 (__mmask8)(U), (int)(R)))
6215
6216#define _mm_scalef_round_ss(A, B, R) \
6217 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6218 (__v4sf)(__m128)(B), \
6219 (__v4sf)_mm_setzero_ps(), \
6220 (__mmask8)-1, (int)(R)))
6221
6222static __inline__ __m128 __DEFAULT_FN_ATTRS128
6223_mm_scalef_ss (__m128 __A, __m128 __B)
6224{
6225 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6226 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6227 (__mmask8) -1,
6229}
6230
6231static __inline__ __m128 __DEFAULT_FN_ATTRS128
6232_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6233{
6234 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6235 (__v4sf) __B,
6236 (__v4sf) __W,
6237 (__mmask8) __U,
6239}
6240
6241#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6242 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6243 (__v4sf)(__m128)(B), \
6244 (__v4sf)(__m128)(W), \
6245 (__mmask8)(U), (int)(R)))
6246
6247static __inline__ __m128 __DEFAULT_FN_ATTRS128
6248_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6249{
6250 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6251 (__v4sf) __B,
6252 (__v4sf) _mm_setzero_ps (),
6253 (__mmask8) __U,
6255}
6256
6257#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6258 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6259 (__v4sf)(__m128)(B), \
6260 (__v4sf)_mm_setzero_ps(), \
6261 (__mmask8)(U), \
6262 (int)(R)))
6263
6264static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6265_mm512_srai_epi32(__m512i __A, unsigned int __B) {
6266 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
6267}
6268
6269static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6270_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
6271 unsigned int __B) {
6272 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6273 (__v16si)_mm512_srai_epi32(__A, __B),
6274 (__v16si)__W);
6275}
6276
6277static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6278_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
6279 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6280 (__v16si)_mm512_srai_epi32(__A, __B),
6281 (__v16si)_mm512_setzero_si512());
6282}
6283
6284static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6285_mm512_srai_epi64(__m512i __A, unsigned int __B) {
6286 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
6287}
6288
6289static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6290_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A,
6291 unsigned int __B) {
6292 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6293 (__v8di)_mm512_srai_epi64(__A, __B),
6294 (__v8di)__W);
6295}
6296
6297static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6298_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
6299 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6300 (__v8di)_mm512_srai_epi64(__A, __B),
6301 (__v8di)_mm512_setzero_si512());
6302}
6303
/* Shuffle 128-bit lanes (f32x4/f64x2/i32x4/i64x2) and classic in-lane
   element shuffles (shuffle_pd/ps), each with mask/maskz variants. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                   (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                   (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                    (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                    (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                   (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                   (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                   (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                   (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                   (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                   (__v8di)_mm512_setzero_si512()))

#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                    (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                    (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                    (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                   (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                   (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                   (__v16sf)_mm512_setzero_ps()))
6387
6388#define _mm_sqrt_round_sd(A, B, R) \
6389 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6390 (__v2df)(__m128d)(B), \
6391 (__v2df)_mm_setzero_pd(), \
6392 (__mmask8)-1, (int)(R)))
6393
6394static __inline__ __m128d __DEFAULT_FN_ATTRS128
6395_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6396{
6397 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6398 (__v2df) __B,
6399 (__v2df) __W,
6400 (__mmask8) __U,
6402}
6403
6404#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6405 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6406 (__v2df)(__m128d)(B), \
6407 (__v2df)(__m128d)(W), \
6408 (__mmask8)(U), (int)(R)))
6409
6410static __inline__ __m128d __DEFAULT_FN_ATTRS128
6411_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6412{
6413 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6414 (__v2df) __B,
6415 (__v2df) _mm_setzero_pd (),
6416 (__mmask8) __U,
6418}
6419
6420#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6421 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6422 (__v2df)(__m128d)(B), \
6423 (__v2df)_mm_setzero_pd(), \
6424 (__mmask8)(U), (int)(R)))
6425
6426#define _mm_sqrt_round_ss(A, B, R) \
6427 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6428 (__v4sf)(__m128)(B), \
6429 (__v4sf)_mm_setzero_ps(), \
6430 (__mmask8)-1, (int)(R)))
6431
6432static __inline__ __m128 __DEFAULT_FN_ATTRS128
6433_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6434{
6435 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6436 (__v4sf) __B,
6437 (__v4sf) __W,
6438 (__mmask8) __U,
6440}
6441
6442#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6443 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6444 (__v4sf)(__m128)(B), \
6445 (__v4sf)(__m128)(W), (__mmask8)(U), \
6446 (int)(R)))
6447
6448static __inline__ __m128 __DEFAULT_FN_ATTRS128
6449_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6450{
6451 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6452 (__v4sf) __B,
6453 (__v4sf) _mm_setzero_ps (),
6454 (__mmask8) __U,
6456}
6457
6458#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6459 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6460 (__v4sf)(__m128)(B), \
6461 (__v4sf)_mm_setzero_ps(), \
6462 (__mmask8)(U), (int)(R)))
6463
6464static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6466 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6467 0, 1, 2, 3, 0, 1, 2, 3,
6468 0, 1, 2, 3, 0, 1, 2, 3);
6469}
6470
6471static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6472_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
6473 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6474 (__v16sf)_mm512_broadcast_f32x4(__A),
6475 (__v16sf)__O);
6476}
6477
6478static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6480 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6481 (__v16sf)_mm512_broadcast_f32x4(__A),
6482 (__v16sf)_mm512_setzero_ps());
6483}
6484
6485static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6487 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6488 0, 1, 2, 3, 0, 1, 2, 3);
6489}
6490
6491static __inline__ __m512d __DEFAULT_FN_ATTRS512
6492_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6493{
6494 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6495 (__v8df)_mm512_broadcast_f64x4(__A),
6496 (__v8df)__O);
6497}
6498
6499static __inline__ __m512d __DEFAULT_FN_ATTRS512
6501{
6502 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6503 (__v8df)_mm512_broadcast_f64x4(__A),
6504 (__v8df)_mm512_setzero_pd());
6505}
6506
6507static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6509 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6510 0, 1, 2, 3, 0, 1, 2, 3,
6511 0, 1, 2, 3, 0, 1, 2, 3);
6512}
6513
6514static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6515_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
6516 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6517 (__v16si)_mm512_broadcast_i32x4(__A),
6518 (__v16si)__O);
6519}
6520
6521static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6523 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6524 (__v16si)_mm512_broadcast_i32x4(__A),
6525 (__v16si)_mm512_setzero_si512());
6526}
6527
6528static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6530 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6531 0, 1, 2, 3, 0, 1, 2, 3);
6532}
6533
6534static __inline__ __m512i __DEFAULT_FN_ATTRS512
6535_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6536{
6537 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6538 (__v8di)_mm512_broadcast_i64x4(__A),
6539 (__v8di)__O);
6540}
6541
6542static __inline__ __m512i __DEFAULT_FN_ATTRS512
6544{
6545 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6546 (__v8di)_mm512_broadcast_i64x4(__A),
6547 (__v8di)_mm512_setzero_si512());
6548}
6549
6550static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6551_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) {
6552 return (__m512d)__builtin_ia32_selectpd_512(__M,
6553 (__v8df) _mm512_broadcastsd_pd(__A),
6554 (__v8df) __O);
6555}
6556
6557static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6559 return (__m512d)__builtin_ia32_selectpd_512(__M,
6560 (__v8df) _mm512_broadcastsd_pd(__A),
6561 (__v8df) _mm512_setzero_pd());
6562}
6563
6564static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6565_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) {
6566 return (__m512)__builtin_ia32_selectps_512(__M,
6567 (__v16sf) _mm512_broadcastss_ps(__A),
6568 (__v16sf) __O);
6569}
6570
6571static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6573 return (__m512)__builtin_ia32_selectps_512(__M,
6574 (__v16sf) _mm512_broadcastss_ps(__A),
6575 (__v16sf) _mm512_setzero_ps());
6576}
6577
6578static __inline__ __m128i __DEFAULT_FN_ATTRS512
6580{
6581 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6582 (__v16qi) _mm_undefined_si128 (),
6583 (__mmask16) -1);
6584}
6585
6586static __inline__ __m128i __DEFAULT_FN_ATTRS512
6587_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6588{
6589 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6590 (__v16qi) __O, __M);
6591}
6592
6593static __inline__ __m128i __DEFAULT_FN_ATTRS512
6595{
6596 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6597 (__v16qi) _mm_setzero_si128 (),
6598 __M);
6599}
6600
6601static __inline__ void __DEFAULT_FN_ATTRS512
6603{
6604 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6605}
6606
6607static __inline__ __m256i __DEFAULT_FN_ATTRS512
6609{
6610 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6611 (__v16hi) _mm256_undefined_si256 (),
6612 (__mmask16) -1);
6613}
6614
6615static __inline__ __m256i __DEFAULT_FN_ATTRS512
6616_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6617{
6618 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6619 (__v16hi) __O, __M);
6620}
6621
6622static __inline__ __m256i __DEFAULT_FN_ATTRS512
6624{
6625 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6626 (__v16hi) _mm256_setzero_si256 (),
6627 __M);
6628}
6629
6630static __inline__ void __DEFAULT_FN_ATTRS512
6632{
6633 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6634}
6635
6636static __inline__ __m128i __DEFAULT_FN_ATTRS512
6638{
6639 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6640 (__v16qi) _mm_undefined_si128 (),
6641 (__mmask8) -1);
6642}
6643
6644static __inline__ __m128i __DEFAULT_FN_ATTRS512
6645_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6646{
6647 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6648 (__v16qi) __O, __M);
6649}
6650
6651static __inline__ __m128i __DEFAULT_FN_ATTRS512
6653{
6654 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6655 (__v16qi) _mm_setzero_si128 (),
6656 __M);
6657}
6658
6659static __inline__ void __DEFAULT_FN_ATTRS512
6661{
6662 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6663}
6664
6665static __inline__ __m256i __DEFAULT_FN_ATTRS512
6667{
6668 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6669 (__v8si) _mm256_undefined_si256 (),
6670 (__mmask8) -1);
6671}
6672
6673static __inline__ __m256i __DEFAULT_FN_ATTRS512
6674_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6675{
6676 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6677 (__v8si) __O, __M);
6678}
6679
6680static __inline__ __m256i __DEFAULT_FN_ATTRS512
6682{
6683 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6684 (__v8si) _mm256_setzero_si256 (),
6685 __M);
6686}
6687
6688static __inline__ void __DEFAULT_FN_ATTRS512
6690{
6691 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6692}
6693
6694static __inline__ __m128i __DEFAULT_FN_ATTRS512
6696{
6697 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6698 (__v8hi) _mm_undefined_si128 (),
6699 (__mmask8) -1);
6700}
6701
6702static __inline__ __m128i __DEFAULT_FN_ATTRS512
6703_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6704{
6705 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6706 (__v8hi) __O, __M);
6707}
6708
6709static __inline__ __m128i __DEFAULT_FN_ATTRS512
6711{
6712 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6713 (__v8hi) _mm_setzero_si128 (),
6714 __M);
6715}
6716
6717static __inline__ void __DEFAULT_FN_ATTRS512
6719{
6720 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6721}
6722
6723static __inline__ __m128i __DEFAULT_FN_ATTRS512
6725{
6726 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6727 (__v16qi) _mm_undefined_si128 (),
6728 (__mmask16) -1);
6729}
6730
6731static __inline__ __m128i __DEFAULT_FN_ATTRS512
6732_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6733{
6734 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6735 (__v16qi) __O,
6736 __M);
6737}
6738
6739static __inline__ __m128i __DEFAULT_FN_ATTRS512
6741{
6742 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6743 (__v16qi) _mm_setzero_si128 (),
6744 __M);
6745}
6746
6747static __inline__ void __DEFAULT_FN_ATTRS512
6749{
6750 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6751}
6752
6753static __inline__ __m256i __DEFAULT_FN_ATTRS512
6755{
6756 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6757 (__v16hi) _mm256_undefined_si256 (),
6758 (__mmask16) -1);
6759}
6760
6761static __inline__ __m256i __DEFAULT_FN_ATTRS512
6762_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6763{
6764 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6765 (__v16hi) __O,
6766 __M);
6767}
6768
6769static __inline__ __m256i __DEFAULT_FN_ATTRS512
6771{
6772 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6773 (__v16hi) _mm256_setzero_si256 (),
6774 __M);
6775}
6776
6777static __inline__ void __DEFAULT_FN_ATTRS512
6779{
6780 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6781}
6782
6783static __inline__ __m128i __DEFAULT_FN_ATTRS512
6785{
6786 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6787 (__v16qi) _mm_undefined_si128 (),
6788 (__mmask8) -1);
6789}
6790
6791static __inline__ __m128i __DEFAULT_FN_ATTRS512
6792_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6793{
6794 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6795 (__v16qi) __O,
6796 __M);
6797}
6798
6799static __inline__ __m128i __DEFAULT_FN_ATTRS512
6801{
6802 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6803 (__v16qi) _mm_setzero_si128 (),
6804 __M);
6805}
6806
6807static __inline__ void __DEFAULT_FN_ATTRS512
6809{
6810 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6811}
6812
6813static __inline__ __m256i __DEFAULT_FN_ATTRS512
6815{
6816 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6817 (__v8si) _mm256_undefined_si256 (),
6818 (__mmask8) -1);
6819}
6820
6821static __inline__ __m256i __DEFAULT_FN_ATTRS512
6822_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6823{
6824 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6825 (__v8si) __O, __M);
6826}
6827
6828static __inline__ __m256i __DEFAULT_FN_ATTRS512
6830{
6831 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6832 (__v8si) _mm256_setzero_si256 (),
6833 __M);
6834}
6835
6836static __inline__ void __DEFAULT_FN_ATTRS512
6838{
6839 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
6840}
6841
6842static __inline__ __m128i __DEFAULT_FN_ATTRS512
6844{
6845 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6846 (__v8hi) _mm_undefined_si128 (),
6847 (__mmask8) -1);
6848}
6849
6850static __inline__ __m128i __DEFAULT_FN_ATTRS512
6851_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6852{
6853 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6854 (__v8hi) __O, __M);
6855}
6856
6857static __inline__ __m128i __DEFAULT_FN_ATTRS512
6859{
6860 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6861 (__v8hi) _mm_setzero_si128 (),
6862 __M);
6863}
6864
6865static __inline__ void __DEFAULT_FN_ATTRS512
6867{
6868 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
6869}
6870
6871static __inline__ __m128i __DEFAULT_FN_ATTRS512
6873{
6874 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6875 (__v16qi) _mm_undefined_si128 (),
6876 (__mmask16) -1);
6877}
6878
6879static __inline__ __m128i __DEFAULT_FN_ATTRS512
6880_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6881{
6882 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6883 (__v16qi) __O, __M);
6884}
6885
6886static __inline__ __m128i __DEFAULT_FN_ATTRS512
6888{
6889 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6890 (__v16qi) _mm_setzero_si128 (),
6891 __M);
6892}
6893
6894static __inline__ void __DEFAULT_FN_ATTRS512
6896{
6897 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6898}
6899
6900static __inline__ __m256i __DEFAULT_FN_ATTRS512
6902{
6903 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6904 (__v16hi) _mm256_undefined_si256 (),
6905 (__mmask16) -1);
6906}
6907
6908static __inline__ __m256i __DEFAULT_FN_ATTRS512
6909_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6910{
6911 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6912 (__v16hi) __O, __M);
6913}
6914
6915static __inline__ __m256i __DEFAULT_FN_ATTRS512
6917{
6918 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6919 (__v16hi) _mm256_setzero_si256 (),
6920 __M);
6921}
6922
6923static __inline__ void __DEFAULT_FN_ATTRS512
6925{
6926 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
6927}
6928
6929static __inline__ __m128i __DEFAULT_FN_ATTRS512
6931{
6932 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6933 (__v16qi) _mm_undefined_si128 (),
6934 (__mmask8) -1);
6935}
6936
6937static __inline__ __m128i __DEFAULT_FN_ATTRS512
6938_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6939{
6940 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6941 (__v16qi) __O, __M);
6942}
6943
6944static __inline__ __m128i __DEFAULT_FN_ATTRS512
6946{
6947 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6948 (__v16qi) _mm_setzero_si128 (),
6949 __M);
6950}
6951
6952static __inline__ void __DEFAULT_FN_ATTRS512
6954{
6955 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6956}
6957
6958static __inline__ __m256i __DEFAULT_FN_ATTRS512
6960{
6961 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6962 (__v8si) _mm256_undefined_si256 (),
6963 (__mmask8) -1);
6964}
6965
6966static __inline__ __m256i __DEFAULT_FN_ATTRS512
6967_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6968{
6969 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6970 (__v8si) __O, __M);
6971}
6972
6973static __inline__ __m256i __DEFAULT_FN_ATTRS512
6975{
6976 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6977 (__v8si) _mm256_setzero_si256 (),
6978 __M);
6979}
6980
6981static __inline__ void __DEFAULT_FN_ATTRS512
6983{
6984 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6985}
6986
6987static __inline__ __m128i __DEFAULT_FN_ATTRS512
6989{
6990 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6991 (__v8hi) _mm_undefined_si128 (),
6992 (__mmask8) -1);
6993}
6994
6995static __inline__ __m128i __DEFAULT_FN_ATTRS512
6996_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6997{
6998 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6999 (__v8hi) __O, __M);
7000}
7001
7002static __inline__ __m128i __DEFAULT_FN_ATTRS512
7004{
7005 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7006 (__v8hi) _mm_setzero_si128 (),
7007 __M);
7008}
7009
7010static __inline__ void __DEFAULT_FN_ATTRS512
7012{
7013 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7014}
7015
/* Extract a 128-bit lane of four 32-bit ints / a 256-bit lane of four
 * 64-bit ints, selected by imm, with merge- and zero-masked variants. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
      (__mmask8) - 1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8) - 1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
7045
/* Insert a 128-/256-bit lane into a 512-bit vector at the position given
 * by imm.  Masked variants are built from the unmasked insert followed by
 * an element select against W (merge) or zero (maskz). */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)_mm512_setzero_pd()))

#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                (__v16sf)_mm512_setzero_ps()))

#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
7101
/* getmant: extract the normalized mantissa of each element.  B selects the
 * interval, C the sign control; the builtin packs them as (C << 2) | B.
 * The _round_ variants take an explicit rounding/exception argument R;
 * the plain forms pass _MM_FROUND_CUR_DIRECTION. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7179
/* getexp: extract the exponent of each double element, with an explicit
 * rounding/exception argument R. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7194
7195static __inline__ __m512d __DEFAULT_FN_ATTRS512
7196_mm512_getexp_pd (__m512d __A)
7197{
7198 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7199 (__v8df) _mm512_undefined_pd (),
7200 (__mmask8) -1,
7202}
7203
7204static __inline__ __m512d __DEFAULT_FN_ATTRS512
7205_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7206{
7207 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7208 (__v8df) __W,
7209 (__mmask8) __U,
7211}
7212
7213static __inline__ __m512d __DEFAULT_FN_ATTRS512
7215{
7216 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7217 (__v8df) _mm512_setzero_pd (),
7218 (__mmask8) __U,
7220}
7221
/* getexp: extract the exponent of each float element, with an explicit
 * rounding/exception argument R. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7236
7237static __inline__ __m512 __DEFAULT_FN_ATTRS512
7239{
7240 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7241 (__v16sf) _mm512_undefined_ps (),
7242 (__mmask16) -1,
7244}
7245
7246static __inline__ __m512 __DEFAULT_FN_ATTRS512
7247_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7248{
7249 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7250 (__v16sf) __W,
7251 (__mmask16) __U,
7253}
7254
7255static __inline__ __m512 __DEFAULT_FN_ATTRS512
7257{
7258 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7259 (__v16sf) _mm512_setzero_ps (),
7260 (__mmask16) __U,
7262}
7263
/* Gather loads: fetch elements from addr + index[i] * scale.  The masked
 * forms load only lanes selected by mask, keeping v1_old elsewhere. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7359
/* Scatter stores: write elements of v1 to addr + index[i] * scale.  The
 * masked forms store only lanes selected by mask. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7439
7440static __inline__ __m128 __DEFAULT_FN_ATTRS128
7441_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7442{
7443 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7444 (__v4sf)__A,
7445 (__v4sf)__B,
7446 (__mmask8)__U,
7448}
7449
7450#define _mm_fmadd_round_ss(A, B, C, R) \
7451 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7452 (__v4sf)(__m128)(B), \
7453 (__v4sf)(__m128)(C), (__mmask8)-1, \
7454 (int)(R)))
7455
7456#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7457 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7458 (__v4sf)(__m128)(A), \
7459 (__v4sf)(__m128)(B), (__mmask8)(U), \
7460 (int)(R)))
7461
7462static __inline__ __m128 __DEFAULT_FN_ATTRS128
7463_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7464{
7465 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7466 (__v4sf)__B,
7467 (__v4sf)__C,
7468 (__mmask8)__U,
7470}
7471
7472#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7473 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7474 (__v4sf)(__m128)(B), \
7475 (__v4sf)(__m128)(C), (__mmask8)(U), \
7476 (int)(R)))
7477
7478static __inline__ __m128 __DEFAULT_FN_ATTRS128
7479_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7480{
7481 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7482 (__v4sf)__X,
7483 (__v4sf)__Y,
7484 (__mmask8)__U,
7486}
7487
7488#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7489 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7490 (__v4sf)(__m128)(X), \
7491 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7492 (int)(R)))
7493
7494static __inline__ __m128 __DEFAULT_FN_ATTRS128
7495_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7496{
7497 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7498 (__v4sf)__A,
7499 -(__v4sf)__B,
7500 (__mmask8)__U,
7502}
7503
7504#define _mm_fmsub_round_ss(A, B, C, R) \
7505 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7506 (__v4sf)(__m128)(B), \
7507 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7508 (int)(R)))
7509
7510#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7511 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7512 (__v4sf)(__m128)(A), \
7513 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7514 (int)(R)))
7515
7516static __inline__ __m128 __DEFAULT_FN_ATTRS128
7517_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7518{
7519 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7520 (__v4sf)__B,
7521 -(__v4sf)__C,
7522 (__mmask8)__U,
7524}
7525
7526#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7527 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7528 (__v4sf)(__m128)(B), \
7529 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7530 (int)(R)))
7531
7532static __inline__ __m128 __DEFAULT_FN_ATTRS128
7533_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7534{
7535 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7536 (__v4sf)__X,
7537 (__v4sf)__Y,
7538 (__mmask8)__U,
7540}
7541
7542#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7543 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7544 (__v4sf)(__m128)(X), \
7545 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7546 (int)(R)))
7547
7548static __inline__ __m128 __DEFAULT_FN_ATTRS128
7549_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7550{
7551 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7552 -(__v4sf)__A,
7553 (__v4sf)__B,
7554 (__mmask8)__U,
7556}
7557
7558#define _mm_fnmadd_round_ss(A, B, C, R) \
7559 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7560 -(__v4sf)(__m128)(B), \
7561 (__v4sf)(__m128)(C), (__mmask8)-1, \
7562 (int)(R)))
7563
7564#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7565 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7566 -(__v4sf)(__m128)(A), \
7567 (__v4sf)(__m128)(B), (__mmask8)(U), \
7568 (int)(R)))
7569
7570static __inline__ __m128 __DEFAULT_FN_ATTRS128
7571_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7572{
7573 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7574 -(__v4sf)__B,
7575 (__v4sf)__C,
7576 (__mmask8)__U,
7578}
7579
7580#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7581 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7582 -(__v4sf)(__m128)(B), \
7583 (__v4sf)(__m128)(C), (__mmask8)(U), \
7584 (int)(R)))
7585
7586static __inline__ __m128 __DEFAULT_FN_ATTRS128
7587_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7588{
7589 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7590 -(__v4sf)__X,
7591 (__v4sf)__Y,
7592 (__mmask8)__U,
7594}
7595
7596#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7597 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7598 -(__v4sf)(__m128)(X), \
7599 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7600 (int)(R)))
7601
7602static __inline__ __m128 __DEFAULT_FN_ATTRS128
7603_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7604{
7605 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7606 -(__v4sf)__A,
7607 -(__v4sf)__B,
7608 (__mmask8)__U,
7610}
7611
7612#define _mm_fnmsub_round_ss(A, B, C, R) \
7613 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7614 -(__v4sf)(__m128)(B), \
7615 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7616 (int)(R)))
7617
7618#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7619 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7620 -(__v4sf)(__m128)(A), \
7621 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7622 (int)(R)))
7623
7624static __inline__ __m128 __DEFAULT_FN_ATTRS128
7625_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7626{
7627 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7628 -(__v4sf)__B,
7629 -(__v4sf)__C,
7630 (__mmask8)__U,
7632}
7633
7634#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7635 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7636 -(__v4sf)(__m128)(B), \
7637 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7638 (int)(R)))
7639
7640static __inline__ __m128 __DEFAULT_FN_ATTRS128
7641_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7642{
7643 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7644 -(__v4sf)__X,
7645 (__v4sf)__Y,
7646 (__mmask8)__U,
7648}
7649
7650#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7651 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7652 -(__v4sf)(__m128)(X), \
7653 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7654 (int)(R)))
7655
7656static __inline__ __m128d __DEFAULT_FN_ATTRS128
7657_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7658{
7659 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7660 (__v2df)__A,
7661 (__v2df)__B,
7662 (__mmask8)__U,
7664}
7665
7666#define _mm_fmadd_round_sd(A, B, C, R) \
7667 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7668 (__v2df)(__m128d)(B), \
7669 (__v2df)(__m128d)(C), (__mmask8)-1, \
7670 (int)(R)))
7671
7672#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
7673 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7674 (__v2df)(__m128d)(A), \
7675 (__v2df)(__m128d)(B), (__mmask8)(U), \
7676 (int)(R)))
7677
7678static __inline__ __m128d __DEFAULT_FN_ATTRS128
7679_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7680{
7681 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7682 (__v2df)__B,
7683 (__v2df)__C,
7684 (__mmask8)__U,
7686}
7687
7688#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
7689 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7690 (__v2df)(__m128d)(B), \
7691 (__v2df)(__m128d)(C), (__mmask8)(U), \
7692 (int)(R)))
7693
7694static __inline__ __m128d __DEFAULT_FN_ATTRS128
7695_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7696{
7697 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7698 (__v2df)__X,
7699 (__v2df)__Y,
7700 (__mmask8)__U,
7702}
7703
7704#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
7705 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7706 (__v2df)(__m128d)(X), \
7707 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7708 (int)(R)))
7709
7710static __inline__ __m128d __DEFAULT_FN_ATTRS128
7711_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7712{
7713 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7714 (__v2df)__A,
7715 -(__v2df)__B,
7716 (__mmask8)__U,
7718}
7719
7720#define _mm_fmsub_round_sd(A, B, C, R) \
7721 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7722 (__v2df)(__m128d)(B), \
7723 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7724 (int)(R)))
7725
7726#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
7727 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7728 (__v2df)(__m128d)(A), \
7729 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7730 (int)(R)))
7731
7732static __inline__ __m128d __DEFAULT_FN_ATTRS128
7733_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7734{
7735 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7736 (__v2df)__B,
7737 -(__v2df)__C,
7738 (__mmask8)__U,
7740}
7741
7742#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
7743 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7744 (__v2df)(__m128d)(B), \
7745 -(__v2df)(__m128d)(C), \
7746 (__mmask8)(U), (int)(R)))
7747
7748static __inline__ __m128d __DEFAULT_FN_ATTRS128
7749_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7750{
7751 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7752 (__v2df)__X,
7753 (__v2df)__Y,
7754 (__mmask8)__U,
7756}
7757
7758#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
7759 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7760 (__v2df)(__m128d)(X), \
7761 (__v2df)(__m128d)(Y), \
7762 (__mmask8)(U), (int)(R)))
7763
7764static __inline__ __m128d __DEFAULT_FN_ATTRS128
7765_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7766{
7767 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7768 -(__v2df)__A,
7769 (__v2df)__B,
7770 (__mmask8)__U,
7772}
7773
7774#define _mm_fnmadd_round_sd(A, B, C, R) \
7775 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7776 -(__v2df)(__m128d)(B), \
7777 (__v2df)(__m128d)(C), (__mmask8)-1, \
7778 (int)(R)))
7779
7780#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
7781 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7782 -(__v2df)(__m128d)(A), \
7783 (__v2df)(__m128d)(B), (__mmask8)(U), \
7784 (int)(R)))
7785
7786static __inline__ __m128d __DEFAULT_FN_ATTRS128
7787_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7788{
7789 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7790 -(__v2df)__B,
7791 (__v2df)__C,
7792 (__mmask8)__U,
7794}
7795
7796#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
7797 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7798 -(__v2df)(__m128d)(B), \
7799 (__v2df)(__m128d)(C), (__mmask8)(U), \
7800 (int)(R)))
7801
7802static __inline__ __m128d __DEFAULT_FN_ATTRS128
7803_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7804{
7805 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7806 -(__v2df)__X,
7807 (__v2df)__Y,
7808 (__mmask8)__U,
7810}
7811
7812#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
7813 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7814 -(__v2df)(__m128d)(X), \
7815 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7816 (int)(R)))
7817
7818static __inline__ __m128d __DEFAULT_FN_ATTRS128
7819_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7820{
7821 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7822 -(__v2df)__A,
7823 -(__v2df)__B,
7824 (__mmask8)__U,
7826}
7827
7828#define _mm_fnmsub_round_sd(A, B, C, R) \
7829 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7830 -(__v2df)(__m128d)(B), \
7831 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7832 (int)(R)))
7833
7834#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
7835 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7836 -(__v2df)(__m128d)(A), \
7837 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7838 (int)(R)))
7839
7840static __inline__ __m128d __DEFAULT_FN_ATTRS128
7841_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7842{
7843 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7844 -(__v2df)__B,
7845 -(__v2df)__C,
7846 (__mmask8)__U,
7848}
7849
7850#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
7851 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7852 -(__v2df)(__m128d)(B), \
7853 -(__v2df)(__m128d)(C), \
7854 (__mmask8)(U), \
7855 (int)(R)))
7856
7857static __inline__ __m128d __DEFAULT_FN_ATTRS128
7858_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7859{
7860 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7861 -(__v2df)__X,
7862 (__v2df)__Y,
7863 (__mmask8)__U,
7865}
7866
7867#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
7868 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7869 -(__v2df)(__m128d)(X), \
7870 (__v2df)(__m128d)(Y), \
7871 (__mmask8)(U), (int)(R)))
7872
/* 512-bit lane permutes with an immediate control (C): double and 64-bit
 * integer element variants, each with merge- and zero-masked forms built
 * from the unmasked permute plus a select. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))

7899static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7900_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
7901 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
7902}
7903
7904static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7905_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X,
7906 __m512d __Y) {
7907 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7908 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7909 (__v8df)__W);
7910}
7911
7912static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7913_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
7914 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7915 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7916 (__v8df)_mm512_setzero_pd());
7917}
7918
7919static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7920_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
7921 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
7922}
7923
7924static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7925_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
7926 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7927 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
7928 (__v8di)_mm512_setzero_si512());
7929}
7930
7931static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7932_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X,
7933 __m512i __Y) {
7934 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7935 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
7936 (__v8di)__W);
7937}
7938
7939static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7940_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
7941 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
7942}
7943
7944static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7945_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
7946 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
7947 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
7948 (__v16sf)__W);
7949}
7950
7951static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7952_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
7953 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
7954 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
7955 (__v16sf)_mm512_setzero_ps());
7956}
7957
7958static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7959_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
7960 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
7961}
7962
7963#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
7964
7965static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7967 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7968 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
7969 (__v16si)_mm512_setzero_si512());
7970}
7971
7972static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7973_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X,
7974 __m512i __Y) {
7975 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7976 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
7977 (__v16si)__W);
7978}
7979
7980#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
7981
7982static __inline__ __mmask16
7984 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
7985}
7986
7989 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
7990}
7991
7994 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
7995}
7996
7997static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
7999 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8000}
8001
8002static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
8004 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8005}
8006
8007static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8009 return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8010}
8011
8012static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8014 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8015}
8016
8017static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8018_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8019 *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8020 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8021}
8022
8025 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8026}
8027
8030 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8031}
8032
8035 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8036}
8037
8038#define _kand_mask16 _mm512_kand
8039#define _kandn_mask16 _mm512_kandn
8040#define _knot_mask16 _mm512_knot
8041#define _kor_mask16 _mm512_kor
8042#define _kxnor_mask16 _mm512_kxnor
8043#define _kxor_mask16 _mm512_kxor
8044
8045#define _kshiftli_mask16(A, I) \
8046 ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))
8047
8048#define _kshiftri_mask16(A, I) \
8049 ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8050
8051static __inline__ unsigned int
8053 return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8054}
8055
8057_cvtu32_mask16(unsigned int __A) {
8058 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8059}
8060
8061static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8063 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8064}
8065
8066static __inline__ void __DEFAULT_FN_ATTRS
8068 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8069}
8070
8071static __inline__ void __DEFAULT_FN_ATTRS512
8072_mm512_stream_si512 (void * __P, __m512i __A)
8073{
8074 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8075 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8076}
8077
8078static __inline__ __m512i __DEFAULT_FN_ATTRS512
8080{
8081 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8082 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8083}
8084
8085static __inline__ void __DEFAULT_FN_ATTRS512
8086_mm512_stream_pd (void *__P, __m512d __A)
8087{
8088 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8089 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8090}
8091
8092static __inline__ void __DEFAULT_FN_ATTRS512
8093_mm512_stream_ps (void *__P, __m512 __A)
8094{
8095 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8096 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8097}
8098
8099static __inline__ __m512d __DEFAULT_FN_ATTRS512
8100_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8101{
8102 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8103 (__v8df) __W,
8104 (__mmask8) __U);
8105}
8106
8107static __inline__ __m512d __DEFAULT_FN_ATTRS512
8109{
8110 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8111 (__v8df)
8113 (__mmask8) __U);
8114}
8115
8116static __inline__ __m512i __DEFAULT_FN_ATTRS512
8117_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8118{
8119 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8120 (__v8di) __W,
8121 (__mmask8) __U);
8122}
8123
8124static __inline__ __m512i __DEFAULT_FN_ATTRS512
8126{
8127 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8128 (__v8di)
8130 (__mmask8) __U);
8131}
8132
8133static __inline__ __m512 __DEFAULT_FN_ATTRS512
8134_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8135{
8136 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8137 (__v16sf) __W,
8138 (__mmask16) __U);
8139}
8140
8141static __inline__ __m512 __DEFAULT_FN_ATTRS512
8143{
8144 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8145 (__v16sf)
8147 (__mmask16) __U);
8148}
8149
8150static __inline__ __m512i __DEFAULT_FN_ATTRS512
8151_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8152{
8153 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8154 (__v16si) __W,
8155 (__mmask16) __U);
8156}
8157
8158static __inline__ __m512i __DEFAULT_FN_ATTRS512
8160{
8161 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8162 (__v16si)
8164 (__mmask16) __U);
8165}
8166
/* Scalar compare-to-mask macros: compare element 0 of X and Y using
 * predicate P, producing a 1-bit result in an __mmask8.  The _round forms
 * take an explicit SAE/rounding argument R; the plain forms use
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

8211/* Bit Test */
8212
8213static __inline __mmask16 __DEFAULT_FN_ATTRS512
8214_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8215{
8218}
8219
8220static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8221_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8222{
8223 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8225}
8226
8227static __inline __mmask8 __DEFAULT_FN_ATTRS512
8228_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8229{
8230 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8232}
8233
8234static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8235_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8236{
8237 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8239}
8240
8241static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8242_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8243{
8244 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8246}
8247
8248static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8249_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8250{
8251 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8253}
8254
8255static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8256_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8257{
8258 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8260}
8261
8262static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8263_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8264{
8265 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8267}
8268
8269static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8271{
8272 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8273 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8274}
8275
8276static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8277_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8278 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8279 (__v16sf)_mm512_movehdup_ps(__A),
8280 (__v16sf)__W);
8281}
8282
8283static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8285 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8286 (__v16sf)_mm512_movehdup_ps(__A),
8287 (__v16sf)_mm512_setzero_ps());
8288}
8289
8290static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8292{
8293 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8294 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8295}
8296
8297static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8298_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8299 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8300 (__v16sf)_mm512_moveldup_ps(__A),
8301 (__v16sf)__W);
8302}
8303
8304static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8306 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8307 (__v16sf)_mm512_moveldup_ps(__A),
8308 (__v16sf)_mm512_setzero_ps());
8309}
8310
8311static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8312_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
8313 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8314}
8315
8316static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8317_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
8318 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8319 _mm_setzero_ps());
8320}
8321
8322static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8323_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
8324 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8325}
8326
8327static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8328_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
8329 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8330 _mm_setzero_pd());
8331}
8332
8333static __inline__ void __DEFAULT_FN_ATTRS128
8334_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8335{
8336 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8337}
8338
8339static __inline__ void __DEFAULT_FN_ATTRS128
8340_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8341{
8342 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8343}
8344
8345static __inline__ __m128 __DEFAULT_FN_ATTRS128
8346_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8347{
8348 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8349 (__v4sf)_mm_setzero_ps(),
8350 0, 4, 4, 4);
8351
8352 return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
8353}
8354
8355static __inline__ __m128 __DEFAULT_FN_ATTRS128
8356_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8357{
8358 return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
8359 (__v4sf) _mm_setzero_ps(),
8360 __U & 1);
8361}
8362
8363static __inline__ __m128d __DEFAULT_FN_ATTRS128
8364_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8365{
8366 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8367 (__v2df)_mm_setzero_pd(),
8368 0, 2);
8369
8370 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
8371}
8372
8373static __inline__ __m128d __DEFAULT_FN_ATTRS128
8374_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8375{
8376 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
8377 (__v2df) _mm_setzero_pd(),
8378 __U & 1);
8379}
8380
/* In-lane 32-bit shuffle with immediate control I, plus masked forms. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))

8394static __inline__ __m512d __DEFAULT_FN_ATTRS512
8395_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8396{
8397 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8398 (__v8df) __W,
8399 (__mmask8) __U);
8400}
8401
8402static __inline__ __m512d __DEFAULT_FN_ATTRS512
8404{
8405 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8406 (__v8df) _mm512_setzero_pd (),
8407 (__mmask8) __U);
8408}
8409
8410static __inline__ __m512i __DEFAULT_FN_ATTRS512
8411_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8412{
8413 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8414 (__v8di) __W,
8415 (__mmask8) __U);
8416}
8417
8418static __inline__ __m512i __DEFAULT_FN_ATTRS512
8420{
8421 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8422 (__v8di) _mm512_setzero_si512 (),
8423 (__mmask8) __U);
8424}
8425
8426static __inline__ __m512d __DEFAULT_FN_ATTRS512
8427_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8428{
8429 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8430 (__v8df) __W,
8431 (__mmask8) __U);
8432}
8433
8434static __inline__ __m512d __DEFAULT_FN_ATTRS512
8436{
8437 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8438 (__v8df) _mm512_setzero_pd(),
8439 (__mmask8) __U);
8440}
8441
8442static __inline__ __m512i __DEFAULT_FN_ATTRS512
8443_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8444{
8445 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8446 (__v8di) __W,
8447 (__mmask8) __U);
8448}
8449
8450static __inline__ __m512i __DEFAULT_FN_ATTRS512
8452{
8453 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8454 (__v8di) _mm512_setzero_si512(),
8455 (__mmask8) __U);
8456}
8457
8458static __inline__ __m512 __DEFAULT_FN_ATTRS512
8459_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8460{
8461 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8462 (__v16sf) __W,
8463 (__mmask16) __U);
8464}
8465
8466static __inline__ __m512 __DEFAULT_FN_ATTRS512
8468{
8469 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8470 (__v16sf) _mm512_setzero_ps(),
8471 (__mmask16) __U);
8472}
8473
8474static __inline__ __m512i __DEFAULT_FN_ATTRS512
8475_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8476{
8477 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8478 (__v16si) __W,
8479 (__mmask16) __U);
8480}
8481
8482static __inline__ __m512i __DEFAULT_FN_ATTRS512
8484{
8485 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8486 (__v16si) _mm512_setzero_si512(),
8487 (__mmask16) __U);
8488}
8489
8490static __inline__ __m512 __DEFAULT_FN_ATTRS512
8491_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8492{
8493 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8494 (__v16sf) __W,
8495 (__mmask16) __U);
8496}
8497
8498static __inline__ __m512 __DEFAULT_FN_ATTRS512
8500{
8501 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8502 (__v16sf) _mm512_setzero_ps(),
8503 (__mmask16) __U);
8504}
8505
8506static __inline__ __m512i __DEFAULT_FN_ATTRS512
8507_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8508{
8509 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8510 (__v16si) __W,
8511 (__mmask16) __U);
8512}
8513
8514static __inline__ __m512i __DEFAULT_FN_ATTRS512
8516{
8517 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8518 (__v16si) _mm512_setzero_si512(),
8519 (__mmask16) __U);
8520}
8521
8522#define _mm512_cvt_roundps_pd(A, R) \
8523 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8524 (__v8df)_mm512_undefined_pd(), \
8525 (__mmask8)-1, (int)(R)))
8526
8527#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
8528 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8529 (__v8df)(__m512d)(W), \
8530 (__mmask8)(U), (int)(R)))
8531
8532#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
8533 ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
8534 (__v8df)_mm512_setzero_pd(), \
8535 (__mmask8)(U), (int)(R)))
8536
8537static __inline__ __m512d
8539 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8540}
8541
8542static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8543_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) {
8544 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8545 (__v8df)_mm512_cvtps_pd(__A),
8546 (__v8df)__W);
8547}
8548
8549static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8551 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8552 (__v8df)_mm512_cvtps_pd(__A),
8553 (__v8df)_mm512_setzero_pd());
8554}
8555
8556static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8558 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8559}
8560
8561static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8562_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) {
8563 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8564}
8565
8566static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8567_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {
8568 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8569 (__v8df)__W);
8570}
8571
8572static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8574 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8575 (__v8df)_mm512_setzero_pd());
8576}
8577
8578static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8579_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8580 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8581 (__v16sf)__W);
8582}
8583
8584static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8586 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8587 (__v16sf)_mm512_setzero_ps());
8588}
8589
8590static __inline__ void __DEFAULT_FN_ATTRS512
8592{
8593 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8594 (__mmask8) __U);
8595}
8596
8597static __inline__ void __DEFAULT_FN_ATTRS512
8599{
8600 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8601 (__mmask8) __U);
8602}
8603
8604static __inline__ void __DEFAULT_FN_ATTRS512
8606{
8607 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8608 (__mmask16) __U);
8609}
8610
8611static __inline__ void __DEFAULT_FN_ATTRS512
8613{
8614 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8615 (__mmask16) __U);
8616}
8617
8618#define _mm_cvt_roundsd_ss(A, B, R) \
8619 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8620 (__v2df)(__m128d)(B), \
8621 (__v4sf)_mm_undefined_ps(), \
8622 (__mmask8)-1, (int)(R)))
8623
8624#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
8625 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8626 (__v2df)(__m128d)(B), \
8627 (__v4sf)(__m128)(W), \
8628 (__mmask8)(U), (int)(R)))
8629
8630#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
8631 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8632 (__v2df)(__m128d)(B), \
8633 (__v4sf)_mm_setzero_ps(), \
8634 (__mmask8)(U), (int)(R)))
8635
8636static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8637_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) {
8638 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8639 (__v2df)__B,
8640 (__v4sf)__W,
8642}
8643
8644static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8645_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
8646 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8647 (__v2df)__B,
8648 (__v4sf)_mm_setzero_ps(),
8650}
8651
/* Aliases of the signed scalar conversions under the _i32/_i64 naming, plus
 * explicit-rounding int->float/double conversion macros (64-bit forms are
 * only available on x86-64). */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif

8689#define _mm_cvt_roundss_sd(A, B, R) \
8690 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8691 (__v4sf)(__m128)(B), \
8692 (__v2df)_mm_undefined_pd(), \
8693 (__mmask8)-1, (int)(R)))
8694
8695#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
8696 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8697 (__v4sf)(__m128)(B), \
8698 (__v2df)(__m128d)(W), \
8699 (__mmask8)(U), (int)(R)))
8700
8701#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
8702 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8703 (__v4sf)(__m128)(B), \
8704 (__v2df)_mm_setzero_pd(), \
8705 (__mmask8)(U), (int)(R)))
8706
8707static __inline__ __m128d __DEFAULT_FN_ATTRS128
8708_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
8709{
8710 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8711 (__v4sf)__B,
8712 (__v2df)__W,
8714}
8715
8716static __inline__ __m128d __DEFAULT_FN_ATTRS128
8717_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
8718{
8719 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8720 (__v4sf)__B,
8721 (__v2df)_mm_setzero_pd(),
8723}
8724
8725static __inline__ __m128d __DEFAULT_FN_ATTRS128
8726_mm_cvtu32_sd (__m128d __A, unsigned __B)
8727{
8728 __A[0] = __B;
8729 return __A;
8730}
8731
8732#ifdef __x86_64__
8733#define _mm_cvt_roundu64_sd(A, B, R) \
8734 ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
8735 (unsigned long long)(B), (int)(R)))
8736
8737static __inline__ __m128d __DEFAULT_FN_ATTRS128
8738_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
8739{
8740 __A[0] = __B;
8741 return __A;
8742}
8743#endif
8744
8745#define _mm_cvt_roundu32_ss(A, B, R) \
8746 ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
8747 (int)(R)))
8748
8749static __inline__ __m128 __DEFAULT_FN_ATTRS128
8750_mm_cvtu32_ss (__m128 __A, unsigned __B)
8751{
8752 __A[0] = __B;
8753 return __A;
8754}
8755
8756#ifdef __x86_64__
8757#define _mm_cvt_roundu64_ss(A, B, R) \
8758 ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
8759 (unsigned long long)(B), (int)(R)))
8760
8761static __inline__ __m128 __DEFAULT_FN_ATTRS128
8762_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
8763{
8764 __A[0] = __B;
8765 return __A;
8766}
8767#endif
8768
8769static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8770_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) {
8771 return (__m512i) __builtin_ia32_selectd_512(__M,
8772 (__v16si) _mm512_set1_epi32(__A),
8773 (__v16si) __O);
8774}
8775
8776static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8777_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) {
8778 return (__m512i) __builtin_ia32_selectq_512(__M,
8779 (__v8di) _mm512_set1_epi64(__A),
8780 (__v8di) __O);
8781}
8782
8784 char __e63, char __e62, char __e61, char __e60, char __e59, char __e58,
8785 char __e57, char __e56, char __e55, char __e54, char __e53, char __e52,
8786 char __e51, char __e50, char __e49, char __e48, char __e47, char __e46,
8787 char __e45, char __e44, char __e43, char __e42, char __e41, char __e40,
8788 char __e39, char __e38, char __e37, char __e36, char __e35, char __e34,
8789 char __e33, char __e32, char __e31, char __e30, char __e29, char __e28,
8790 char __e27, char __e26, char __e25, char __e24, char __e23, char __e22,
8791 char __e21, char __e20, char __e19, char __e18, char __e17, char __e16,
8792 char __e15, char __e14, char __e13, char __e12, char __e11, char __e10,
8793 char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3,
8794 char __e2, char __e1, char __e0) {
8795
8796 return __extension__ (__m512i)(__v64qi)
8797 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8798 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8799 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8800 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
8801 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
8802 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
8803 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
8804 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
8805}
8806
8808 short __e31, short __e30, short __e29, short __e28, short __e27,
8809 short __e26, short __e25, short __e24, short __e23, short __e22,
8810 short __e21, short __e20, short __e19, short __e18, short __e17,
8811 short __e16, short __e15, short __e14, short __e13, short __e12,
8812 short __e11, short __e10, short __e9, short __e8, short __e7, short __e6,
8813 short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) {
8814 return __extension__ (__m512i)(__v32hi)
8815 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8816 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8817 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8818 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
8819}
8820
8822 int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H,
8823 int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) {
8824 return __extension__ (__m512i)(__v16si)
8825 { __P, __O, __N, __M, __L, __K, __J, __I,
8826 __H, __G, __F, __E, __D, __C, __B, __A };
8827}
8828
8830 int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8,
8831 int e9, int e10, int e11, int e12, int e13, int e14, int e15) {
8832 return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,
8833 e3, e2, e1, e0);
8834}
8835
8836static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8837_mm512_set_epi64(long long __A, long long __B, long long __C, long long __D,
8838 long long __E, long long __F, long long __G, long long __H) {
8839 return __extension__ (__m512i) (__v8di)
8840 { __H, __G, __F, __E, __D, __C, __B, __A };
8841}
8842
8843static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8844_mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3,
8845 long long e4, long long e5, long long e6, long long e7) {
8846 return _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
8847}
8848
8849static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8850_mm512_set_pd(double __A, double __B, double __C, double __D, double __E,
8851 double __F, double __G, double __H) {
8852 return __extension__ (__m512d)
8853 { __H, __G, __F, __E, __D, __C, __B, __A };
8854}
8855
8856static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8857_mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5,
8858 double e6, double e7) {
8859 return _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0);
8860}
8861
8862static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8863_mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F,
8864 float __G, float __H, float __I, float __J, float __K, float __L,
8865 float __M, float __N, float __O, float __P) {
8866 return __extension__ (__m512)
8867 { __P, __O, __N, __M, __L, __K, __J, __I,
8868 __H, __G, __F, __E, __D, __C, __B, __A };
8869}
8870
8871static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8872_mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5,
8873 float e6, float e7, float e8, float e9, float e10, float e11,
8874 float e12, float e13, float e14, float e15) {
8875 return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
8876 e2, e1, e0);
8877}
8878
8879static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8880_mm512_abs_ps(__m512 __A) {
8881 return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
8882}
8883
8884static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8885_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) {
8886 return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
8887}
8888
8889static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8890_mm512_abs_pd(__m512d __A) {
8891 return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
8892}
8893
8894static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8895_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) {
8896 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
8897}
8898
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars
 * as outputs. This class of vector operation forms the basis of many
 * scientific computations. In vector-reduction arithmetic, the evaluation
 * order is independent of the order of the input elements of V.

 * For floating-point intrinsics:
 * 1. When using fadd/fmul intrinsics, the order of operations within the
 * vector is unspecified (associative math).
 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
 * produce unspecified results.

 * A bisection method is used: at each step, the vector from the previous step
 * is partitioned in half and the operation is performed on the two halves.
 * This takes log2(n) steps where n is the number of elements in the vector.
 */

8915static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8917 return __builtin_reduce_add((__v8di)__W);
8918}
8919
8920static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8922 return __builtin_reduce_mul((__v8di)__W);
8923}
8924
8925static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8927 return __builtin_reduce_and((__v8di)__W);
8928}
8929
8930static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8932 return __builtin_reduce_or((__v8di)__W);
8933}
8934
8935static __inline__ long long __DEFAULT_FN_ATTRS512
8937 __W = _mm512_maskz_mov_epi64(__M, __W);
8938 return __builtin_reduce_add((__v8di)__W);
8939}
8940
8941static __inline__ long long __DEFAULT_FN_ATTRS512
8943 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
8944 return __builtin_reduce_mul((__v8di)__W);
8945}
8946
8947static __inline__ long long __DEFAULT_FN_ATTRS512
8949 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
8950 return __builtin_reduce_and((__v8di)__W);
8951}
8952
8953static __inline__ long long __DEFAULT_FN_ATTRS512
8955 __W = _mm512_maskz_mov_epi64(__M, __W);
8956 return __builtin_reduce_or((__v8di)__W);
8957}
8958
8959// -0.0 is used to ignore the start value since it is the neutral value of
8960// floating point addition. For more information, please refer to
8961// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
8962static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
8963 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8964}
8965
8966static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
8967 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
8968}
8969
8970static __inline__ double __DEFAULT_FN_ATTRS512
8972 __W = _mm512_maskz_mov_pd(__M, __W);
8973 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8974}
8975
8976static __inline__ double __DEFAULT_FN_ATTRS512
8978 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
8979 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
8980}
8981
8982static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
8984 return __builtin_reduce_add((__v16si)__W);
8985}
8986
8987static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
8989 return __builtin_reduce_mul((__v16si)__W);
8990}
8991
8992static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
8994 return __builtin_reduce_and((__v16si)__W);
8995}
8996
8997static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
8999 return __builtin_reduce_or((__v16si)__W);
9000}
9001
9002static __inline__ int __DEFAULT_FN_ATTRS512
9004 __W = _mm512_maskz_mov_epi32(__M, __W);
9005 return __builtin_reduce_add((__v16si)__W);
9006}
9007
9008static __inline__ int __DEFAULT_FN_ATTRS512
9010 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9011 return __builtin_reduce_mul((__v16si)__W);
9012}
9013
9014static __inline__ int __DEFAULT_FN_ATTRS512
9016 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
9017 return __builtin_reduce_and((__v16si)__W);
9018}
9019
9020static __inline__ int __DEFAULT_FN_ATTRS512
9022 __W = _mm512_maskz_mov_epi32(__M, __W);
9023 return __builtin_reduce_or((__v16si)__W);
9024}
9025
9026static __inline__ float __DEFAULT_FN_ATTRS512
9028 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9029}
9030
9031static __inline__ float __DEFAULT_FN_ATTRS512
9033 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9034}
9035
9036static __inline__ float __DEFAULT_FN_ATTRS512
9038 __W = _mm512_maskz_mov_ps(__M, __W);
9039 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9040}
9041
9042static __inline__ float __DEFAULT_FN_ATTRS512
9044 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9045 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9046}
9047
9048static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9050 return __builtin_reduce_max((__v8di)__V);
9051}
9052
9053static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9055 return __builtin_reduce_max((__v8du)__V);
9056}
9057
9058static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9060 return __builtin_reduce_min((__v8di)__V);
9061}
9062
9063static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9065 return __builtin_reduce_min((__v8du)__V);
9066}
9067
9068static __inline__ long long __DEFAULT_FN_ATTRS512
9070 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9071 return __builtin_reduce_max((__v8di)__V);
9072}
9073
9074static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9076 __V = _mm512_maskz_mov_epi64(__M, __V);
9077 return __builtin_reduce_max((__v8du)__V);
9078}
9079
9080static __inline__ long long __DEFAULT_FN_ATTRS512
9082 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9083 return __builtin_reduce_min((__v8di)__V);
9084}
9085
9086static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9088 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
9089 return __builtin_reduce_min((__v8du)__V);
9090}
9091static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9093 return __builtin_reduce_max((__v16si)__V);
9094}
9095
9096static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9098 return __builtin_reduce_max((__v16su)__V);
9099}
9100
9101static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9103 return __builtin_reduce_min((__v16si)__V);
9104}
9105
9106static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9108 return __builtin_reduce_min((__v16su)__V);
9109}
9110
9111static __inline__ int __DEFAULT_FN_ATTRS512
9113 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9114 return __builtin_reduce_max((__v16si)__V);
9115}
9116
9117static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9119 __V = _mm512_maskz_mov_epi32(__M, __V);
9120 return __builtin_reduce_max((__v16su)__V);
9121}
9122
9123static __inline__ int __DEFAULT_FN_ATTRS512
9125 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9126 return __builtin_reduce_min((__v16si)__V);
9127}
9128
9129static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9131 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
9132 return __builtin_reduce_min((__v16su)__V);
9133}
9134
9135static __inline__ double __DEFAULT_FN_ATTRS512
9137 return __builtin_ia32_reduce_fmax_pd512(__V);
9138}
9139
9140static __inline__ double __DEFAULT_FN_ATTRS512
9142 return __builtin_ia32_reduce_fmin_pd512(__V);
9143}
9144
9145static __inline__ double __DEFAULT_FN_ATTRS512
9147 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9148 return __builtin_ia32_reduce_fmax_pd512(__V);
9149}
9150
9151static __inline__ double __DEFAULT_FN_ATTRS512
9153 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9154 return __builtin_ia32_reduce_fmin_pd512(__V);
9155}
9156
9157static __inline__ float __DEFAULT_FN_ATTRS512
9159 return __builtin_ia32_reduce_fmax_ps512(__V);
9160}
9161
9162static __inline__ float __DEFAULT_FN_ATTRS512
9164 return __builtin_ia32_reduce_fmin_ps512(__V);
9165}
9166
9167static __inline__ float __DEFAULT_FN_ATTRS512
9169 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9170 return __builtin_ia32_reduce_fmax_ps512(__V);
9171}
9172
9173static __inline__ float __DEFAULT_FN_ATTRS512
9175 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9176 return __builtin_ia32_reduce_fmin_ps512(__V);
9177}
9178
9179/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9180/// 32-bit signed integer value.
9181///
9182/// \headerfile <x86intrin.h>
9183///
9184/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9185///
9186/// \param __A
9187/// A vector of [16 x i32]. The least significant 32 bits are moved to the
9188/// destination.
9189/// \returns A 32-bit signed integer containing the moved value.
9190static __inline__ int __DEFAULT_FN_ATTRS512
9192 __v16si __b = (__v16si)__A;
9193 return __b[0];
9194}
9195
9196/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
9197/// locations starting at location \a base_addr at packed 32-bit integer indices
9198/// stored in the lower half of \a vindex scaled by \a scale them in dst.
9199///
9200/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9201///
9202/// \code{.operation}
9203/// FOR j := 0 to 7
9204/// i := j*64
9205/// m := j*32
9206/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9207/// dst[i+63:i] := MEM[addr+63:addr]
9208/// ENDFOR
9209/// dst[MAX:512] := 0
9210/// \endcode
#define _mm512_i32logather_pd(vindex, base_addr, scale) \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9213
9214/// Loads 8 double-precision (64-bit) floating-point elements from memory
9215/// starting at location \a base_addr at packed 32-bit integer indices stored in
9216/// the lower half of \a vindex scaled by \a scale into dst using writemask
9217/// \a mask (elements are copied from \a src when the corresponding mask bit is
9218/// not set).
9219///
9220/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9221///
9222/// \code{.operation}
9223/// FOR j := 0 to 7
9224/// i := j*64
9225/// m := j*32
9226/// IF mask[j]
9227/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9228/// dst[i+63:i] := MEM[addr+63:addr]
9229/// ELSE
9230/// dst[i+63:i] := src[i+63:i]
9231/// FI
9232/// ENDFOR
9233/// dst[MAX:512] := 0
9234/// \endcode
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \
                           (base_addr), (scale))
9238
9239/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9240/// at packed 32-bit integer indices stored in the lower half of \a vindex
9241/// scaled by \a scale and stores them in dst.
9242///
9243/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9244///
9245/// \code{.operation}
9246/// FOR j := 0 to 7
9247/// i := j*64
9248/// m := j*32
9249/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9250/// dst[i+63:i] := MEM[addr+63:addr]
9251/// ENDFOR
9252/// dst[MAX:512] := 0
9253/// \endcode
#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9256
9257/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9258/// at packed 32-bit integer indices stored in the lower half of \a vindex
9259/// scaled by \a scale and stores them in dst using writemask \a mask (elements
9260/// are copied from \a src when the corresponding mask bit is not set).
9261///
9262/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9263///
9264/// \code{.operation}
9265/// FOR j := 0 to 7
9266/// i := j*64
9267/// m := j*32
9268/// IF mask[j]
9269/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9270/// dst[i+63:i] := MEM[addr+63:addr]
9271/// ELSE
9272/// dst[i+63:i] := src[i+63:i]
9273/// FI
9274/// ENDFOR
9275/// dst[MAX:512] := 0
9276/// \endcode
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \
                              (base_addr), (scale))
9280
9281/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9282/// and to memory locations starting at location \a base_addr at packed 32-bit
9283/// integer indices stored in \a vindex scaled by \a scale.
9284///
9285/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9286///
9287/// \code{.operation}
9288/// FOR j := 0 to 7
9289/// i := j*64
9290/// m := j*32
9291/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9292/// MEM[addr+63:addr] := v1[i+63:i]
9293/// ENDFOR
9294/// \endcode
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
9297
9298/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9299/// to memory locations starting at location \a base_addr at packed 32-bit
9300/// integer indices stored in \a vindex scaled by \a scale. Only those elements
9301/// whose corresponding mask bit is set in writemask \a mask are written to
9302/// memory.
9303///
9304/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9305///
9306/// \code{.operation}
9307/// FOR j := 0 to 7
9308/// i := j*64
9309/// m := j*32
9310/// IF mask[j]
9311/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9312/// MEM[addr+63:addr] := a[i+63:i]
9313/// FI
9314/// ENDFOR
9315/// \endcode
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_pd((base_addr), (mask), \
                            _mm512_castsi512_si256(vindex), (v1), (scale))
9319
9320/// Stores 8 packed 64-bit integer elements located in \a v1 and stores them in
9321/// memory locations starting at location \a base_addr at packed 32-bit integer
9322/// indices stored in \a vindex scaled by \a scale.
9323///
9324/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9325///
9326/// \code{.operation}
9327/// FOR j := 0 to 7
9328/// i := j*64
9329/// m := j*32
9330/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9331/// MEM[addr+63:addr] := a[i+63:i]
9332/// ENDFOR
9333/// \endcode
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_epi64((base_addr), \
                          _mm512_castsi512_si256(vindex), (v1), (scale))
9337
9338/// Stores 8 packed 64-bit integer elements located in a and stores them in
9339/// memory locations starting at location \a base_addr at packed 32-bit integer
9340/// indices stored in \a vindex scaled by scale using writemask \a mask (elements
9341/// whose corresponding mask bit is not set are not written to memory).
9342///
9343/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9344///
9345/// \code{.operation}
9346/// FOR j := 0 to 7
9347/// i := j*64
9348/// m := j*32
9349/// IF mask[j]
9350/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9351/// MEM[addr+63:addr] := a[i+63:i]
9352/// FI
9353/// ENDFOR
9354/// \endcode
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_epi64((base_addr), (mask), \
                               _mm512_castsi512_si256(vindex), (v1), (scale))
9358
9359#undef __DEFAULT_FN_ATTRS512
9360#undef __DEFAULT_FN_ATTRS128
9361#undef __DEFAULT_FN_ATTRS
9362#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
9363#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
9364#undef __DEFAULT_FN_ATTRS_CONSTEXPR
9365
9366#endif /* __AVX512FINTRIN_H */
#define __L(__X)
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
Definition altivec.h:578
static __inline__ uint32_t volatile uint32_t * __p
Definition arm_acle.h:57
return __v
Definition arm_acle.h:88
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _cvtu32_mask16(unsigned int __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8, int e9, int e10, int e11, int e12, int e13, int e14, int e15)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si256(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7, float e8, float e9, float e10, float e11, float e12, float e13, float e14, float e15)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
_MM_MANTISSA_NORM_ENUM
@ _MM_MANT_NORM_p5_1
@ _MM_MANT_NORM_p5_2
@ _MM_MANT_NORM_1_2
@ _MM_MANT_NORM_p75_1p5
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m128 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps128(__m512 __a)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5, double e6, double e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps256(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3, long long e4, long long e5, long long e6, long long e7)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m128i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si128(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_int2mask(int __a)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m256d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd256(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
_MM_CMPINT_ENUM
@ _MM_CMPINT_NE
@ _MM_CMPINT_NLT
@ _MM_CMPINT_LE
@ _MM_CMPINT_EQ
@ _MM_CMPINT_LT
@ _MM_CMPINT_UNUSED
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_knot(__mmask16 __M)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
unsigned char __mmask8
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
@ _MM_TERNLOG_A
@ _MM_TERNLOG_B
@ _MM_TERNLOG_C
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
_MM_MANTISSA_SIGN_ENUM
@ _MM_MANT_SIGN_zero
@ _MM_MANT_SIGN_src
@ _MM_MANT_SIGN_nan
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi32(int e0, int e1, int e2, int e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32(__m512i __A, __m512i __B)
_MM_PERM_ENUM
@ _MM_PERM_BBCA
@ _MM_PERM_BCBD
@ _MM_PERM_DAAB
@ _MM_PERM_CBBD
@ _MM_PERM_DCCC
@ _MM_PERM_CDBB
@ _MM_PERM_DDDD
@ _MM_PERM_CCCC
@ _MM_PERM_CADA
@ _MM_PERM_BACD
@ _MM_PERM_CCAD
@ _MM_PERM_ABDB
@ _MM_PERM_BBBD
@ _MM_PERM_DCAB
@ _MM_PERM_BABC
@ _MM_PERM_AACD
@ _MM_PERM_BBAB
@ _MM_PERM_DCDB
@ _MM_PERM_BACC
@ _MM_PERM_ABDA
@ _MM_PERM_ACCC
@ _MM_PERM_ADAC
@ _MM_PERM_CCCD
@ _MM_PERM_CADD
@ _MM_PERM_ACCB
@ _MM_PERM_BBDB
@ _MM_PERM_ABBB
@ _MM_PERM_BACB
@ _MM_PERM_CDCA
@ _MM_PERM_ACBC
@ _MM_PERM_ADCB
@ _MM_PERM_BBBC
@ _MM_PERM_DBBA
@ _MM_PERM_BBBB
@ _MM_PERM_DDDB
@ _MM_PERM_CAAA
@ _MM_PERM_ADBB
@ _MM_PERM_ACDB
@ _MM_PERM_DCAD
@ _MM_PERM_DBBC
@ _MM_PERM_BAAB
@ _MM_PERM_BDDD
@ _MM_PERM_BBAD
@ _MM_PERM_DDBA
@ _MM_PERM_CDCD
@ _MM_PERM_CCCA
@ _MM_PERM_DBBB
@ _MM_PERM_DAAD
@ _MM_PERM_DCBA
@ _MM_PERM_CCBC
@ _MM_PERM_ADDD
@ _MM_PERM_DBAC
@ _MM_PERM_ABAB
@ _MM_PERM_CBDB
@ _MM_PERM_CDBC
@ _MM_PERM_AABC
@ _MM_PERM_DABD
@ _MM_PERM_CBBA
@ _MM_PERM_CBAA
@ _MM_PERM_BDDB
@ _MM_PERM_CABC
@ _MM_PERM_BDBD
@ _MM_PERM_BCAD
@ _MM_PERM_ACBA
@ _MM_PERM_ADBA
@ _MM_PERM_ADBC
@ _MM_PERM_DBCB
@ _MM_PERM_CBDC
@ _MM_PERM_CBAD
@ _MM_PERM_ABCC
@ _MM_PERM_AAAD
@ _MM_PERM_CBAC
@ _MM_PERM_CCDA
@ _MM_PERM_CDAC
@ _MM_PERM_BADD
@ _MM_PERM_DAAC
@ _MM_PERM_BCCC
@ _MM_PERM_DBBD
@ _MM_PERM_DDAC
@ _MM_PERM_DACD
@ _MM_PERM_BAAC
@ _MM_PERM_ACCA
@ _MM_PERM_ABDD
@ _MM_PERM_BBCC
@ _MM_PERM_DAAA
@ _MM_PERM_CAAB
@ _MM_PERM_BCDB
@ _MM_PERM_ACBB
@ _MM_PERM_CDAB
@ _MM_PERM_DBDB
@ _MM_PERM_AABB
@ _MM_PERM_DBDA
@ _MM_PERM_BCBA
@ _MM_PERM_CBAB
@ _MM_PERM_DCDC
@ _MM_PERM_BBCB
@ _MM_PERM_CDCB
@ _MM_PERM_AACA
@ _MM_PERM_ACBD
@ _MM_PERM_AAAB
@ _MM_PERM_DCCB
@ _MM_PERM_ADDB
@ _MM_PERM_AAAA
@ _MM_PERM_AACC
@ _MM_PERM_BDDC
@ _MM_PERM_CBBC
@ _MM_PERM_DDCC
@ _MM_PERM_CABD
@ _MM_PERM_AADC
@ _MM_PERM_BCBC
@ _MM_PERM_BCCA
@ _MM_PERM_CCBD
@ _MM_PERM_CBBB
@ _MM_PERM_CDBA
@ _MM_PERM_CACD
@ _MM_PERM_BDAD
@ _MM_PERM_ADCA
@ _MM_PERM_AAAC
@ _MM_PERM_BDDA
@ _MM_PERM_CCAC
@ _MM_PERM_ACDC
@ _MM_PERM_DBCA
@ _MM_PERM_DBAA
@ _MM_PERM_AABD
@ _MM_PERM_CDCC
@ _MM_PERM_DCAA
@ _MM_PERM_DDDC
@ _MM_PERM_CDDB
@ _MM_PERM_AABA
@ _MM_PERM_DDBB
@ _MM_PERM_CDDA
@ _MM_PERM_AADD
@ _MM_PERM_BADC
@ _MM_PERM_BDBA
@ _MM_PERM_DBDD
@ _MM_PERM_BDAC
@ _MM_PERM_DBDC
@ _MM_PERM_BBBA
@ _MM_PERM_DDBC
@ _MM_PERM_BAAA
@ _MM_PERM_BDCC
@ _MM_PERM_DDAB
@ _MM_PERM_BCCB
@ _MM_PERM_BCCD
@ _MM_PERM_ADBD
@ _MM_PERM_ADCC
@ _MM_PERM_CCBB
@ _MM_PERM_CDAA
@ _MM_PERM_BBDA
@ _MM_PERM_CACC
@ _MM_PERM_DCBB
@ _MM_PERM_DABA
@ _MM_PERM_BADB
@ _MM_PERM_ABCA
@ _MM_PERM_CBCC
@ _MM_PERM_ABAD
@ _MM_PERM_BDBC
@ _MM_PERM_DDDA
@ _MM_PERM_ADAB
@ _MM_PERM_CADB
@ _MM_PERM_ADAA
@ _MM_PERM_ACAC
@ _MM_PERM_DADD
@ _MM_PERM_BABD
@ _MM_PERM_ACCD
@ _MM_PERM_CCAA
@ _MM_PERM_AADA
@ _MM_PERM_BDCA
@ _MM_PERM_CDDD
@ _MM_PERM_ABBD
@ _MM_PERM_ACAA
@ _MM_PERM_ACDD
@ _MM_PERM_DABB
@ _MM_PERM_CCCB
@ _MM_PERM_AADB
@ _MM_PERM_DBAD
@ _MM_PERM_BBDD
@ _MM_PERM_BCDC
@ _MM_PERM_CABA
@ _MM_PERM_BBAA
@ _MM_PERM_ADAD
@ _MM_PERM_BADA
@ _MM_PERM_DCDA
@ _MM_PERM_ABBA
@ _MM_PERM_ACAB
@ _MM_PERM_CCDD
@ _MM_PERM_CADC
@ _MM_PERM_DDCB
@ _MM_PERM_BABB
@ _MM_PERM_CCDB
@ _MM_PERM_DDAD
@ _MM_PERM_DBCC
@ _MM_PERM_BCBB
@ _MM_PERM_ADDC
@ _MM_PERM_CCBA
@ _MM_PERM_ABCD
@ _MM_PERM_BCAB
@ _MM_PERM_DCBC
@ _MM_PERM_BCDD
@ _MM_PERM_CCDC
@ _MM_PERM_ABAC
@ _MM_PERM_CBCB
@ _MM_PERM_CCAB
@ _MM_PERM_DDCD
@ _MM_PERM_DACA
@ _MM_PERM_ACAD
@ _MM_PERM_BABA
@ _MM_PERM_CBCD
@ _MM_PERM_CAAD
@ _MM_PERM_DCDD
@ _MM_PERM_BDBB
@ _MM_PERM_BCAA
@ _MM_PERM_ABDC
@ _MM_PERM_BBCD
@ _MM_PERM_CAAC
@ _MM_PERM_BBAC
@ _MM_PERM_CBCA
@ _MM_PERM_DCAC
@ _MM_PERM_ABAA
@ _MM_PERM_CACB
@ _MM_PERM_BBDC
@ _MM_PERM_CDAD
@ _MM_PERM_ADCD
@ _MM_PERM_DADB
@ _MM_PERM_DBCD
@ _MM_PERM_DACC
@ _MM_PERM_DACB
@ _MM_PERM_DCBD
@ _MM_PERM_CACA
@ _MM_PERM_ABBC
@ _MM_PERM_DCCA
@ _MM_PERM_DABC
@ _MM_PERM_CBDD
@ _MM_PERM_DDBD
@ _MM_PERM_DDCA
@ _MM_PERM_BDCD
@ _MM_PERM_CDBD
@ _MM_PERM_ABCB
@ _MM_PERM_CDDC
@ _MM_PERM_AACB
@ _MM_PERM_DDAA
@ _MM_PERM_ADDA
@ _MM_PERM_DADA
@ _MM_PERM_BCDA
@ _MM_PERM_BDAB
@ _MM_PERM_BAAD
@ _MM_PERM_DBAB
@ _MM_PERM_DCCD
@ _MM_PERM_CABB
@ _MM_PERM_BDAA
@ _MM_PERM_BDCB
@ _MM_PERM_ACDA
@ _MM_PERM_DADC
@ _MM_PERM_CBDA
@ _MM_PERM_BCAC
@ _MM_PERM_BACA
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_pd(double e0, double e1, double e2, double e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m128d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd128(__m512d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kmov(__mmask16 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_si512(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_ps(float e0, float e1, float e2, float e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_ps(__m512d __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3616
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4287
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4275
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4299
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition emmintrin.h:120
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition emmintrin.h:199
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition emmintrin.h:80
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:1887
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition emmintrin.h:159
static __inline__ void short __D
Definition immintrin.h:342
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition bmi2intrin.h:19
#define _MM_FROUND_FLOOR
Definition smmintrin.h:41
#define _MM_FROUND_CEIL
Definition smmintrin.h:42
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition xmmintrin.h:2797
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:160
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
Definition xmmintrin.h:119
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:79
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2014
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
Definition xmmintrin.h:200