12 "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
15#ifndef __IFMAVLINTRIN_H
16#define __IFMAVLINTRIN_H
/*
 * Attribute sets placed on the inline intrinsic functions of this header
 * (between the return type and the function name).
 *
 * Both variants request forced inlining, suppress debug info, and require
 * the "avx512ifma" and "avx512vl" target features; they differ only in the
 * minimum vector width (128 vs. 256 bits).
 *
 * When compiled as C++11 or later, `constexpr` is appended so the functions
 * carrying these attributes are declared constexpr. In every other mode the
 * plain attribute list is used. The two sets of definitions are mutually
 * exclusive: exactly one arm of the conditional below is active.
 */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512ifma,avx512vl"), \
                 __min_vector_width__(128))) constexpr
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512ifma,avx512vl"), \
                 __min_vector_width__(256))) constexpr
#else
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512ifma,avx512vl"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512ifma,avx512vl"), \
                 __min_vector_width__(256)))
#endif
39#if !(defined(__AVXIFMA__) || defined(__AVX512IFMA__))
40#define _mm_madd52hi_epu64(X, Y, Z) \
41 ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \
44#define _mm256_madd52hi_epu64(X, Y, Z) \
45 ((__m256i)__builtin_ia32_vpmadd52huq256((__v4di)(X), (__v4di)(Y), \
48#define _mm_madd52lo_epu64(X, Y, Z) \
49 ((__m128i)__builtin_ia32_vpmadd52luq128((__v2di)(X), (__v2di)(Y), \
52#define _mm256_madd52lo_epu64(X, Y, Z) \
53 ((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y), \
57#if defined(__AVX512IFMA__)
60 return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)
__Y,
66 return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)
__Y,
72 return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)
__Y,
78 return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)
__Y,
85 return (__m128i)__builtin_ia32_selectq_128(
86 __M, (__v2di)__builtin_ia32_vpmadd52huq128(__W, __X,
__Y), (__v2di)__W);
91 return (__m128i)__builtin_ia32_selectq_128(
92 __M, (__v2di)__builtin_ia32_vpmadd52huq128(__X,
__Y, __Z),
97 __m256i __W,
__mmask8 __M, __m256i __X, __m256i
__Y) {
98 return (__m256i)__builtin_ia32_selectq_256(
99 __M, (__v4di)__builtin_ia32_vpmadd52huq256(__W, __X,
__Y), (__v4di)__W);
103 __mmask8 __M, __m256i __X, __m256i
__Y, __m256i __Z) {
104 return (__m256i)__builtin_ia32_selectq_256(
105 __M, (__v4di)__builtin_ia32_vpmadd52huq256(__X,
__Y, __Z),
111 return (__m128i)__builtin_ia32_selectq_128(
112 __M, (__v2di)__builtin_ia32_vpmadd52luq128(__W, __X,
__Y), (__v2di)__W);
117 return (__m128i)__builtin_ia32_selectq_128(
118 __M, (__v2di)__builtin_ia32_vpmadd52luq128(__X,
__Y, __Z),
123 __m256i __W,
__mmask8 __M, __m256i __X, __m256i
__Y) {
124 return (__m256i)__builtin_ia32_selectq_256(
125 __M, (__v4di)__builtin_ia32_vpmadd52luq256(__W, __X,
__Y), (__v4di)__W);
129 __mmask8 __M, __m256i __X, __m256i
__Y, __m256i __Z) {
130 return (__m256i)__builtin_ia32_selectq_256(
131 __M, (__v4di)__builtin_ia32_vpmadd52luq256(__X,
__Y, __Z),
/* Remove the attribute helper macros that were defined earlier in this
 * header for the 128-bit and 256-bit intrinsics. */
135#undef __DEFAULT_FN_ATTRS128
136#undef __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
#define _mm256_madd52lo_epu64(X, Y, Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
#define _mm_madd52hi_epu64(X, Y, Z)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
#define _mm_madd52lo_epu64(X, Y, Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
#define _mm256_madd52hi_epu64(X, Y, Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
__inline unsigned int unsigned int __Y