12    "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." 
   15#ifndef __IFMAVLINTRIN_H 
   16#define __IFMAVLINTRIN_H 
   19#if defined(__cplusplus) && (__cplusplus >= 201103L) 
   /* Attribute set for the 128-bit intrinsics, used when the preceding
    * `__cplusplus >= 201103L` check holds: `constexpr` plus always_inline,
    * nodebug, a target of "avx512ifma,avx512vl", and a declared minimum
    * vector width of 128. */
   20#define __DEFAULT_FN_ATTRS128                                                  \ 
   21  constexpr __attribute__((__always_inline__, __nodebug__,                     \ 
   22                           __target__("avx512ifma,avx512vl"),                  \ 
   23                           __min_vector_width__(128))) 
   /* Attribute set for the 256-bit intrinsics, used when the preceding
    * `__cplusplus >= 201103L` check holds: identical to the 128-bit set
    * (constexpr, always_inline, nodebug, target "avx512ifma,avx512vl") except
    * that the declared minimum vector width is 256. */
   24#define __DEFAULT_FN_ATTRS256                                                  \ 
   25  constexpr __attribute__((__always_inline__, __nodebug__,                     \ 
   26                           __target__("avx512ifma,avx512vl"),                  \ 
   27                           __min_vector_width__(256))) 
   /* Non-constexpr flavour of the 128-bit attribute set: always_inline,
    * nodebug, target "avx512ifma,avx512vl", minimum vector width 128.
    * NOTE(review): presumably this is the fallback branch of the __cplusplus
    * check above; the `#else` that would separate it from the constexpr
    * definition is not visible in this view -- confirm against the full file. */
   29#define __DEFAULT_FN_ATTRS128                                                  \ 
   30  __attribute__((__always_inline__, __nodebug__,                               \ 
   31                 __target__("avx512ifma,avx512vl"),                            \ 
   32                 __min_vector_width__(128))) 
 
   /* Non-constexpr flavour of the 256-bit attribute set: always_inline,
    * nodebug, target "avx512ifma,avx512vl", minimum vector width 256.
    * NOTE(review): presumably paired with the non-constexpr 128-bit macro in
    * the fallback branch of the __cplusplus check; the closing `#endif` is not
    * visible in this view -- confirm against the full file. */
   33#define __DEFAULT_FN_ATTRS256                                                  \ 
   34  __attribute__((__always_inline__, __nodebug__,                               \ 
   35                 __target__("avx512ifma,avx512vl"),                            \ 
   36                 __min_vector_width__(256))) 
 
   40#if !(defined(__AVXIFMA__) || defined(__AVX512IFMA__)) 
   41#define _mm_madd52hi_epu64(X, Y, Z)                                            \ 
   42  ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y),            \ 
 
   45#define _mm256_madd52hi_epu64(X, Y, Z)                                         \ 
   46  ((__m256i)__builtin_ia32_vpmadd52huq256((__v4di)(X), (__v4di)(Y),            \ 
 
   49#define _mm_madd52lo_epu64(X, Y, Z)                                            \ 
   50  ((__m128i)__builtin_ia32_vpmadd52luq128((__v2di)(X), (__v2di)(Y),            \ 
 
   53#define _mm256_madd52lo_epu64(X, Y, Z)                                         \ 
   54  ((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y),            \ 
 
   58#if defined(__AVX512IFMA__) 
   61  return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)
__Y,
 
   67  return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)
__Y,
 
   73  return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)
__Y,
 
   79  return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)
__Y,
 
   86  return (__m128i)__builtin_ia32_selectq_128(
 
   87      __M, (__v2di)__builtin_ia32_vpmadd52huq128(__W, __X, 
__Y), (__v2di)__W);
 
 
   92  return (__m128i)__builtin_ia32_selectq_128(
 
   93      __M, (__v2di)__builtin_ia32_vpmadd52huq128(__X, 
__Y, __Z),
 
 
   98    __m256i __W, 
__mmask8 __M, __m256i __X, __m256i 
__Y) {
 
   99  return (__m256i)__builtin_ia32_selectq_256(
 
  100      __M, (__v4di)__builtin_ia32_vpmadd52huq256(__W, __X, 
__Y), (__v4di)__W);
 
 
  104    __mmask8 __M, __m256i __X, __m256i 
__Y, __m256i __Z) {
 
  105  return (__m256i)__builtin_ia32_selectq_256(
 
  106      __M, (__v4di)__builtin_ia32_vpmadd52huq256(__X, 
__Y, __Z),
 
 
  112  return (__m128i)__builtin_ia32_selectq_128(
 
  113      __M, (__v2di)__builtin_ia32_vpmadd52luq128(__W, __X, 
__Y), (__v2di)__W);
 
 
  118  return (__m128i)__builtin_ia32_selectq_128(
 
  119      __M, (__v2di)__builtin_ia32_vpmadd52luq128(__X, 
__Y, __Z),
 
 
  124    __m256i __W, 
__mmask8 __M, __m256i __X, __m256i 
__Y) {
 
  125  return (__m256i)__builtin_ia32_selectq_256(
 
  126      __M, (__v4di)__builtin_ia32_vpmadd52luq256(__W, __X, 
__Y), (__v4di)__W);
 
 
  130    __mmask8 __M, __m256i __X, __m256i 
__Y, __m256i __Z) {
 
  131  return (__m256i)__builtin_ia32_selectq_256(
 
  132      __M, (__v4di)__builtin_ia32_vpmadd52luq256(__X, 
__Y, __Z),
 
 
  /* Remove the two attribute helper macros defined earlier in this header so
   * their definitions do not remain in effect past this point. */
  136#undef __DEFAULT_FN_ATTRS128 
  137#undef __DEFAULT_FN_ATTRS256 
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
#define _mm256_madd52lo_epu64(X, Y, Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
#define _mm_madd52hi_epu64(X, Y, Z)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
#define _mm_madd52lo_epu64(X, Y, Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
#define _mm256_madd52hi_epu64(X, Y, Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
__inline unsigned int unsigned int __Y