10#error "Never use <avx10_2niintrin.h> directly; include <immintrin.h> instead." 
   15#ifndef __AVX10_2NIINTRIN_H 
   16#define __AVX10_2NIINTRIN_H 
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                 __min_vector_width__(256)))

/* VNNI FP16 */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_dpph_ps(__m128 __W,
                                                           __m128h __A,
                                                           __m128h __B) {
  return (__m128)__builtin_ia32_vdpphps128((__v4sf)__W, (__v8hf)__A,
                                           (__v8hf)__B);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_dpph_ps(__m128 __W,
                                                                __mmask8 __U,
                                                                __m128h __A,
                                                                __m128h __B) {
  return (__m128)__builtin_ia32_selectps_128(
      (__mmask8)__U, (__v4sf)_mm_dpph_ps(__W, __A, __B), (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_dpph_ps(__mmask8 __U, __m128 __W, __m128h __A, __m128h __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_dpph_ps(__W, __A, __B),
                                             (__v4sf)_mm_setzero_ps());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_dpph_ps(__m256 __W,
                                                              __m256h __A,
                                                              __m256h __B) {
  return (__m256)__builtin_ia32_vdpphps256((__v8sf)__W, (__v16hf)__A,
                                           (__v16hf)__B);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_dpph_ps(__m256 __W, __mmask8 __U, __m256h __A, __m256h __B) {
  return (__m256)__builtin_ia32_selectps_256(
      (__mmask8)__U, (__v8sf)_mm256_dpph_ps(__W, __A, __B), (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_dpph_ps(__mmask8 __U, __m256 __W, __m256h __A, __m256h __B) {
  return (__m256)__builtin_ia32_selectps_256(
      (__mmask8)__U, (__v8sf)_mm256_dpph_ps(__W, __A, __B),
      (__v8sf)_mm256_setzero_ps());
}
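
/* Example (illustrative, not part of the upstream header): a hypothetical
   wrapper showing the FP16 dot product and its zero-masked form. FP32 lane
   i of the result is __W[i] + __A[2i]*__B[2i] + __A[2i+1]*__B[2i+1]. */
static __inline__ __m256 __DEFAULT_FN_ATTRS256
__example_dot2_fp16_low4(__m256 __W, __m256h __A, __m256h __B) {
  /* Zero-masking with 0x0F: lanes 0-3 are computed, lanes 4-7 become 0. */
  return _mm256_maskz_dpph_ps((__mmask8)0x0F, __W, __A, __B);
}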
 
#define _mm_mask_mpsadbw_epu8(W, U, A, B, imm)                                 \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)),                \
      (__v8hi)(__m128i)(W)))

#define _mm_maskz_mpsadbw_epu8(U, A, B, imm)                                   \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)),                \
      (__v8hi)_mm_setzero_si128()))

#define _mm256_mask_mpsadbw_epu8(W, U, A, B, imm)                              \
  ((__m256i)__builtin_ia32_selectw_256(                                        \
      (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)),           \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_mpsadbw_epu8(U, A, B, imm)                                \
  ((__m256i)__builtin_ia32_selectw_256(                                        \
      (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)),           \
      (__v16hi)_mm256_setzero_si256()))
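
/* Example (illustrative, not part of the upstream header): masked multiple
   sum-of-absolute-differences. The immediate picks one aligned 4-byte block
   of __B and the starting byte offset in __A, exactly as in the SSE4.1/AVX2
   mpsadbw forms; each 16-bit result is then merged from __W per mask bit. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
__example_masked_sad(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return _mm_mask_mpsadbw_epu8(__W, __U, __A, __B, 0); /* block 0, offset 0 */
}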

/* VNNI INT8 */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbssd_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbssd_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbssd_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbssd_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbssd_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbssd_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbssd_epi32(__mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbssd_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbssds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbssds_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbssds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbssds_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbssds_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbssds_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbssds_epi32(
    __mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbssds_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}
 
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbsud_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbsud_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbsud_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbsud_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbsud_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbsud_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbsud_epi32(__mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbsud_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbsuds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbsuds_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbsuds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbsuds_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbsuds_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbsuds_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbsuds_epi32(
    __mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbsuds_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}
 
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbuud_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbuud_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbuud_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbuud_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbuud_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbuud_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbuud_epi32(__mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbuud_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbuuds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbuuds_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbuuds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbuuds_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbuuds_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbuuds_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbuuds_epi32(
    __mmask8 __U, __m256i __W, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbuuds_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}
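
/* Example (illustrative, not part of the upstream header): merge-masked
   signed*signed int8 dot product. Each 32-bit lane adds the sum of four
   int8*int8 products to the accumulator; lanes with a clear mask bit keep
   __W unchanged. The unmasked _mm_dpbssd_epi32 comes from
   <avxvnniint8intrin.h> via <immintrin.h>. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
__example_dpbssd_low2(__m128i __W, __m128i __A, __m128i __B) {
  /* Mask 0x3: update lanes 0-1, pass lanes 2-3 through from __W. */
  return _mm_mask_dpbssd_epi32(__W, (__mmask8)0x3, __A, __B);
}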
 
/* VNNI INT16 */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwsud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwsud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwsuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwsuds_epi32(
    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}
 
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwusd_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwusd_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwusds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwusds_epi32(
    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}
 
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwuud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwuud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwuuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwuuds_epi32(
    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}
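
/* Example (illustrative, not part of the upstream header): zero-masked
   signed*unsigned int16 dot product. Each 32-bit lane adds two int16*uint16
   products to the accumulator __A; the trailing-'s' variants saturate the
   accumulation instead of wrapping. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
__example_dpwsud_even(__m256i __A, __m256i __B, __m256i __C) {
  /* Mask 0x55: compute even lanes, zero odd lanes. */
  return _mm256_maskz_dpwsud_epi32((__mmask8)0x55, __A, __B, __C);
}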
 
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128

#endif /* __AVX10_2NIINTRIN_H */