10#error "Never use <avx10_2niintrin.h> directly; include <immintrin.h> instead."
15#ifndef __AVX10_2NIINTRIN_H
16#define __AVX10_2NIINTRIN_H
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                 __min_vector_width__(256)))
/* VNNI FP16 */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_dpph_ps(__m128 __W, __m128h __A, __m128h __B) {
  return (__m128)__builtin_ia32_vdpphps128((__v4sf)__W, (__v8hf)__A,
                                           (__v8hf)__B);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_dpph_ps(__m128 __W, __mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128)__builtin_ia32_selectps_128(
      (__mmask8)__U, (__v4sf)_mm_dpph_ps(__W, __A, __B), (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_dpph_ps(__mmask8 __U, __m128 __W, __m128h __A, __m128h __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_dpph_ps(__W, __A, __B),
                                             (__v4sf)_mm_setzero_ps());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_dpph_ps(__m256 __W, __m256h __A, __m256h __B) {
  return (__m256)__builtin_ia32_vdpphps256((__v8sf)__W, (__v16hf)__A,
                                           (__v16hf)__B);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_dpph_ps(__m256 __W, __mmask8 __U, __m256h __A, __m256h __B) {
  return (__m256)__builtin_ia32_selectps_256(
      (__mmask8)__U, (__v8sf)_mm256_dpph_ps(__W, __A, __B), (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_dpph_ps(__mmask8 __U, __m256 __W, __m256h __A, __m256h __B) {
  return (__m256)__builtin_ia32_selectps_256(
      (__mmask8)__U, (__v8sf)_mm256_dpph_ps(__W, __A, __B),
      (__v8sf)_mm256_setzero_ps());
}
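/* Usage sketch (illustrative, not part of the header API): merge-masked FP16
 * dot product. Each 32-bit float lane accumulates the products of the two
 * corresponding _Float16 element pairs only when its mask bit is set;
 * unselected lanes keep their previous accumulator value. The helper name
 * and mask value are hypothetical.
 *
 *   static inline __m128 dpph_even_lanes(__m128 acc, __m128h a, __m128h b) {
 *     return _mm_mask_dpph_ps(acc, (__mmask8)0x5, a, b); // lanes 0 and 2
 *   }
 */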
#define _mm_mask_mpsadbw_epu8(W, U, A, B, imm)                                 \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)),                \
      (__v8hi)(__m128i)(W)))

#define _mm_maskz_mpsadbw_epu8(U, A, B, imm)                                   \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)),                \
      (__v8hi)_mm_setzero_si128()))

#define _mm256_mask_mpsadbw_epu8(W, U, A, B, imm)                              \
  ((__m256i)__builtin_ia32_selectw_256(                                        \
      (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)),           \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_mpsadbw_epu8(U, A, B, imm)                                \
  ((__m256i)__builtin_ia32_selectw_256(                                        \
      (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)),           \
      (__v16hi)_mm256_setzero_si256()))
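/* Usage sketch (illustrative): the imm argument selects the source blocks for
 * the sum-of-absolute-differences computation, so it must be a compile-time
 * constant; that is why the masked forms above are macros rather than inline
 * functions. A hypothetical zero-masked call that keeps only the low four
 * word results:
 *
 *   __m128i r = _mm_maskz_mpsadbw_epu8((__mmask8)0x0F, a, b, 2);
 */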
/* VNNI INT8 */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbssd_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbssd_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbssd_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbssd_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbssd_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbssd_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbssd_epi32(__mmask8 __U, __m256i __W, __m256i __A,
                          __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbssd_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbssds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbssds_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbssds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbssds_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbssds_epi32(__m256i __W, __mmask8 __U, __m256i __A,
                          __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbssds_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbssds_epi32(__mmask8 __U, __m256i __W, __m256i __A,
                           __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbssds_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbsud_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbsud_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbsud_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbsud_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbsud_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbsud_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbsud_epi32(__mmask8 __U, __m256i __W, __m256i __A,
                          __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbsud_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbsuds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbsuds_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbsuds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbsuds_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbsuds_epi32(__m256i __W, __mmask8 __U, __m256i __A,
                          __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbsuds_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbsuds_epi32(__mmask8 __U, __m256i __W, __m256i __A,
                           __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbsuds_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbuud_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbuud_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbuud_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbuud_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbuud_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbuud_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbuud_epi32(__mmask8 __U, __m256i __W, __m256i __A,
                          __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbuud_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbuuds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbuuds_epi32(__W, __A, __B), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbuuds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpbuuds_epi32(__W, __A, __B),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbuuds_epi32(__m256i __W, __mmask8 __U, __m256i __A,
                          __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbuuds_epi32(__W, __A, __B), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbuuds_epi32(__mmask8 __U, __m256i __W, __m256i __A,
                           __m256i __B) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpbuuds_epi32(__W, __A, __B),
      (__v8si)_mm256_setzero_si256());
}
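/* Usage sketch (illustrative): merge- versus zero-masking for the VNNI INT8
 * dot products above. Variable names are hypothetical.
 *
 *   // Dword lanes 0-1 accumulate signed-by-signed byte dot products into
 *   // acc; lanes 2-3 keep acc's previous values.
 *   __m128i merged = _mm_mask_dpbssd_epi32(acc, (__mmask8)0x3, a, b);
 *   // Same computation, but unselected lanes are zeroed instead.
 *   __m128i zeroed = _mm_maskz_dpbssd_epi32((__mmask8)0x3, acc, a, b);
 */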
/* VNNI INT16 */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwsud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwsud_epi32(__mmask8 __U, __m256i __A, __m256i __B,
                          __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwsuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __U, __m256i __B,
                          __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwsuds_epi32(__mmask8 __U, __m256i __A, __m256i __B,
                           __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwusd_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwusd_epi32(__mmask8 __U, __m256i __A, __m256i __B,
                          __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwusds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __U, __m256i __B,
                          __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwusds_epi32(__mmask8 __U, __m256i __A, __m256i __B,
                           __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwuud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwuud_epi32(__mmask8 __U, __m256i __A, __m256i __B,
                          __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C), (__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwuuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C),
      (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __U, __m256i __B,
                          __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C), (__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwuuds_epi32(__mmask8 __U, __m256i __A, __m256i __B,
                           __m256i __C) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C),
      (__v8si)_mm256_setzero_si256());
}
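/* Usage sketch (illustrative): the 16-bit dot products take the accumulator
 * first, then the mask, then the two multiplicand vectors. A hypothetical
 * saturating signed-by-unsigned accumulation over all eight dword lanes:
 *
 *   __m256i acc2 = _mm256_mask_dpwsuds_epi32(acc, (__mmask8)0xFF, a, b);
 */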
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128

#endif /* __AVX10_2NIINTRIN_H */