clang 19.0.0git
Macros | Functions
avxvnniint16intrin.h File Reference

Go to the source code of this file.

Macros

#define __DEFAULT_FN_ATTRS128
 
#define __DEFAULT_FN_ATTRS256
 

Functions

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsud_epi32 (__m128i __W, __m128i __A, __m128i __B)
 Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate signed 32-bit results.
 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsud_epi32 (__m256i __W, __m256i __A, __m256i __B)
 Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate signed 32-bit results.
 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
 Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate signed 32-bit results.
 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
 Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate signed 32-bit results.
 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusd_epi32 (__m128i __W, __m128i __A, __m128i __B)
 Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, producing 2 intermediate signed 32-bit results.
 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusd_epi32 (__m256i __W, __m256i __A, __m256i __B)
 Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, producing 2 intermediate signed 32-bit results.
 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusds_epi32 (__m128i __W, __m128i __A, __m128i __B)
 Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, producing 2 intermediate signed 32-bit results.
 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusds_epi32 (__m256i __W, __m256i __A, __m256i __B)
 Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, producing 2 intermediate signed 32-bit results.
 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuud_epi32 (__m128i __W, __m128i __A, __m128i __B)
 Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate unsigned 32-bit results.
 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuud_epi32 (__m256i __W, __m256i __A, __m256i __B)
 Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate unsigned 32-bit results.
 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
 Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate unsigned 32-bit results.
 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
 Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate unsigned 32-bit results.
 

Macro Definition Documentation

◆ __DEFAULT_FN_ATTRS128

#define __DEFAULT_FN_ATTRS128
Value:
__attribute__((__always_inline__, __nodebug__, __target__("avxvnniint16"), \
__min_vector_width__(128)))
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.

Definition at line 19 of file avxvnniint16intrin.h.

◆ __DEFAULT_FN_ATTRS256

#define __DEFAULT_FN_ATTRS256
Value:
__attribute__((__always_inline__, __nodebug__, __target__("avxvnniint16"), \
__min_vector_width__(256)))

Definition at line 22 of file avxvnniint16intrin.h.

Function Documentation

◆ _mm256_dpwsud_epi32()

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsud_epi32 ( __m256i  __W,
__m256i  __A,
__m256i  __B 
)
static

Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate signed 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W, and store the packed 32-bit results in dst.

__m256i _mm256_dpwsud_epi32(__m256i __W, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsud_epi32(__m256i __W, __m256i __A, __m256i __B)
Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-b...

This intrinsic corresponds to the VPDPWSUD instruction.

Parameters
__W: A 256-bit vector of [8 x int].
__A: A 256-bit vector of [16 x short].
__B: A 256-bit vector of [16 x unsigned short].
Returns
A 256-bit vector of [8 x int].
FOR j := 0 to 7
tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
dst.dword[j] := __W.dword[j] + tmp1 + tmp2
ENDFOR
dst[MAX:256] := 0

Definition at line 94 of file avxvnniint16intrin.h.

◆ _mm256_dpwsuds_epi32()

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsuds_epi32 ( __m256i  __W,
__m256i  __A,
__m256i  __B 
)
static

Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate signed 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W with signed saturation, and store the packed 32-bit results in dst.

__m256i _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B)
Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-b...

This intrinsic corresponds to the VPDPWSUDS instruction.

Parameters
__W: A 256-bit vector of [8 x int].
__A: A 256-bit vector of [16 x short].
__B: A 256-bit vector of [16 x unsigned short].
Returns
A 256-bit vector of [8 x int].
FOR j := 0 to 7
tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
ENDFOR
dst[MAX:256] := 0

Definition at line 169 of file avxvnniint16intrin.h.

◆ _mm256_dpwusd_epi32()

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusd_epi32 ( __m256i  __W,
__m256i  __A,
__m256i  __B 
)
static

Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, producing 2 intermediate signed 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W, and store the packed 32-bit results in dst.

__m256i _mm256_dpwusd_epi32(__m256i __W, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusd_epi32(__m256i __W, __m256i __A, __m256i __B)
Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-b...

This intrinsic corresponds to the VPDPWUSD instruction.

Parameters
__W: A 256-bit vector of [8 x int].
__A: A 256-bit vector of [16 x unsigned short].
__B: A 256-bit vector of [16 x short].
Returns
A 256-bit vector of [8 x int].
FOR j := 0 to 7
tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
dst.dword[j] := __W.dword[j] + tmp1 + tmp2
ENDFOR
dst[MAX:256] := 0

Definition at line 242 of file avxvnniint16intrin.h.

◆ _mm256_dpwusds_epi32()

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwusds_epi32 ( __m256i  __W,
__m256i  __A,
__m256i  __B 
)
static

Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, producing 2 intermediate signed 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W with signed saturation, and store the packed 32-bit results in dst.

__m256i _mm256_dpwusds_epi32(__m256i __W, __m256i __A, __m256i __B)

This intrinsic corresponds to the VPDPWUSDS instruction.

Parameters
__W: A 256-bit vector of [8 x int].
__A: A 256-bit vector of [16 x unsigned short].
__B: A 256-bit vector of [16 x short].
Returns
A 256-bit vector of [8 x int].
FOR j := 0 to 7
tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
ENDFOR
dst[MAX:256] := 0

Definition at line 317 of file avxvnniint16intrin.h.

◆ _mm256_dpwuud_epi32()

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuud_epi32 ( __m256i  __W,
__m256i  __A,
__m256i  __B 
)
static

Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate unsigned 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W, and store the packed 32-bit results in dst.

__m256i _mm256_dpwuud_epi32(__m256i __W, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuud_epi32(__m256i __W, __m256i __A, __m256i __B)
Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16...

This intrinsic corresponds to the VPDPWUUD instruction.

Parameters
__W: A 256-bit vector of [8 x unsigned int].
__A: A 256-bit vector of [16 x unsigned short].
__B: A 256-bit vector of [16 x unsigned short].
Returns
A 256-bit vector of [8 x unsigned int].
FOR j := 0 to 7
tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
dst.dword[j] := __W.dword[j] + tmp1 + tmp2
ENDFOR
dst[MAX:256] := 0

Definition at line 390 of file avxvnniint16intrin.h.

◆ _mm256_dpwuuds_epi32()

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuuds_epi32 ( __m256i  __W,
__m256i  __A,
__m256i  __B 
)
static

Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate unsigned 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W with unsigned saturation, and store the packed 32-bit results in dst.

__m256i _mm256_dpwuuds_epi32(__m256i __W, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwuuds_epi32(__m256i __W, __m256i __A, __m256i __B)
Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16...

This intrinsic corresponds to the VPDPWUUDS instruction.

Parameters
__W: A 256-bit vector of [8 x unsigned int].
__A: A 256-bit vector of [16 x unsigned short].
__B: A 256-bit vector of [16 x unsigned short].
Returns
A 256-bit vector of [8 x unsigned int].
FOR j := 0 to 7
tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
ENDFOR
dst[MAX:256] := 0

Definition at line 465 of file avxvnniint16intrin.h.

◆ _mm_dpwsud_epi32()

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsud_epi32 ( __m128i  __W,
__m128i  __A,
__m128i  __B 
)
static

Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate signed 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W, and store the packed 32-bit results in dst.

__m128i _mm_dpwsud_epi32(__m128i __W, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsud_epi32(__m128i __W, __m128i __A, __m128i __B)
Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-b...

This intrinsic corresponds to the VPDPWSUD instruction.

Parameters
__W: A 128-bit vector of [4 x int].
__A: A 128-bit vector of [8 x short].
__B: A 128-bit vector of [8 x unsigned short].
Returns
A 128-bit vector of [4 x int].
FOR j := 0 to 3
tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
dst.dword[j] := __W.dword[j] + tmp1 + tmp2
ENDFOR
dst[MAX:128] := 0

Definition at line 56 of file avxvnniint16intrin.h.

◆ _mm_dpwsuds_epi32()

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsuds_epi32 ( __m128i  __W,
__m128i  __A,
__m128i  __B 
)
static

Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate signed 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W with signed saturation, and store the packed 32-bit results in dst.

__m128i _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B)
Multiply groups of 2 adjacent pairs of signed 16-bit integers in __A with corresponding unsigned 16-b...

This intrinsic corresponds to the VPDPWSUDS instruction.

Parameters
__W: A 128-bit vector of [4 x int].
__A: A 128-bit vector of [8 x short].
__B: A 128-bit vector of [8 x unsigned short].
Returns
A 128-bit vector of [4 x int].
FOR j := 0 to 3
tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
ENDFOR
dst[MAX:128] := 0

Definition at line 130 of file avxvnniint16intrin.h.

◆ _mm_dpwusd_epi32()

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusd_epi32 ( __m128i  __W,
__m128i  __A,
__m128i  __B 
)
static

Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, producing 2 intermediate signed 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W, and store the packed 32-bit results in dst.

__m128i _mm_dpwusd_epi32(__m128i __W, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusd_epi32(__m128i __W, __m128i __A, __m128i __B)
Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-b...

This intrinsic corresponds to the VPDPWUSD instruction.

Parameters
__W: A 128-bit vector of [4 x int].
__A: A 128-bit vector of [8 x unsigned short].
__B: A 128-bit vector of [8 x short].
Returns
A 128-bit vector of [4 x int].
FOR j := 0 to 3
tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
dst.dword[j] := __W.dword[j] + tmp1 + tmp2
ENDFOR
dst[MAX:128] := 0

Definition at line 204 of file avxvnniint16intrin.h.

◆ _mm_dpwusds_epi32()

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusds_epi32 ( __m128i  __W,
__m128i  __A,
__m128i  __B 
)
static

Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-bit integers in __B, producing 2 intermediate signed 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W with signed saturation, and store the packed 32-bit results in dst.

__m128i _mm_dpwusds_epi32(__m128i __W, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusds_epi32(__m128i __W, __m128i __A, __m128i __B)
Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding signed 16-b...

This intrinsic corresponds to the VPDPWUSDS instruction.

Parameters
__W: A 128-bit vector of [4 x int].
__A: A 128-bit vector of [8 x unsigned short].
__B: A 128-bit vector of [8 x short].
Returns
A 128-bit vector of [4 x int].
FOR j := 0 to 3
tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
ENDFOR
dst[MAX:128] := 0

Definition at line 278 of file avxvnniint16intrin.h.

◆ _mm_dpwuud_epi32()

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuud_epi32 ( __m128i  __W,
__m128i  __A,
__m128i  __B 
)
static

Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate unsigned 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W, and store the packed 32-bit results in dst.

__m128i _mm_dpwuud_epi32(__m128i __W, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuud_epi32(__m128i __W, __m128i __A, __m128i __B)
Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16...

This intrinsic corresponds to the VPDPWUUD instruction.

Parameters
__W: A 128-bit vector of [4 x unsigned int].
__A: A 128-bit vector of [8 x unsigned short].
__B: A 128-bit vector of [8 x unsigned short].
Returns
A 128-bit vector of [4 x unsigned int].
FOR j := 0 to 3
tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
dst.dword[j] := __W.dword[j] + tmp1 + tmp2
ENDFOR
dst[MAX:128] := 0

Definition at line 352 of file avxvnniint16intrin.h.

◆ _mm_dpwuuds_epi32()

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuuds_epi32 ( __m128i  __W,
__m128i  __A,
__m128i  __B 
)
static

Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in __A with corresponding unsigned 16-bit integers in __B, producing 2 intermediate unsigned 32-bit results.

Sum these 2 results with the corresponding 32-bit integer in __W with unsigned saturation, and store the packed 32-bit results in dst.

__m128i _mm_dpwuuds_epi32(__m128i __W, __m128i __A, __m128i __B)

This intrinsic corresponds to the VPDPWUUDS instruction.

Parameters
__W: A 128-bit vector of [4 x unsigned int].
__A: A 128-bit vector of [8 x unsigned short].
__B: A 128-bit vector of [8 x unsigned short].
Returns
A 128-bit vector of [4 x unsigned int].
FOR j := 0 to 3
tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
ENDFOR
dst[MAX:128] := 0

Definition at line 426 of file avxvnniint16intrin.h.