clang 23.0.0git
avx512bmmvlintrin.h File Reference

Go to the source code of this file.

Macros

#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR   __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR   __DEFAULT_FN_ATTRS256

Functions

static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_bmacor16x16x16 (__m256i __A, __m256i __B, __m256i __C)
 Multiplies two 16x16 bit matrices using OR reduction and ORs the product into a third 16x16 bit matrix (which is also the destination).
static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_bmacxor16x16x16 (__m256i __A, __m256i __B, __m256i __C)
 Multiplies two 16x16 bit matrices using XOR reduction and XORs the product into a third 16x16 bit matrix (which is also the destination).
static __inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm128_bitrev_epi8 (__m128i __A)
 Reverses the bits within each byte of the source vector.
static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_bitrev_epi8 (__m256i __A)
 Reverses the bits within each byte of the source vector.
static __inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm128_mask_bitrev_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
 Reverses the bits within each byte of the source vector, using a writemask to conditionally select elements.
static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_bitrev_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
 Reverses the bits within each byte of the source vector, using a writemask to conditionally select elements.
static __inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm128_maskz_bitrev_epi8 (__mmask16 __U, __m128i __A)
 Reverses the bits within each byte of the source vector, zeroing elements based on the writemask.
static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_bitrev_epi8 (__mmask32 __U, __m256i __A)
 Reverses the bits within each byte of the source vector, zeroing elements based on the writemask.

Macro Definition Documentation

◆ __DEFAULT_FN_ATTRS128

#define __DEFAULT_FN_ATTRS128
Value:
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bmm,avx512vl"), __min_vector_width__(128)))

Definition at line 19 of file avx512bmmvlintrin.h.

◆ __DEFAULT_FN_ATTRS128_CONSTEXPR

#define __DEFAULT_FN_ATTRS128_CONSTEXPR   __DEFAULT_FN_ATTRS128

Definition at line 30 of file avx512bmmvlintrin.h.

◆ __DEFAULT_FN_ATTRS256

#define __DEFAULT_FN_ATTRS256
Value:
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bmm,avx512vl"), __min_vector_width__(256)))

Definition at line 22 of file avx512bmmvlintrin.h.

◆ __DEFAULT_FN_ATTRS256_CONSTEXPR

#define __DEFAULT_FN_ATTRS256_CONSTEXPR   __DEFAULT_FN_ATTRS256

Definition at line 31 of file avx512bmmvlintrin.h.

Function Documentation

◆ _mm128_bitrev_epi8()

__inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm128_bitrev_epi8 ( __m128i __A)
static

Reverses the bits within each byte of the source vector.

For each byte in the source, reverses the order of its 8 bits to generate the corresponding destination byte. For example, 0b10110001 becomes 0b10001101.

This intrinsic corresponds to the VBITREV instruction.

Parameters
__A  A 128-bit vector of [16 x i8] where each byte will have its bits reversed.
Returns
A 128-bit vector of [16 x i8] with bit-reversed bytes.

Definition at line 119 of file avx512bmmvlintrin.h.

Referenced by _mm128_mask_bitrev_epi8(), and _mm128_maskz_bitrev_epi8().

◆ _mm128_mask_bitrev_epi8()

__inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm128_mask_bitrev_epi8 ( __mmask16 __U,
__m128i __A,
__m128i __B )
static

Reverses the bits within each byte of the source vector, using a writemask to conditionally select elements.

For each byte position, if the corresponding mask bit is 1, the byte from __A has its bits reversed and stored in the result. If the mask bit is 0, the corresponding byte from __B is copied to the result (merge masking).

This intrinsic corresponds to the VBITREV instruction.

Parameters
__U  A 16-bit mask value where each bit controls one byte (per 8-bit element). A 1 performs bit reversal; a 0 selects the passthrough byte from __B.
__A  A 128-bit vector of [16 x i8] to be bit-reversed.
__B  A 128-bit vector of [16 x i8] providing passthrough values.
Returns
A 128-bit vector combining bit-reversed and passthrough bytes.

Definition at line 162 of file avx512bmmvlintrin.h.

References _mm128_bitrev_epi8().

◆ _mm128_maskz_bitrev_epi8()

__inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm128_maskz_bitrev_epi8 ( __mmask16 __U,
__m128i __A )
static

Reverses the bits within each byte of the source vector, zeroing elements based on the writemask.

For each byte position, if the corresponding mask bit is 1, the byte from __A has its bits reversed and stored in the result. If the mask bit is 0, the result byte is set to zero (zero masking).

This intrinsic corresponds to the VBITREV instruction.

Parameters
__U  A 16-bit mask value where each bit controls one byte (per 8-bit element). A 1 performs bit reversal; a 0 sets the byte to zero.
__A  A 128-bit vector of [16 x i8] to be bit-reversed.
Returns
A 128-bit vector with bit-reversed or zeroed bytes.

Definition at line 210 of file avx512bmmvlintrin.h.

References _mm128_bitrev_epi8(), and _mm_setzero_si128().

◆ _mm256_bitrev_epi8()

__inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_bitrev_epi8 ( __m256i __A)
static

Reverses the bits within each byte of the source vector.

For each byte in the source, reverses the order of its 8 bits to generate the corresponding destination byte. For example, 0b10110001 becomes 0b10001101.

This intrinsic corresponds to the VBITREV instruction.

Parameters
__A  A 256-bit vector of [32 x i8] where each byte will have its bits reversed.
Returns
A 256-bit vector of [32 x i8] with bit-reversed bytes.

Definition at line 138 of file avx512bmmvlintrin.h.

Referenced by _mm256_mask_bitrev_epi8(), and _mm256_maskz_bitrev_epi8().

◆ _mm256_bmacor16x16x16()

__inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_bmacor16x16x16 ( __m256i __A,
__m256i __B,
__m256i __C )
static

Multiplies two 16x16 bit matrices using OR reduction and ORs the product into a third 16x16 bit matrix (which is also the destination).

For the 256-bit YMM form, the source registers/memory each contain a single 16x16 (256-bit) matrix in bits [255:0]. The operation performs:

for i in 0 to 15
  for j in 0 to 15
    reduction_bit = __C[16*i+j]
    for k in 0 to 15
      reduction_bit |= __A[16*i+k] & __B[16*k+j]
    end for k
    dest[16*i+j] = reduction_bit
  end for j
end for i

This intrinsic corresponds to the VBMACOR16X16X16 instruction.

Parameters
__A  A 256-bit vector containing a 16x16 bit matrix.
__B  A 256-bit vector containing a 16x16 bit matrix.
__C  A 256-bit accumulator vector containing the initial values to OR with.
Returns
A 256-bit vector containing the accumulated result.
Note
This instruction does not support masking.

Definition at line 64 of file avx512bmmvlintrin.h.

◆ _mm256_bmacxor16x16x16()

__inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_bmacxor16x16x16 ( __m256i __A,
__m256i __B,
__m256i __C )
static

Multiplies two 16x16 bit matrices using XOR reduction and XORs the product into a third 16x16 bit matrix (which is also the destination).

For the 256-bit YMM form, the source registers/memory each contain a single 16x16 (256-bit) matrix in bits [255:0]. The operation performs:

for i in 0 to 15
  for j in 0 to 15
    reduction_bit = __C[16*i+j]
    for k in 0 to 15
      reduction_bit ^= __A[16*i+k] & __B[16*k+j]
    end for k
    dest[16*i+j] = reduction_bit
  end for j
end for i

This intrinsic corresponds to the VBMACXOR16X16X16 instruction.

Parameters
__A  A 256-bit vector containing a 16x16 bit matrix.
__B  A 256-bit vector containing a 16x16 bit matrix.
__C  A 256-bit accumulator vector containing the initial values to XOR with.
Returns
A 256-bit vector containing the accumulated result.
Note
This instruction does not support masking.

Definition at line 99 of file avx512bmmvlintrin.h.

◆ _mm256_mask_bitrev_epi8()

__inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_bitrev_epi8 ( __mmask32 __U,
__m256i __A,
__m256i __B )
static

Reverses the bits within each byte of the source vector, using a writemask to conditionally select elements.

For each byte position, if the corresponding mask bit is 1, the byte from __A has its bits reversed and stored in the result. If the mask bit is 0, the corresponding byte from __B is copied to the result (merge masking).

This intrinsic corresponds to the VBITREV instruction.

Parameters
__U  A 32-bit mask value where each bit controls one byte (per 8-bit element). A 1 performs bit reversal; a 0 selects the passthrough byte from __B.
__A  A 256-bit vector of [32 x i8] to be bit-reversed.
__B  A 256-bit vector of [32 x i8] providing passthrough values.
Returns
A 256-bit vector combining bit-reversed and passthrough bytes.

Definition at line 187 of file avx512bmmvlintrin.h.

References _mm256_bitrev_epi8().

◆ _mm256_maskz_bitrev_epi8()

__inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_bitrev_epi8 ( __mmask32 __U,
__m256i __A )
static

Reverses the bits within each byte of the source vector, zeroing elements based on the writemask.

For each byte position, if the corresponding mask bit is 1, the byte from __A has its bits reversed and stored in the result. If the mask bit is 0, the result byte is set to zero (zero masking).

This intrinsic corresponds to the VBITREV instruction.

Parameters
__U  A 32-bit mask value where each bit controls one byte (per 8-bit element). A 1 performs bit reversal; a 0 sets the byte to zero.
__A  A 256-bit vector of [32 x i8] to be bit-reversed.
Returns
A 256-bit vector with bit-reversed or zeroed bytes.

Definition at line 234 of file avx512bmmvlintrin.h.

References _mm256_bitrev_epi8(), and _mm256_setzero_si256().