Macros
#define	__DEFAULT_FN_ATTRS128
#define	__DEFAULT_FN_ATTRS256
#define	__DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define	__DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256

Functions
static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR	_mm256_bmacor16x16x16 (__m256i __A, __m256i __B, __m256i __C)
	Multiplies two 16x16 bit matrices using OR reduction and ORs the product into a third 16x16 bit matrix (which is also the destination).
static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR	_mm256_bmacxor16x16x16 (__m256i __A, __m256i __B, __m256i __C)
	Multiplies two 16x16 bit matrices using XOR reduction and XORs the product into a third 16x16 bit matrix (which is also the destination).
static __inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR	_mm128_bitrev_epi8 (__m128i __A)
	Reverses the bits within each byte of the source vector.
static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR	_mm256_bitrev_epi8 (__m256i __A)
	Reverses the bits within each byte of the source vector.
static __inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR	_mm128_mask_bitrev_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
	Reverses the bits within each byte of the source vector, using a writemask to conditionally select elements.
static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR	_mm256_mask_bitrev_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
	Reverses the bits within each byte of the source vector, using a writemask to conditionally select elements.
static __inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR	_mm128_maskz_bitrev_epi8 (__mmask16 __U, __m128i __A)
	Reverses the bits within each byte of the source vector, zeroing elements based on the writemask.
static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR	_mm256_maskz_bitrev_epi8 (__mmask32 __U, __m256i __A)
	Reverses the bits within each byte of the source vector, zeroing elements based on the writemask.

Macro Definition Documentation

◆ __DEFAULT_FN_ATTRS128

#define __DEFAULT_FN_ATTRS128

Value:

__attribute__((__always_inline__, __nodebug__, \
               __target__("avx512bmm,avx512vl"), __min_vector_width__(128)))

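(Doxygen cross-reference tooltip for the __attribute__ token in the value above; it is not part of the documentation of __DEFAULT_FN_ATTRS128:)

__attribute__: _Float16 __2f16 __attribute__((ext_vector_type(2))) — "Zeroes the upper 128 bits (bits 255:128) of all YMM registers." Definition __clang_hip_libdevice_declares.h:214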

Definition at line 19 of file avx512bmmvlintrin.h.

◆ __DEFAULT_FN_ATTRS128_CONSTEXPR

#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128

Definition at line 30 of file avx512bmmvlintrin.h.

◆ __DEFAULT_FN_ATTRS256

#define __DEFAULT_FN_ATTRS256

Value:

__attribute__((__always_inline__, __nodebug__, \
               __target__("avx512bmm,avx512vl"), __min_vector_width__(256)))

Definition at line 22 of file avx512bmmvlintrin.h.

◆ __DEFAULT_FN_ATTRS256_CONSTEXPR

#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256

Definition at line 31 of file avx512bmmvlintrin.h.

Function Documentation

◆ _mm128_bitrev_epi8()

__inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm128_bitrev_epi8 ( __m128i __A )

static

Reverses the bits within each byte of the source vector.

For each byte in the source, reverses the order of its 8 bits to generate the corresponding destination byte. For example, 0b10110001 becomes 0b10001101.

This intrinsic corresponds to the VBITREV instruction.

Parameters

__A	A 128-bit vector of [16 x i8] where each byte will have its bits reversed.

Returns: A 128-bit vector of [16 x i8] with bit-reversed bytes.

Definition at line 119 of file avx512bmmvlintrin.h.

Referenced by _mm128_mask_bitrev_epi8(), and _mm128_maskz_bitrev_epi8().

◆ _mm128_mask_bitrev_epi8()

__inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm128_mask_bitrev_epi8	(	__mmask16	__U,
		__m128i	__A,
		__m128i	__B )

static

Reverses the bits within each byte of the source vector, using a writemask to conditionally select elements.

For each byte position, if the corresponding bit of __U is 1, the byte from __A has its bits reversed and is stored in the result. If the mask bit is 0, the corresponding byte from __B is copied to the result unchanged (merge masking).

This intrinsic corresponds to the VBITREV instruction.

Parameters

__U	A 16-bit mask in which each bit controls one byte (8-bit element) of the result. A 1 performs bit reversal; a 0 selects the passthrough byte from __B.
__A	A 128-bit vector of [16 x i8] to be bit-reversed.
__B	A 128-bit vector of [16 x i8] providing passthrough values.

Returns: A 128-bit vector combining bit-reversed and passthrough bytes.

Definition at line 162 of file avx512bmmvlintrin.h.

References _mm128_bitrev_epi8().

◆ _mm128_maskz_bitrev_epi8()

__inline __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm128_maskz_bitrev_epi8	(	__mmask16	__U,
		__m128i	__A )

static

Reverses the bits within each byte of the source vector, zeroing elements based on the writemask.

For each byte position, if the corresponding bit of __U is 1, the byte from __A has its bits reversed and is stored in the result. If the mask bit is 0, the result byte is set to zero (zero masking).

This intrinsic corresponds to the VBITREV instruction.

Parameters

__U	A 16-bit mask in which each bit controls one byte (8-bit element) of the result. A 1 performs bit reversal; a 0 sets the byte to zero.
__A	A 128-bit vector of [16 x i8] to be bit-reversed.

Returns: A 128-bit vector with bit-reversed or zeroed bytes.

Definition at line 210 of file avx512bmmvlintrin.h.

References _mm128_bitrev_epi8(), and _mm_setzero_si128().

◆ _mm256_bitrev_epi8()

__inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_bitrev_epi8 ( __m256i __A )

static

Reverses the bits within each byte of the source vector.

For each byte in the source, reverses the order of its 8 bits to generate the corresponding destination byte. For example, 0b10110001 becomes 0b10001101.

This intrinsic corresponds to the VBITREV instruction.

Parameters

__A	A 256-bit vector of [32 x i8] where each byte will have its bits reversed.

Returns: A 256-bit vector of [32 x i8] with bit-reversed bytes.

Definition at line 138 of file avx512bmmvlintrin.h.

Referenced by _mm256_mask_bitrev_epi8(), and _mm256_maskz_bitrev_epi8().

◆ _mm256_bmacor16x16x16()

__inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_bmacor16x16x16	(	__m256i	__A,
		__m256i	__B,
		__m256i	__C )

static

Multiplies two 16x16 bit matrices using OR reduction and ORs the product into a third 16x16 bit matrix (which is also the destination).

For the 256-bit YMM form, the source registers/memory each contain a single 16x16 (256-bit) matrix in bits [255:0]. The operation performs:

for i in 0 to 15
  for j in 0 to 15
    reduction_bit = __C[16*i+j]
    for k in 0 to 15
      reduction_bit |= __A[16*i+k] & __B[16*k+j]
    end for k
    dest[16*i+j] = reduction_bit
  end for j
end for i

This intrinsic corresponds to the VBMACOR16X16X16 instruction.

Parameters

__A	A 256-bit vector containing a 16x16 bit matrix.
__B	A 256-bit vector containing a 16x16 bit matrix.
__C	A 256-bit accumulator vector containing the initial values to OR with.

Returns: A 256-bit vector containing the accumulated result.

Note: This instruction does not support masking.

Definition at line 64 of file avx512bmmvlintrin.h.

◆ _mm256_bmacxor16x16x16()

__inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_bmacxor16x16x16	(	__m256i	__A,
		__m256i	__B,
		__m256i	__C )

static

Multiplies two 16x16 bit matrices using XOR reduction and XORs the product into a third 16x16 bit matrix (which is also the destination).

For the 256-bit YMM form, the source registers/memory each contain a single 16x16 (256-bit) matrix in bits [255:0]. The operation performs:

for i in 0 to 15
  for j in 0 to 15
    reduction_bit = __C[16*i+j]
    for k in 0 to 15
      reduction_bit ^= __A[16*i+k] & __B[16*k+j]
    end for k
    dest[16*i+j] = reduction_bit
  end for j
end for i

This intrinsic corresponds to the VBMACXOR16X16X16 instruction.

Parameters

__A	A 256-bit vector containing a 16x16 bit matrix.
__B	A 256-bit vector containing a 16x16 bit matrix.
__C	A 256-bit accumulator vector containing the initial values to XOR with.

Returns: A 256-bit vector containing the accumulated result.

Note: This instruction does not support masking.

Definition at line 99 of file avx512bmmvlintrin.h.

◆ _mm256_mask_bitrev_epi8()

__inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_bitrev_epi8	(	__mmask32	__U,
		__m256i	__A,
		__m256i	__B )

static

Reverses the bits within each byte of the source vector, using a writemask to conditionally select elements.

For each byte position, if the corresponding bit of __U is 1, the byte from __A has its bits reversed and is stored in the result. If the mask bit is 0, the corresponding byte from __B is copied to the result unchanged (merge masking).

This intrinsic corresponds to the VBITREV instruction.

Parameters

__U	A 32-bit mask in which each bit controls one byte (8-bit element) of the result. A 1 performs bit reversal; a 0 selects the passthrough byte from __B.
__A	A 256-bit vector of [32 x i8] to be bit-reversed.
__B	A 256-bit vector of [32 x i8] providing passthrough values.

Returns: A 256-bit vector combining bit-reversed and passthrough bytes.

Definition at line 187 of file avx512bmmvlintrin.h.

References _mm256_bitrev_epi8().

◆ _mm256_maskz_bitrev_epi8()

__inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_bitrev_epi8	(	__mmask32	__U,
		__m256i	__A )

static

Reverses the bits within each byte of the source vector, zeroing elements based on the writemask.

For each byte position, if the corresponding bit of __U is 1, the byte from __A has its bits reversed and is stored in the result. If the mask bit is 0, the result byte is set to zero (zero masking).

This intrinsic corresponds to the VBITREV instruction.

Parameters

__U	A 32-bit mask in which each bit controls one byte (8-bit element) of the result. A 1 performs bit reversal; a 0 sets the byte to zero.
__A	A 256-bit vector of [32 x i8] to be bit-reversed.

Returns: A 256-bit vector with bit-reversed or zeroed bytes.

Definition at line 234 of file avx512bmmvlintrin.h.

References _mm256_bitrev_epi8(), and _mm256_setzero_si256().

Macros

Functions

Macro Definition Documentation

◆ __DEFAULT_FN_ATTRS128

◆ __DEFAULT_FN_ATTRS128_CONSTEXPR

◆ __DEFAULT_FN_ATTRS256

◆ __DEFAULT_FN_ATTRS256_CONSTEXPR

Function Documentation

◆ _mm128_bitrev_epi8()

◆ _mm128_mask_bitrev_epi8()

◆ _mm128_maskz_bitrev_epi8()

◆ _mm256_bitrev_epi8()

◆ _mm256_bmacor16x16x16()

◆ _mm256_bmacxor16x16x16()

◆ _mm256_mask_bitrev_epi8()

◆ _mm256_maskz_bitrev_epi8()