clang  14.0.0git
Macros | Typedefs | Functions
avx512vlbf16intrin.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define __DEFAULT_FN_ATTRS128
 
#define __DEFAULT_FN_ATTRS256
 

Typedefs

typedef short __m128bh __attribute__((__vector_size__(16), __aligned__(16)))
 

Functions

static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
 Convert Two Packed Single Data to One Packed BF16 Data. More...
 
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_cvtne2ps_pbh (__m128bh __W, __mmask8 __U, __m128 __A, __m128 __B)
 Convert Two Packed Single Data to One Packed BF16 Data. More...
 
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_cvtne2ps_pbh (__mmask8 __U, __m128 __A, __m128 __B)
 Convert Two Packed Single Data to One Packed BF16 Data. More...
 
static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
 Convert Two Packed Single Data to One Packed BF16 Data. More...
 
static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_cvtne2ps_pbh (__m256bh __W, __mmask16 __U, __m256 __A, __m256 __B)
 Convert Two Packed Single Data to One Packed BF16 Data. More...
 
static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtne2ps_pbh (__mmask16 __U, __m256 __A, __m256 __B)
 Convert Two Packed Single Data to One Packed BF16 Data. More...
 
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_cvtneps_pbh (__m128 __A)
 Convert Packed Single Data to Packed BF16 Data. More...
 
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_cvtneps_pbh (__m128bh __W, __mmask8 __U, __m128 __A)
 Convert Packed Single Data to Packed BF16 Data. More...
 
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_cvtneps_pbh (__mmask8 __U, __m128 __A)
 Convert Packed Single Data to Packed BF16 Data. More...
 
static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_cvtneps_pbh (__m256 __A)
 Convert Packed Single Data to Packed BF16 Data. More...
 
static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_mask_cvtneps_pbh (__m128bh __W, __mmask8 __U, __m256 __A)
 Convert Packed Single Data to Packed BF16 Data. More...
 
static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtneps_pbh (__mmask8 __U, __m256 __A)
 Convert Packed Single Data to Packed BF16 Data. More...
 
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_dpbf16_ps (__m128 __D, __m128bh __A, __m128bh __B)
 Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More...
 
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_dpbf16_ps (__m128 __D, __mmask8 __U, __m128bh __A, __m128bh __B)
 Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More...
 
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_dpbf16_ps (__mmask8 __U, __m128 __D, __m128bh __A, __m128bh __B)
 Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More...
 
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_dpbf16_ps (__m256 __D, __m256bh __A, __m256bh __B)
 Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More...
 
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_dpbf16_ps (__m256 __D, __mmask8 __U, __m256bh __A, __m256bh __B)
 Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More...
 
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbf16_ps (__mmask8 __U, __m256 __D, __m256bh __A, __m256bh __B)
 Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More...
 
static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh (float __A)
 Convert One Single float Data to One BF16 Data. More...
 
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps (__m128bh __A)
 Convert Packed BF16 Data to Packed float Data. More...
 
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps (__m128bh __A)
 Convert Packed BF16 Data to Packed float Data. More...
 
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
 Convert Packed BF16 Data to Packed float Data using zeroing mask. More...
 
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
 Convert Packed BF16 Data to Packed float Data using zeroing mask. More...
 
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpbh_ps (__m128 __S, __mmask8 __U, __m128bh __A)
 Convert Packed BF16 Data to Packed float Data using merging mask. More...
 
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A)
 Convert Packed BF16 Data to Packed float Data using merging mask. More...
 

Macro Definition Documentation

◆ __DEFAULT_FN_ATTRS128

#define __DEFAULT_FN_ATTRS128
Value:
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl, avx512bf16"), __min_vector_width__(128)))

Definition at line 18 of file avx512vlbf16intrin.h.

◆ __DEFAULT_FN_ATTRS256

#define __DEFAULT_FN_ATTRS256
Value:
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl, avx512bf16"), __min_vector_width__(256)))

Definition at line 21 of file avx512vlbf16intrin.h.

Typedef Documentation

◆ __m128bh

typedef short __m128bh __attribute__((__vector_size__(16), __aligned__(16)))

Definition at line 16 of file avx512vlbf16intrin.h.

Function Documentation

◆ _mm256_cvtne2ps_pbh()

static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_cvtne2ps_pbh ( __m256  __A,
__m256  __B 
)
static

Convert Two Packed Single Data to One Packed BF16 Data.

This intrinsic corresponds to the VCVTNE2PS2BF16 instructions.

Parameters
__A: A 256-bit vector of [8 x float].
__B: A 256-bit vector of [8 x float].
Returns
A 256-bit vector of [16 x bfloat] whose lower 128 bits come from conversion of __B, and higher 128 bits come from conversion of __A.

Definition at line 102 of file avx512vlbf16intrin.h.

Referenced by _mm256_mask_cvtne2ps_pbh(), and _mm256_maskz_cvtne2ps_pbh().

◆ _mm256_cvtneps_pbh()

static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_cvtneps_pbh ( __m256  __A)
static

Convert Packed Single Data to Packed BF16 Data.

This intrinsic corresponds to the VCVTNEPS2BF16 instructions.

Parameters
__A: A 256-bit vector of [8 x float].
Returns
A 128-bit vector of [8 x bfloat] that comes from conversion of __A.

Definition at line 222 of file avx512vlbf16intrin.h.

References _mm_undefined_si128().

◆ _mm256_cvtpbh_ps()

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps ( __m128bh  __A)
static

Convert Packed BF16 Data to Packed float Data.

Parameters
__A: A 128-bit vector of [8 x bfloat].
Returns
A 256-bit vector of [8 x float] that comes from conversion of __A.

Definition at line 442 of file avx512vlbf16intrin.h.

References _mm256_castsi256_ps(), _mm256_cvtepi16_epi32(), and _mm256_slli_epi32().

◆ _mm256_dpbf16_ps()

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_dpbf16_ps ( __m256  __D,
__m256bh  __A,
__m256bh  __B 
)
static

Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

This intrinsic corresponds to the VDPBF16PS instructions.

Parameters
__A: A 256-bit vector of [16 x bfloat].
__B: A 256-bit vector of [16 x bfloat].
__D: A 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] that comes from the dot product of __A and __B accumulated with __D.

Definition at line 352 of file avx512vlbf16intrin.h.

References __D.

Referenced by _mm256_mask_dpbf16_ps(), and _mm256_maskz_dpbf16_ps().

◆ _mm256_mask_cvtne2ps_pbh()

static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_cvtne2ps_pbh ( __m256bh  __W,
__mmask16  __U,
__m256  __A,
__m256  __B 
)
static

Convert Two Packed Single Data to One Packed BF16 Data.

This intrinsic corresponds to the VCVTNE2PS2BF16 instructions.

Parameters
__A: A 256-bit vector of [8 x float].
__B: A 256-bit vector of [8 x float].
__W: A 256-bit vector of [16 x bfloat].
__U: A 16-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element from __W.
Returns
A 256-bit vector of [16 x bfloat] whose lower 128 bits come from conversion of __B, and higher 128 bits come from conversion of __A.

Definition at line 125 of file avx512vlbf16intrin.h.

References _mm256_cvtne2ps_pbh().

◆ _mm256_mask_cvtneps_pbh()

static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_mask_cvtneps_pbh ( __m128bh  __W,
__mmask8  __U,
__m256  __A 
)
static

Convert Packed Single Data to Packed BF16 Data.

This intrinsic corresponds to the VCVTNEPS2BF16 instructions.

Parameters
__A: A 256-bit vector of [8 x float].
__W: A 128-bit vector of [8 x bfloat].
__U: An 8-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element from __W.
Returns
A 128-bit vector of [8 x bfloat] that comes from conversion of __A.

Definition at line 243 of file avx512vlbf16intrin.h.

◆ _mm256_mask_cvtpbh_ps()

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpbh_ps ( __m256  __S,
__mmask8  __U,
__m128bh  __A 
)
static

Convert Packed BF16 Data to Packed float Data using merging mask.

Parameters
__S: A 256-bit vector of [8 x float]. Elements are copied from __S when the corresponding mask bit is not set.
__U: An 8-bit merging mask. Elements are copied from __S when the corresponding mask bit is not set.
__A: A 128-bit vector of [8 x bfloat].
Returns
A 256-bit vector of [8 x float] that comes from conversion of __A.

Definition at line 513 of file avx512vlbf16intrin.h.

References _mm256_castsi256_ps(), _mm256_cvtepi16_epi32(), and _mm256_mask_slli_epi32().

◆ _mm256_mask_dpbf16_ps()

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_dpbf16_ps ( __m256  __D,
__mmask8  __U,
__m256bh  __A,
__m256bh  __B 
)
static

Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

This intrinsic corresponds to the VDPBF16PS instructions.

Parameters
__A: A 256-bit vector of [16 x bfloat].
__B: A 256-bit vector of [16 x bfloat].
__D: A 256-bit vector of [8 x float].
__U: An 8-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means __D.
Returns
A 256-bit vector of [8 x float] that comes from the dot product of __A and __B accumulated with __D.

Definition at line 376 of file avx512vlbf16intrin.h.

References __D, and _mm256_dpbf16_ps().

◆ _mm256_maskz_cvtne2ps_pbh()

static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtne2ps_pbh ( __mmask16  __U,
__m256  __A,
__m256  __B 
)
static

Convert Two Packed Single Data to One Packed BF16 Data.

This intrinsic corresponds to the VCVTNE2PS2BF16 instructions.

Parameters
__A: A 256-bit vector of [8 x float].
__B: A 256-bit vector of [8 x float].
__U: A 16-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element is zero.
Returns
A 256-bit vector of [16 x bfloat] whose lower 128 bits come from conversion of __B, and higher 128 bits come from conversion of __A.

Definition at line 147 of file avx512vlbf16intrin.h.

References _mm256_cvtne2ps_pbh(), and _mm256_setzero_si256().

◆ _mm256_maskz_cvtneps_pbh()

static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtneps_pbh ( __mmask8  __U,
__m256  __A 
)
static

Convert Packed Single Data to Packed BF16 Data.

This intrinsic corresponds to the VCVTNEPS2BF16 instructions.

Parameters
__A: A 256-bit vector of [8 x float].
__U: An 8-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element is zero.
Returns
A 128-bit vector of [8 x bfloat] that comes from conversion of __A.

Definition at line 262 of file avx512vlbf16intrin.h.

References _mm_setzero_si128().

◆ _mm256_maskz_cvtpbh_ps()

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpbh_ps ( __mmask8  __U,
__m128bh  __A 
)
static

Convert Packed BF16 Data to Packed float Data using zeroing mask.

Parameters
__U: An 8-bit mask. Elements are zeroed out when the corresponding mask bit is not set.
__A: A 128-bit vector of [8 x bfloat].
Returns
A 256-bit vector of [8 x float] that comes from conversion of __A.

Definition at line 474 of file avx512vlbf16intrin.h.

References _mm256_castsi256_ps(), _mm256_maskz_cvtepi16_epi32(), and _mm256_slli_epi32().

◆ _mm256_maskz_dpbf16_ps()

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbf16_ps ( __mmask8  __U,
__m256  __D,
__m256bh  __A,
__m256bh  __B 
)
static

Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

This intrinsic corresponds to the VDPBF16PS instructions.

Parameters
__A: A 256-bit vector of [16 x bfloat].
__B: A 256-bit vector of [16 x bfloat].
__D: A 256-bit vector of [8 x float].
__U: An 8-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means 0.
Returns
A 256-bit vector of [8 x float] that comes from the dot product of __A and __B accumulated with __D.

Definition at line 400 of file avx512vlbf16intrin.h.

References __D, _mm256_dpbf16_ps(), and _mm256_setzero_si256().

◆ _mm_cvtne2ps_pbh()

static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_cvtne2ps_pbh ( __m128  __A,
__m128  __B 
)
static

Convert Two Packed Single Data to One Packed BF16 Data.

This intrinsic corresponds to the VCVTNE2PS2BF16 instructions.

Parameters
__A: A 128-bit vector of [4 x float].
__B: A 128-bit vector of [4 x float].
Returns
A 128-bit vector of [8 x bfloat] whose lower 64 bits come from conversion of __B, and higher 64 bits come from conversion of __A.

Definition at line 38 of file avx512vlbf16intrin.h.

Referenced by _mm_mask_cvtne2ps_pbh(), and _mm_maskz_cvtne2ps_pbh().

◆ _mm_cvtneps_pbh()

static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_cvtneps_pbh ( __m128  __A)
static

Convert Packed Single Data to Packed BF16 Data.

This intrinsic corresponds to the VCVTNEPS2BF16 instructions.

Parameters
__A: A 128-bit vector of [4 x float].
Returns
A 128-bit vector of [8 x bfloat] whose lower 64 bits come from conversion of __A, and higher 64 bits are 0.

Definition at line 164 of file avx512vlbf16intrin.h.

References _mm_undefined_si128().

◆ _mm_cvtness_sbh()

static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh ( float  __A)
static

Convert One Single float Data to One BF16 Data.

This intrinsic corresponds to the VCVTNEPS2BF16 instructions.

Parameters
__A: A float data.
Returns
A bf16 data whose sign field and exponent field remain unchanged, and whose fraction field is truncated to 7 bits.

Definition at line 416 of file avx512vlbf16intrin.h.

References _mm_undefined_si128().

◆ _mm_cvtpbh_ps()

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps ( __m128bh  __A)
static

Convert Packed BF16 Data to Packed float Data.

Parameters
__A: A 128-bit vector whose lower 64 bits hold [4 x bfloat].
Returns
A 128-bit vector of [4 x float] that comes from conversion of __A.

Definition at line 430 of file avx512vlbf16intrin.h.

References _mm_castsi128_ps(), _mm_cvtepi16_epi32(), and _mm_slli_epi32().

◆ _mm_dpbf16_ps()

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_dpbf16_ps ( __m128  __D,
__m128bh  __A,
__m128bh  __B 
)
static

Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

This intrinsic corresponds to the VDPBF16PS instructions.

Parameters
__A: A 128-bit vector of [8 x bfloat].
__B: A 128-bit vector of [8 x bfloat].
__D: A 128-bit vector of [4 x float].
Returns
A 128-bit vector of [4 x float] that comes from the dot product of __A and __B accumulated with __D.

Definition at line 283 of file avx512vlbf16intrin.h.

References __D.

Referenced by _mm_mask_dpbf16_ps(), and _mm_maskz_dpbf16_ps().

◆ _mm_mask_cvtne2ps_pbh()

static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_cvtne2ps_pbh ( __m128bh  __W,
__mmask8  __U,
__m128  __A,
__m128  __B 
)
static

Convert Two Packed Single Data to One Packed BF16 Data.

This intrinsic corresponds to the VCVTNE2PS2BF16 instructions.

Parameters
__A: A 128-bit vector of [4 x float].
__B: A 128-bit vector of [4 x float].
__W: A 128-bit vector of [8 x bfloat].
__U: An 8-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element from __W.
Returns
A 128-bit vector of [8 x bfloat] whose lower 64 bits come from conversion of __B, and higher 64 bits come from conversion of __A.

Definition at line 61 of file avx512vlbf16intrin.h.

References _mm_cvtne2ps_pbh().

◆ _mm_mask_cvtneps_pbh()

static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_cvtneps_pbh ( __m128bh  __W,
__mmask8  __U,
__m128  __A 
)
static

Convert Packed Single Data to Packed BF16 Data.

This intrinsic corresponds to the VCVTNEPS2BF16 instructions.

Parameters
__A: A 128-bit vector of [4 x float].
__W: A 128-bit vector of [8 x bfloat].
__U: A 4-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element from __W.
Returns
A 128-bit vector of [8 x bfloat] whose lower 64 bits come from conversion of __A, and higher 64 bits are 0.

Definition at line 186 of file avx512vlbf16intrin.h.

◆ _mm_mask_cvtpbh_ps()

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpbh_ps ( __m128  __S,
__mmask8  __U,
__m128bh  __A 
)
static

Convert Packed BF16 Data to Packed float Data using merging mask.

Parameters
__S: A 128-bit vector of [4 x float]. Elements are copied from __S when the corresponding mask bit is not set.
__U: A 4-bit merging mask. Elements are copied from __S when the corresponding mask bit is not set.
__A: A 128-bit vector whose lower 64 bits hold [4 x bfloat].
Returns
A 128-bit vector of [4 x float] that comes from conversion of __A.

Definition at line 493 of file avx512vlbf16intrin.h.

References _mm_castsi128_ps(), _mm_cvtepi16_epi32(), and _mm_mask_slli_epi32().

◆ _mm_mask_dpbf16_ps()

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_dpbf16_ps ( __m128  __D,
__mmask8  __U,
__m128bh  __A,
__m128bh  __B 
)
static

Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

This intrinsic corresponds to the VDPBF16PS instructions.

Parameters
__A: A 128-bit vector of [8 x bfloat].
__B: A 128-bit vector of [8 x bfloat].
__D: A 128-bit vector of [4 x float].
__U: An 8-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means __D.
Returns
A 128-bit vector of [4 x float] that comes from the dot product of __A and __B accumulated with __D.

Definition at line 307 of file avx512vlbf16intrin.h.

References __D, and _mm_dpbf16_ps().

◆ _mm_maskz_cvtne2ps_pbh()

static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_cvtne2ps_pbh ( __mmask8  __U,
__m128  __A,
__m128  __B 
)
static

Convert Two Packed Single Data to One Packed BF16 Data.

This intrinsic corresponds to the VCVTNE2PS2BF16 instructions.

Parameters
__A: A 128-bit vector of [4 x float].
__B: A 128-bit vector of [4 x float].
__U: An 8-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element is zero.
Returns
A 128-bit vector of [8 x bfloat] whose lower 64 bits come from conversion of __B, and higher 64 bits come from conversion of __A.

Definition at line 83 of file avx512vlbf16intrin.h.

References _mm_cvtne2ps_pbh(), and _mm_setzero_si128().

◆ _mm_maskz_cvtneps_pbh()

static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_cvtneps_pbh ( __mmask8  __U,
__m128  __A 
)
static

Convert Packed Single Data to Packed BF16 Data.

This intrinsic corresponds to the VCVTNEPS2BF16 instructions.

Parameters
__A: A 128-bit vector of [4 x float].
__U: A 4-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element is zero.
Returns
A 128-bit vector of [8 x bfloat] whose lower 64 bits come from conversion of __A, and higher 64 bits are 0.

Definition at line 206 of file avx512vlbf16intrin.h.

References _mm_setzero_si128().

◆ _mm_maskz_cvtpbh_ps()

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpbh_ps ( __mmask8  __U,
__m128bh  __A 
)
static

Convert Packed BF16 Data to Packed float Data using zeroing mask.

Parameters
__U: A 4-bit mask. Elements are zeroed out when the corresponding mask bit is not set.
__A: A 128-bit vector whose lower 64 bits hold [4 x bfloat].
Returns
A 128-bit vector of [4 x float] that comes from conversion of __A.

Definition at line 458 of file avx512vlbf16intrin.h.

References _mm_castsi128_ps(), _mm_maskz_cvtepi16_epi32(), and _mm_slli_epi32().

◆ _mm_maskz_dpbf16_ps()

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_dpbf16_ps ( __mmask8  __U,
__m128  __D,
__m128bh  __A,
__m128bh  __B 
)
static

Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

This intrinsic corresponds to the VDPBF16PS instructions.

Parameters
__A: A 128-bit vector of [8 x bfloat].
__B: A 128-bit vector of [8 x bfloat].
__D: A 128-bit vector of [4 x float].
__U: An 8-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means 0.
Returns
A 128-bit vector of [4 x float] that comes from the dot product of __A and __B accumulated with __D.

Definition at line 331 of file avx512vlbf16intrin.h.

References __D, _mm_dpbf16_ps(), and _mm_setzero_si128().

__attribute__
short __m128bh __attribute__((__vector_size__(16), __aligned__(16)))
Definition: avx512vlbf16intrin.h:16