clang 18.0.0git
|
Go to the source code of this file.
Macros | |
#define | __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) |
#define | __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) |
Functions | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a multiply-add of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a multiply-add of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C) |
Computes a scalar multiply-add of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C) |
Computes a scalar multiply-add of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a multiply-subtract of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a multiply-subtract of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C) |
Computes a scalar multiply-subtract of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C) |
Computes a scalar multiply-subtract of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a negated multiply-add of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a negated multiply-add of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C) |
Computes a scalar negated multiply-add of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C) |
Computes a scalar negated multiply-add of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a negated multiply-subtract of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a negated multiply-subtract of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C) |
Computes a scalar negated multiply-subtract of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C) |
Computes a scalar negated multiply-subtract of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a multiply with alternating add/subtract of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a multiply with alternating add/subtract of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a multiply with alternating add/subtract of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a multiply with alternating add/subtract of 128-bit vectors of [2 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a multiply-add of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a multiply-add of 256-bit vectors of [4 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a multiply-subtract of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a multiply-subtract of 256-bit vectors of [4 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a negated multiply-add of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a negated multiply-add of 256-bit vectors of [4 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a negated multiply-subtract of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a negated multiply-subtract of 256-bit vectors of [4 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a multiply with alternating add/subtract of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a multiply with alternating add/subtract of 256-bit vectors of [4 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a vector multiply with alternating add/subtract of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a vector multiply with alternating add/subtract of 256-bit vectors of [4 x double]. | |
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) |
Definition at line 18 of file fmaintrin.h.
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) |
Definition at line 19 of file fmaintrin.h.
|
static |
Computes a multiply-add of 256-bit vectors of [4 x double].
For each element, computes (__A * __B) + __C
.
This intrinsic corresponds to the VFMADD213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the addend. |
Definition at line 540 of file fmaintrin.h.
|
static |
Computes a multiply-add of 256-bit vectors of [8 x float].
For each element, computes (__A * __B) + __C
.
This intrinsic corresponds to the VFMADD213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the addend. |
Definition at line 520 of file fmaintrin.h.
|
static |
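Computes a multiply with alternating add/subtract of 256-bit vectors of [4 x double]: __C is subtracted from the product in even-indexed elements and added to it in odd-indexed elements (element 0 is the lowest).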
This intrinsic corresponds to the VFMADDSUB213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the addend/subtrahend. |
Definition at line 716 of file fmaintrin.h.
|
static |
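Computes a multiply with alternating add/subtract of 256-bit vectors of [8 x float]: __C is subtracted from the product in even-indexed elements and added to it in odd-indexed elements (element 0 is the lowest).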
This intrinsic corresponds to the VFMADDSUB213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the addend/subtrahend. |
Definition at line 690 of file fmaintrin.h.
|
static |
Computes a multiply-subtract of 256-bit vectors of [4 x double].
For each element, computes (__A * __B) - __C
.
This intrinsic corresponds to the VFMSUB213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the subtrahend. |
Definition at line 580 of file fmaintrin.h.
|
static |
Computes a multiply-subtract of 256-bit vectors of [8 x float].
For each element, computes (__A * __B) - __C
.
This intrinsic corresponds to the VFMSUB213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the subtrahend. |
Definition at line 560 of file fmaintrin.h.
|
static |
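Computes a vector multiply with alternating add/subtract of 256-bit vectors of [4 x double]: __C is added to the product in even-indexed elements and subtracted from it in odd-indexed elements (element 0 is the lowest).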
This intrinsic corresponds to the VFMSUBADD213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the addend/subtrahend. |
Definition at line 772 of file fmaintrin.h.
|
static |
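Computes a vector multiply with alternating add/subtract of 256-bit vectors of [8 x float]: __C is added to the product in even-indexed elements and subtracted from it in odd-indexed elements (element 0 is the lowest).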
This intrinsic corresponds to the VFMSUBADD213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the addend/subtrahend. |
Definition at line 746 of file fmaintrin.h.
|
static |
Computes a negated multiply-add of 256-bit vectors of [4 x double].
For each element, computes -(__A * __B) + __C
.
This intrinsic corresponds to the VFNMADD213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the addend. |
Definition at line 620 of file fmaintrin.h.
|
static |
Computes a negated multiply-add of 256-bit vectors of [8 x float].
For each element, computes -(__A * __B) + __C
.
This intrinsic corresponds to the VFNMADD213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the addend. |
Definition at line 600 of file fmaintrin.h.
|
static |
Computes a negated multiply-subtract of 256-bit vectors of [4 x double].
For each element, computes -(__A * __B) - __C
.
This intrinsic corresponds to the VFNMSUB213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the subtrahend. |
Definition at line 660 of file fmaintrin.h.
|
static |
Computes a negated multiply-subtract of 256-bit vectors of [8 x float].
For each element, computes -(__A * __B) - __C
.
This intrinsic corresponds to the VFNMSUB213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the subtrahend. |
Definition at line 640 of file fmaintrin.h.
|
static |
Computes a multiply-add of 128-bit vectors of [2 x double].
For each element, computes (__A * __B) + __C
.
This intrinsic corresponds to the VFMADD213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the addend. |
Definition at line 56 of file fmaintrin.h.
|
static |
Computes a multiply-add of 128-bit vectors of [4 x float].
For each element, computes (__A * __B) + __C
.
This intrinsic corresponds to the VFMADD213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the addend. |
Definition at line 36 of file fmaintrin.h.
|
static |
Computes a scalar multiply-add of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double].
This intrinsic corresponds to the VFMADD213SD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand in the low 64 bits. |
__B | A 128-bit vector of [2 x double] containing the multiplier in the low 64 bits. |
__C | A 128-bit vector of [2 x double] containing the addend in the low 64 bits. |
Definition at line 112 of file fmaintrin.h.
|
static |
Computes a scalar multiply-add of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float].
This intrinsic corresponds to the VFMADD213SS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand in the low 32 bits. |
__B | A 128-bit vector of [4 x float] containing the multiplier in the low 32 bits. |
__C | A 128-bit vector of [4 x float] containing the addend in the low 32 bits. |
Definition at line 84 of file fmaintrin.h.
|
static |
Computes a multiply with alternating add/subtract of 128-bit vectors of [2 x double]: __C is subtracted from the product in the lower element and added to it in the upper element.
This intrinsic corresponds to the VFMADDSUB213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the addend/subtrahend. |
Definition at line 450 of file fmaintrin.h.
|
static |
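Computes a multiply with alternating add/subtract of 128-bit vectors of [4 x float]: __C is subtracted from the product in even-indexed elements and added to it in odd-indexed elements (element 0 is the lowest).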
This intrinsic corresponds to the VFMADDSUB213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the addend/subtrahend. |
Definition at line 426 of file fmaintrin.h.
|
static |
Computes a multiply-subtract of 128-bit vectors of [2 x double].
For each element, computes (__A * __B) - __C
.
This intrinsic corresponds to the VFMSUB213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the subtrahend. |
Definition at line 152 of file fmaintrin.h.
|
static |
Computes a multiply-subtract of 128-bit vectors of [4 x float].
For each element, computes (__A * __B) - __C
.
This intrinsic corresponds to the VFMSUB213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the subtrahend. |
Definition at line 132 of file fmaintrin.h.
|
static |
Computes a scalar multiply-subtract of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double].
This intrinsic corresponds to the VFMSUB213SD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand in the low 64 bits. |
__B | A 128-bit vector of [2 x double] containing the multiplier in the low 64 bits. |
__C | A 128-bit vector of [2 x double] containing the subtrahend in the low 64 bits. |
Definition at line 208 of file fmaintrin.h.
|
static |
Computes a scalar multiply-subtract of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float].
This intrinsic corresponds to the VFMSUB213SS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand in the low 32 bits. |
__B | A 128-bit vector of [4 x float] containing the multiplier in the low 32 bits. |
__C | A 128-bit vector of [4 x float] containing the subtrahend in the low 32 bits. |
Definition at line 180 of file fmaintrin.h.
|
static |
Computes a multiply with alternating add/subtract of 128-bit vectors of [2 x double]: __C is added to the product in the lower element and subtracted from it in the upper element.
This intrinsic corresponds to the VFMSUBADD213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the addend/subtrahend. |
Definition at line 500 of file fmaintrin.h.
|
static |
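Computes a multiply with alternating add/subtract of 128-bit vectors of [4 x float]: __C is added to the product in even-indexed elements and subtracted from it in odd-indexed elements (element 0 is the lowest).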
This intrinsic corresponds to the VFMSUBADD213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the addend/subtrahend. |
Definition at line 476 of file fmaintrin.h.
|
static |
Computes a negated multiply-add of 128-bit vectors of [2 x double].
For each element, computes -(__A * __B) + __C
.
This intrinsic corresponds to the VFNMADD213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the addend. |
Definition at line 248 of file fmaintrin.h.
|
static |
Computes a negated multiply-add of 128-bit vectors of [4 x float].
For each element, computes -(__A * __B) + __C
.
This intrinsic corresponds to the VFNMADD213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the addend. |
Definition at line 228 of file fmaintrin.h.
|
static |
Computes a scalar negated multiply-add of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double].
This intrinsic corresponds to the VFNMADD213SD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand in the low 64 bits. |
__B | A 128-bit vector of [2 x double] containing the multiplier in the low 64 bits. |
__C | A 128-bit vector of [2 x double] containing the addend in the low 64 bits. |
Definition at line 304 of file fmaintrin.h.
|
static |
Computes a scalar negated multiply-add of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float].
This intrinsic corresponds to the VFNMADD213SS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand in the low 32 bits. |
__B | A 128-bit vector of [4 x float] containing the multiplier in the low 32 bits. |
__C | A 128-bit vector of [4 x float] containing the addend in the low 32 bits. |
Definition at line 276 of file fmaintrin.h.
|
static |
Computes a negated multiply-subtract of 128-bit vectors of [2 x double].
For each element, computes -(__A * __B) - __C
.
This intrinsic corresponds to the VFNMSUB213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the subtrahend. |
Definition at line 344 of file fmaintrin.h.
|
static |
Computes a negated multiply-subtract of 128-bit vectors of [4 x float].
For each element, computes -(__A * __B) - __C
.
This intrinsic corresponds to the VFNMSUB213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the subtrahend. |
Definition at line 324 of file fmaintrin.h.
|
static |
Computes a scalar negated multiply-subtract of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double].
This intrinsic corresponds to the VFNMSUB213SD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand in the low 64 bits. |
__B | A 128-bit vector of [2 x double] containing the multiplier in the low 64 bits. |
__C | A 128-bit vector of [2 x double] containing the subtrahend in the low 64 bits. |
Definition at line 400 of file fmaintrin.h.
|
static |
Computes a scalar negated multiply-subtract of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float].
This intrinsic corresponds to the VFNMSUB213SS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand in the low 32 bits. |
__B | A 128-bit vector of [4 x float] containing the multiplier in the low 32 bits. |
__C | A 128-bit vector of [4 x float] containing the subtrahend in the low 32 bits. |
Definition at line 372 of file fmaintrin.h.