clang 20.0.0git
|
Go to the source code of this file.
Macros | |
#define | __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) |
#define | __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) |
Functions | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a multiply-add of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a multiply-add of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C) |
Computes a scalar multiply-add of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C) |
Computes a scalar multiply-add of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a multiply-subtract of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a multiply-subtract of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C) |
Computes a scalar multiply-subtract of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C) |
Computes a scalar multiply-subtract of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a negated multiply-add of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a negated multiply-add of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C) |
Computes a scalar negated multiply-add of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C) |
Computes a scalar negated multiply-add of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a negated multiply-subtract of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a negated multiply-subtract of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C) |
Computes a scalar negated multiply-subtract of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C) |
Computes a scalar negated multiply-subtract of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a multiply with alternating add/subtract of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a multiply with alternating add/subtract of 128-bit vectors of [2 x double]. | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C) |
Computes a multiply with alternating subtract/add of 128-bit vectors of [4 x float]. | |
static __inline__ __m128d __DEFAULT_FN_ATTRS128 | _mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C) |
Computes a multiply with alternating subtract/add of 128-bit vectors of [2 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a multiply-add of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a multiply-add of 256-bit vectors of [4 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a multiply-subtract of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a multiply-subtract of 256-bit vectors of [4 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a negated multiply-add of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a negated multiply-add of 256-bit vectors of [4 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a negated multiply-subtract of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a negated multiply-subtract of 256-bit vectors of [4 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a multiply with alternating add/subtract of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a multiply with alternating add/subtract of 256-bit vectors of [4 x double]. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C) |
Computes a multiply with alternating subtract/add of 256-bit vectors of [8 x float]. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS256 | _mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C) |
Computes a multiply with alternating subtract/add of 256-bit vectors of [4 x double]. | |
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) |
Definition at line 18 of file fmaintrin.h.
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) |
Definition at line 19 of file fmaintrin.h.
|
static |
Computes a multiply-add of 256-bit vectors of [4 x double].
For each element, computes (__A * __B) + __C
.
This intrinsic corresponds to the VFMADD213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the addend. |
Definition at line 552 of file fmaintrin.h.
|
static |
Computes a multiply-add of 256-bit vectors of [8 x float].
For each element, computes (__A * __B) + __C
.
This intrinsic corresponds to the VFMADD213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the addend. |
Definition at line 532 of file fmaintrin.h.
|
static |
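Computes a multiply with alternating add/subtract of 256-bit vectors of [4 x double]. Counting elements from 0 at the least-significant end, odd-indexed elements compute (__A * __B) + __C and even-indexed elements compute (__A * __B) - __C.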
This intrinsic corresponds to the VFMADDSUB213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the addend/subtrahend. |
Definition at line 730 of file fmaintrin.h.
|
static |
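Computes a multiply with alternating add/subtract of 256-bit vectors of [8 x float]. Counting elements from 0 at the least-significant end, odd-indexed elements compute (__A * __B) + __C and even-indexed elements compute (__A * __B) - __C.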
This intrinsic corresponds to the VFMADDSUB213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the addend/subtrahend. |
Definition at line 703 of file fmaintrin.h.
|
static |
Computes a multiply-subtract of 256-bit vectors of [4 x double].
For each element, computes (__A * __B) - __C
.
This intrinsic corresponds to the VFMSUB213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the subtrahend. |
Definition at line 592 of file fmaintrin.h.
|
static |
Computes a multiply-subtract of 256-bit vectors of [8 x float].
For each element, computes (__A * __B) - __C
.
This intrinsic corresponds to the VFMSUB213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the subtrahend. |
Definition at line 572 of file fmaintrin.h.
|
static |
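Computes a multiply with alternating subtract/add of 256-bit vectors of [4 x double]. Counting elements from 0 at the least-significant end, odd-indexed elements compute (__A * __B) - __C and even-indexed elements compute (__A * __B) + __C.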
This intrinsic corresponds to the VFMSUBADD213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the addend/subtrahend. |
Definition at line 788 of file fmaintrin.h.
|
static |
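Computes a multiply with alternating subtract/add of 256-bit vectors of [8 x float]. Counting elements from 0 at the least-significant end, odd-indexed elements compute (__A * __B) - __C and even-indexed elements compute (__A * __B) + __C.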
This intrinsic corresponds to the VFMSUBADD213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the addend/subtrahend. |
Definition at line 761 of file fmaintrin.h.
|
static |
Computes a negated multiply-add of 256-bit vectors of [4 x double].
For each element, computes -(__A * __B) + __C
.
This intrinsic corresponds to the VFNMADD213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the addend. |
Definition at line 632 of file fmaintrin.h.
|
static |
Computes a negated multiply-add of 256-bit vectors of [8 x float].
For each element, computes -(__A * __B) + __C
.
This intrinsic corresponds to the VFNMADD213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the addend. |
Definition at line 612 of file fmaintrin.h.
|
static |
Computes a negated multiply-subtract of 256-bit vectors of [4 x double].
For each element, computes -(__A * __B) - __C
.
This intrinsic corresponds to the VFNMSUB213PD
instruction.
__A | A 256-bit vector of [4 x double] containing the multiplicand. |
__B | A 256-bit vector of [4 x double] containing the multiplier. |
__C | A 256-bit vector of [4 x double] containing the subtrahend. |
Definition at line 672 of file fmaintrin.h.
|
static |
Computes a negated multiply-subtract of 256-bit vectors of [8 x float].
For each element, computes -(__A * __B) - __C
.
This intrinsic corresponds to the VFNMSUB213PS
instruction.
__A | A 256-bit vector of [8 x float] containing the multiplicand. |
__B | A 256-bit vector of [8 x float] containing the multiplier. |
__C | A 256-bit vector of [8 x float] containing the subtrahend. |
Definition at line 652 of file fmaintrin.h.
|
static |
Computes a multiply-add of 128-bit vectors of [2 x double].
For each element, computes (__A * __B) + __C
.
This intrinsic corresponds to the VFMADD213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the addend. |
Definition at line 56 of file fmaintrin.h.
|
static |
Computes a multiply-add of 128-bit vectors of [4 x float].
For each element, computes (__A * __B) + __C
.
This intrinsic corresponds to the VFMADD213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the addend. |
Definition at line 36 of file fmaintrin.h.
|
static |
Computes a scalar multiply-add of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double].
This intrinsic corresponds to the VFMADD213SD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand in the low 64 bits. |
__B | A 128-bit vector of [2 x double] containing the multiplier in the low 64 bits. |
__C | A 128-bit vector of [2 x double] containing the addend in the low 64 bits. |
Definition at line 114 of file fmaintrin.h.
|
static |
Computes a scalar multiply-add of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float].
This intrinsic corresponds to the VFMADD213SS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand in the low 32 bits. |
__B | A 128-bit vector of [4 x float] containing the multiplier in the low 32 bits. |
__C | A 128-bit vector of [4 x float] containing the addend in the low 32 bits. |
Definition at line 85 of file fmaintrin.h.
|
static |
Computes a multiply with alternating add/subtract of 128-bit vectors of [2 x double]. The upper element computes (__A * __B) + __C and the lower element computes (__A * __B) - __C.
This intrinsic corresponds to the VFMADDSUB213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the addend/subtrahend. |
Definition at line 460 of file fmaintrin.h.
|
static |
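Computes a multiply with alternating add/subtract of 128-bit vectors of [4 x float]. Counting elements from 0 at the least-significant end, odd-indexed elements compute (__A * __B) + __C and even-indexed elements compute (__A * __B) - __C.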
This intrinsic corresponds to the VFMADDSUB213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the addend/subtrahend. |
Definition at line 435 of file fmaintrin.h.
|
static |
Computes a multiply-subtract of 128-bit vectors of [2 x double].
For each element, computes (__A * __B) - __C
.
This intrinsic corresponds to the VFMSUB213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the subtrahend. |
Definition at line 154 of file fmaintrin.h.
|
static |
Computes a multiply-subtract of 128-bit vectors of [4 x float].
For each element, computes (__A * __B) - __C
.
This intrinsic corresponds to the VFMSUB213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the subtrahend. |
Definition at line 134 of file fmaintrin.h.
|
static |
Computes a scalar multiply-subtract of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double].
This intrinsic corresponds to the VFMSUB213SD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand in the low 64 bits. |
__B | A 128-bit vector of [2 x double] containing the multiplier in the low 64 bits. |
__C | A 128-bit vector of [2 x double] containing the subtrahend in the low 64 bits. |
Definition at line 212 of file fmaintrin.h.
|
static |
Computes a scalar multiply-subtract of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float].
This intrinsic corresponds to the VFMSUB213SS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand in the low 32 bits. |
__B | A 128-bit vector of [4 x float] containing the multiplier in the low 32 bits. |
__C | A 128-bit vector of [4 x float] containing the subtrahend in the low 32 bits. |
Definition at line 183 of file fmaintrin.h.
|
static |
Computes a multiply with alternating subtract/add of 128-bit vectors of [2 x double]. The upper element computes (__A * __B) - __C and the lower element computes (__A * __B) + __C.
This intrinsic corresponds to the VFMSUBADD213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the addend/subtrahend. |
Definition at line 512 of file fmaintrin.h.
|
static |
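Computes a multiply with alternating subtract/add of 128-bit vectors of [4 x float]. Counting elements from 0 at the least-significant end, odd-indexed elements compute (__A * __B) - __C and even-indexed elements compute (__A * __B) + __C.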
This intrinsic corresponds to the VFMSUBADD213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the addend/subtrahend. |
Definition at line 487 of file fmaintrin.h.
|
static |
Computes a negated multiply-add of 128-bit vectors of [2 x double].
For each element, computes -(__A * __B) + __C
.
This intrinsic corresponds to the VFNMADD213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the addend. |
Definition at line 252 of file fmaintrin.h.
|
static |
Computes a negated multiply-add of 128-bit vectors of [4 x float].
For each element, computes -(__A * __B) + __C
.
This intrinsic corresponds to the VFNMADD213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the addend. |
Definition at line 232 of file fmaintrin.h.
|
static |
Computes a scalar negated multiply-add of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double].
This intrinsic corresponds to the VFNMADD213SD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand in the low 64 bits. |
__B | A 128-bit vector of [2 x double] containing the multiplier in the low 64 bits. |
__C | A 128-bit vector of [2 x double] containing the addend in the low 64 bits. |
Definition at line 310 of file fmaintrin.h.
|
static |
Computes a scalar negated multiply-add of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float].
This intrinsic corresponds to the VFNMADD213SS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand in the low 32 bits. |
__B | A 128-bit vector of [4 x float] containing the multiplier in the low 32 bits. |
__C | A 128-bit vector of [4 x float] containing the addend in the low 32 bits. |
Definition at line 281 of file fmaintrin.h.
|
static |
Computes a negated multiply-subtract of 128-bit vectors of [2 x double].
For each element, computes -(__A * __B) - __C
.
This intrinsic corresponds to the VFNMSUB213PD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand. |
__B | A 128-bit vector of [2 x double] containing the multiplier. |
__C | A 128-bit vector of [2 x double] containing the subtrahend. |
Definition at line 350 of file fmaintrin.h.
|
static |
Computes a negated multiply-subtract of 128-bit vectors of [4 x float].
For each element, computes -(__A * __B) - __C
.
This intrinsic corresponds to the VFNMSUB213PS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand. |
__B | A 128-bit vector of [4 x float] containing the multiplier. |
__C | A 128-bit vector of [4 x float] containing the subtrahend. |
Definition at line 330 of file fmaintrin.h.
|
static |
Computes a scalar negated multiply-subtract of the double-precision values in the low 64 bits of 128-bit vectors of [2 x double].
This intrinsic corresponds to the VFNMSUB213SD
instruction.
__A | A 128-bit vector of [2 x double] containing the multiplicand in the low 64 bits. |
__B | A 128-bit vector of [2 x double] containing the multiplier in the low 64 bits. |
__C | A 128-bit vector of [2 x double] containing the subtrahend in the low 64 bits. |
Definition at line 408 of file fmaintrin.h.
|
static |
Computes a scalar negated multiply-subtract of the single-precision values in the low 32 bits of 128-bit vectors of [4 x float].
This intrinsic corresponds to the VFNMSUB213SS
instruction.
__A | A 128-bit vector of [4 x float] containing the multiplicand in the low 32 bits. |
__B | A 128-bit vector of [4 x float] containing the multiplier in the low 32 bits. |
__C | A 128-bit vector of [4 x float] containing the subtrahend in the low 32 bits. |
Definition at line 379 of file fmaintrin.h.