13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
20#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
21#define __DEFAULT_FN_ATTRS \
22 __attribute__((__always_inline__, __nodebug__, \
23 __target__("sse3,no-evex512"), __min_vector_width__(128)))
25#define __DEFAULT_FN_ATTRS \
26 __attribute__((__always_inline__, __nodebug__, __target__("sse3"), \
27 __min_vector_width__(128)))
30#if defined(__cplusplus) && (__cplusplus >= 201103L)
31#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
33#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
53 return (__m128i)__builtin_ia32_lddqu((
char const *)
__p);
72 return __builtin_ia32_addsubps((__v4sf)
__a, (__v4sf)
__b);
95 return __builtin_ia32_haddps((__v4sf)
__a, (__v4sf)
__b);
118 return __builtin_ia32_hsubps((__v4sf)
__a, (__v4sf)
__b);
140 return __builtin_shufflevector((__v4sf)
__a, (__v4sf)
__a, 1, 1, 3, 3);
161 return __builtin_shufflevector((__v4sf)
__a, (__v4sf)
__a, 0, 0, 2, 2);
180 return __builtin_ia32_addsubpd((__v2df)
__a, (__v2df)
__b);
203 return __builtin_ia32_haddpd((__v2df)
__a, (__v2df)
__b);
226 return __builtin_ia32_hsubpd((__v2df)
__a, (__v2df)
__b);
/* _mm_loaddup_pd(dp) is a macro alias: it expands to _mm_load1_pd(dp),
 * forwarding dp unchanged. NOTE(review): _mm_load1_pd is declared outside
 * this view; presumably it loads one double and duplicates it into both
 * vector elements -- confirm against its declaration. */
244#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
262 return __builtin_shufflevector((__v2df)
__a, (__v2df)
__a, 0, 0);
286 __builtin_ia32_monitor(
__p, __extensions, __hints);
308 __builtin_ia32_mwait(__extensions, __hints);
311#undef __DEFAULT_FN_ATTRS
312#undef __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ vector float vector float __b
static __inline__ uint32_t volatile uint32_t * __p
static __inline__ void int __a
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hadd_pd(__m128d __a, __m128d __b)
Horizontally adds the pairs of values contained in two 128-bit vectors of [2 x double].
#define __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hadd_ps(__m128 __a, __m128 __b)
Horizontally adds the adjacent pairs of values contained in two 128-bit vectors of [4 x float].
static __inline__ void __DEFAULT_FN_ATTRS _mm_mwait(unsigned __extensions, unsigned __hints)
Used with the MONITOR instruction to wait while the processor is in the monitor event pending state.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_addsub_pd(__m128d __a, __m128d __b)
Computes alternating differences and sums of two 128-bit vectors of [2 x double]: the even-indexed result element is __a - __b and the odd-indexed result element is __a + __b.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hsub_pd(__m128d __a, __m128d __b)
Horizontally subtracts the pairs of values contained in two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double] to double-precision values stored in a 128-bit vector of [2 x double].
#define __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-bit vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_addsub_ps(__m128 __a, __m128 __b)
Computes alternating differences and sums of two 128-bit vectors of [4 x float]: even-indexed result elements are __a - __b and odd-indexed result elements are __a + __b.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hsub_ps(__m128 __a, __m128 __b)
Horizontally subtracts the adjacent pairs of values contained in two 128-bit vectors of [4 x float].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_lddqu_si128(__m128i_u const *__p)
Loads data from an unaligned memory location to elements in a 128-bit vector.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-bit vector of [4 x float].
static __inline__ void __DEFAULT_FN_ATTRS _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
Establishes a linear address memory range to be monitored and puts the processor in the monitor event pending state.