/* Reject compilation outright on non-x86 targets: every intrinsic below
 * maps to x86/x64 instructions or compiler builtins. */
#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
19#if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)
23#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)
27#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)
31#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)
35#if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)
39#if !defined(__SCE__) || __has_feature(modules) || \
40 (defined(__SSE4_2__) || defined(__SSE4_1__))
44#if !defined(__SCE__) || __has_feature(modules) || \
45 (defined(__AES__) || defined(__PCLMUL__))
49#if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)
53#if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)
57#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)
61#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)
65#if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)
72#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)
76#if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)
80#if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)
84#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)
88#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)
92#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)
96#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)
100#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)
104#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)
108#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
112#if !defined(__SCE__) || __has_feature(modules) || \
113 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
117#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)
121#if !defined(__SCE__) || __has_feature(modules) || \
122 (defined(__AVX512VL__) && defined(__AVX512VNNI__))
126#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)
130#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)
134#if !defined(__SCE__) || __has_feature(modules) || \
135 (defined(__AVX512VL__) && defined(__AVX512BITALG__))
139#if !defined(__SCE__) || __has_feature(modules) || \
140 (defined(__AVX512VL__) && defined(__AVX512BW__))
144#if !defined(__SCE__) || __has_feature(modules) || \
145 (defined(__AVX512VL__) && defined(__AVX512CD__))
149#if !defined(__SCE__) || __has_feature(modules) || \
150 (defined(__AVX512VL__) && defined(__AVX512DQ__))
154#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512ER__)
158#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
162#if !defined(__SCE__) || __has_feature(modules) || \
163 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
167#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)
171#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)
175#if !defined(__SCE__) || __has_feature(modules) || \
176 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
180#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)
184#if !defined(__SCE__) || __has_feature(modules) || \
185 (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
189#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512PF__)
193#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
197#if !defined(__SCE__) || __has_feature(modules) || \
198 (defined(__AVX512VL__) && defined(__AVX512FP16__))
202#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)
206#if !defined(__SCE__) || __has_feature(modules) || \
207 (defined(__AVX512VL__) && defined(__AVX512BF16__))
211#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
215#if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)
219#if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)
223#if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)
227#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)
231#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)
235#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)
239#if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)
243#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
247#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)
251#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)
259static __inline__
unsigned int __attribute__((__always_inline__, __nodebug__, __target__(
"rdpid")))
261 return __builtin_ia32_rdpid();
265#if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)
275static __inline__
int __attribute__((__always_inline__, __nodebug__, __target__(
"rdrnd")))
276_rdrand16_step(
unsigned short *
__p)
278 return (
int)__builtin_ia32_rdrand16_step(
__p);
290static __inline__
int __attribute__((__always_inline__, __nodebug__, __target__(
"rdrnd")))
291_rdrand32_step(
unsigned int *
__p)
293 return (
int)__builtin_ia32_rdrand32_step(
__p);
305static __inline__
int __attribute__((__always_inline__, __nodebug__, __target__(
"rdrnd")))
306_rdrand64_step(
unsigned long long *
__p)
309 return (
int)__builtin_ia32_rdrand64_step(
__p);
313 unsigned int __lo, __hi;
314 unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
315 unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
316 if (__res_lo && __res_hi) {
317 *
__p = ((
unsigned long long)__hi << 32) | (
unsigned long long)__lo;
327#if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)
336static __inline__
unsigned int __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
339 return __builtin_ia32_rdfsbase32();
349static __inline__
unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
352 return __builtin_ia32_rdfsbase64();
362static __inline__
unsigned int __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
365 return __builtin_ia32_rdgsbase32();
375static __inline__
unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
378 return __builtin_ia32_rdgsbase64();
389static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
390_writefsbase_u32(
unsigned int __V)
392 __builtin_ia32_wrfsbase32(__V);
403static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
404_writefsbase_u64(
unsigned long long __V)
406 __builtin_ia32_wrfsbase64(__V);
417static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
418_writegsbase_u32(
unsigned int __V)
420 __builtin_ia32_wrgsbase32(__V);
431static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
432_writegsbase_u64(
unsigned long long __V)
434 __builtin_ia32_wrgsbase64(__V);
440#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)
457static __inline__
short __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
458_loadbe_i16(
void const *
__P) {
462 return (
short)__builtin_bswap16(((
const struct __loadu_i16*)
__P)->
__v);
475static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
477 struct __storeu_i16 {
480 ((
struct __storeu_i16*)
__P)->__v = __builtin_bswap16((
unsigned short)
__D);
492static __inline__
int __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
493_loadbe_i32(
void const *
__P) {
497 return (
int)__builtin_bswap32(((
const struct __loadu_i32*)
__P)->
__v);
510static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
511_storebe_i32(
void *
__P,
int __D) {
512 struct __storeu_i32 {
515 ((
struct __storeu_i32*)
__P)->__v = __builtin_bswap32((
unsigned int)
__D);
528static __inline__
long long __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
529_loadbe_i64(
void const *
__P) {
531 unsigned long long __v;
533 return (
long long)__builtin_bswap64(((
const struct __loadu_i64*)
__P)->
__v);
546static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
547_storebe_i64(
void *
__P,
long long __D) {
548 struct __storeu_i64 {
549 unsigned long long __v;
551 ((
struct __storeu_i64*)
__P)->__v = __builtin_bswap64((
unsigned long long)
__D);
556#if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)
561#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)
565#if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)
572#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)
576#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)
580#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)
584#if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)
591#if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)
595#if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)
599#if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)
603#if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)
607#if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)
611#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \
612 defined(__MOVDIR64B__)
616#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
620#if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)
624#if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)
628#if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)
631#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)
635#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \
640#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \
641 defined(__AMX_INT8__) || defined(__AMX_BF16__)
645#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)
649#if !defined(__SCE__) || __has_feature(modules) || \
650 defined(__AVX512VP2INTERSECT__)
654#if !defined(__SCE__) || __has_feature(modules) || \
655 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
659#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
663#if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)
667#if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)
671#if defined(_MSC_VER) && __has_extension(gnu_asm)
673#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
680#if defined(__i386__) || defined(__x86_64__)
682_InterlockedExchange_HLEAcquire(
long volatile *_Target,
long _Value) {
683 __asm__ __volatile__(
".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
684 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
688_InterlockedExchange_HLERelease(
long volatile *_Target,
long _Value) {
689 __asm__ __volatile__(
".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
690 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
694#if defined(__x86_64__)
696_InterlockedExchange64_HLEAcquire(__int64
volatile *_Target, __int64 _Value) {
697 __asm__ __volatile__(
".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
698 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
702_InterlockedExchange64_HLERelease(__int64
volatile *_Target, __int64 _Value) {
703 __asm__ __volatile__(
".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
704 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
711#if defined(__i386__) || defined(__x86_64__)
713_InterlockedCompareExchange_HLEAcquire(
long volatile *_Destination,
714 long _Exchange,
long _Comparand) {
715 __asm__ __volatile__(
".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
716 :
"+a" (_Comparand),
"+m" (*_Destination)
717 :
"r" (_Exchange) :
"memory");
721_InterlockedCompareExchange_HLERelease(
long volatile *_Destination,
722 long _Exchange,
long _Comparand) {
723 __asm__ __volatile__(
".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
724 :
"+a" (_Comparand),
"+m" (*_Destination)
725 :
"r" (_Exchange) :
"memory");
729#if defined(__x86_64__)
731_InterlockedCompareExchange64_HLEAcquire(__int64
volatile *_Destination,
732 __int64 _Exchange, __int64 _Comparand) {
733 __asm__ __volatile__(
".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
734 :
"+a" (_Comparand),
"+m" (*_Destination)
735 :
"r" (_Exchange) :
"memory");
739_InterlockedCompareExchange64_HLERelease(__int64
volatile *_Destination,
740 __int64 _Exchange, __int64 _Comparand) {
741 __asm__ __volatile__(
".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
742 :
"+a" (_Comparand),
"+m" (*_Destination)
743 :
"r" (_Exchange) :
"memory");
751#undef __DEFAULT_FN_ATTRS
/* NOTE(review): the following fragments are extraction residue — partial
 * declarations and documentation text from elsewhere in this header. They are
 * not valid C at file scope, so they are preserved only inside this comment:
 *   _Float16 __2f16 __attribute__((ext_vector_type(2)))
 *   Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
 *   #define __DEFAULT_FN_ATTRS
 *   static __inline__ uint32_t volatile uint32_t * __p
 *   static __inline__ void short __D
 *   struct __storeu_i16 *__P __v
 *   __inline unsigned int unsigned int unsigned int * __P
 */