#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__MMX__)
#include <mmintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__SSE__)
#include <xmmintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__SSE2__)
#include <emmintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__SSE3__)
#include <pmmintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__SSSE3__)
#include <tmmintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__SSE4_2__) || defined(__SSE4_1__))
#include <smmintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AES__) || defined(__PCLMUL__))
#include <wmmintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__CLFLUSHOPT__)
#include <clflushoptintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__CLWB__)
#include <clwbintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX__)
#include <avxintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX2__)
#include <avx2intrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__F16C__)
#include <f16cintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__FMA__)
#include <fmaintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512F__)
#include <avx512fintrin.h>
#endif

/* ... further feature-guarded sub-header includes ... */
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512VL__)
#include <avx512vlintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512BW__)
#include <avx512bwintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512VPOPCNTDQ__)
#include <avx512vpopcntdqintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
#include <avx512vpopcntdqvlintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVXVNNI__)
#include <avxvnniintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512CD__))
#include <avx512vlcdintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512DQ__))
#include <avx512vldqintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512ER__)
#include <avx512erintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
#include <avx512ifmavlintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVXIFMA__)
#include <avxifmaintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512VBMI__)
#include <avx512vbmiintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
#include <avx512vbmivlintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512FP16__)
#include <avx512fp16intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512FP16__))
#include <avx512vlfp16intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512BF16__)
#include <avx512bf16intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BF16__))
#include <avx512vlbf16intrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__PKU__)
#include <pkuintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__VAES__)
#include <vaesintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__GFNI__)
#include <gfniintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVXVNNIINT8__)
#include <avxvnniint8intrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVXNECONVERT__)
#include <avxneconvertintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__SHA512__)
#include <sha512intrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__SM3__)
#include <sm3intrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__SM4__)
#include <sm4intrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVXVNNIINT16__)
#include <avxvnniint16intrin.h>
#endif
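/*
 * Illustrative usage sketch (add_epi32_demo is a hypothetical helper, not
 * part of this header): a translation unit normally just includes
 * <immintrin.h> and relies on the guards above to pull in the sub-headers
 * that match the enabled target features, e.g. when built with -mavx2:
 *
 *   #include <immintrin.h>
 *
 *   #if defined(__AVX2__)
 *   static __m256i add_epi32_demo(__m256i a, __m256i b) {
 *     return _mm256_add_epi32(a, b);   // provided via avx2intrin.h
 *   }
 *   #endif
 */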
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__RDPID__)
/// Reads the value of the IA32_TSC_AUX MSR.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
#endif /* __RDPID__ */
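/*
 * Usage sketch (illustrative; current_cpu_hint is a hypothetical helper):
 * when compiled with -mrdpid, the IA32_TSC_AUX value, which operating
 * systems typically use to publish a processor identifier, can be read
 * without issuing RDTSCP:
 *
 *   static unsigned int current_cpu_hint(void) {
 *     return _rdpid_u32();
 *   }
 */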
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__RDRND__)
/// Places a 16-bit hardware-generated random value in *__p.
/// Returns 1 if the value was generated successfully, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return (int)__builtin_ia32_rdrand16_step(__p);
}

/// Places a 32-bit hardware-generated random value in *__p.
/// Returns 1 if the value was generated successfully, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return (int)__builtin_ia32_rdrand32_step(__p);
}

/// Places a 64-bit hardware-generated random value in *__p.
/// Returns 1 if the value was generated successfully, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
#ifdef __x86_64__
  return (int)__builtin_ia32_rdrand64_step(__p);
#else
  /* On 32-bit targets, emulate the 64-bit RDRAND with two 32-bit steps. */
  unsigned int __lo, __hi;
  unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
  unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
  if (__res_lo && __res_hi) {
    *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
    return 1;
  } else {
    *__p = 0;
    return 0;
  }
#endif
}
#endif /* __RDRND__ */
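/*
 * Usage sketch (illustrative; get_random_u32 is a hypothetical helper):
 * RDRAND can transiently fail, so callers usually check the return value and
 * retry a bounded number of times:
 *
 *   static int get_random_u32(unsigned int *out) {
 *     for (int i = 0; i < 10; ++i)
 *       if (_rdrand32_step(out))
 *         return 1;                    // success: *out holds a random value
 *     return 0;                        // give up after repeated failures
 *   }
 */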
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__FSGSBASE__)
#ifdef __x86_64__
/* Read and write the FS and GS segment base registers (64-bit mode only). */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
#endif /* __x86_64__ */
#endif /* __FSGSBASE__ */
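/*
 * Usage sketch (illustrative; swap_gs_base is a hypothetical helper): on
 * x86-64 targets built with -mfsgsbase, and only when the OS has enabled
 * CR4.FSGSBASE, user code can read and write the FS/GS base registers
 * directly:
 *
 *   static unsigned long long swap_gs_base(unsigned long long new_base) {
 *     unsigned long long old_base = _readgsbase_u64();
 *     _writegsbase_u64(new_base);
 *     return old_base;
 *   }
 */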
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__MOVBE__)

/* The structs used below force the loads and stores to be unaligned (via the
 * __packed__ attribute); __may_alias__ prevents TBAA metadata from being
 * generated based on the struct element type. */
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
}

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D);
}

#ifdef __x86_64__
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D);
}
#endif /* __x86_64__ */
#endif /* __MOVBE__ */
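/*
 * Usage sketch (illustrative; parse_be32 is a hypothetical helper): the
 * load/store helpers above byte-swap on the fly, which is convenient for
 * big-endian wire formats such as network packet headers:
 *
 *   static unsigned int parse_be32(const void *field) {
 *     return (unsigned int)_loadbe_i32(field);   // unaligned big-endian load
 *   }
 */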
/* ... further feature-guarded sub-header includes ... */
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
#endif
/* ... further feature-guarded sub-header includes ... */
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__WBNOINVD__)
#include <wbnoinvdintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__CLDEMOTE__)
#include <cldemoteintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__WAITPKG__)
#include <waitpkgintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__MOVDIRI__) || defined(__MOVDIR64B__)
#include <movdirintrin.h>
#endif
/* ... further feature-guarded sub-header includes ... */
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AMX_FP16__)
#include <amxfp16intrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__KL__) || defined(__WIDEKL__)
#include <keylockerintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__)
#include <amxintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AMX_COMPLEX__)
#include <amxcomplexintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__AVX512VP2INTERSECT__)
#include <avx512vp2intersectintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
#include <avx512vlvp2intersectintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__ENQCMD__)
#include <enqcmdintrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__SERIALIZE__)
#include <serializeintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
    defined(__TSXLDTRK__)
#include <tsxldtrkintrin.h>
#endif
#if defined(_MSC_VER) && __has_extension(gnu_asm)
/* Define the default attributes for the functions in this section. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))

/* Interlocked Exchange HLE */
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
/* Interlocked Compare Exchange HLE */
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
                              long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
                              long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
                              __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
                              __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#undef __DEFAULT_FN_ATTRS
#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
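/*
 * Usage sketch for the HLE helpers above (illustrative; lock_word and the
 * helper names are hypothetical). The 0xf2/0xf3 prefixes are XACQUIRE and
 * XRELEASE, so a spinlock built on these operations may be lock-elided on
 * HLE-capable processors and still behaves as an ordinary lock elsewhere:
 *
 *   static volatile long lock_word;
 *
 *   static void spin_lock(void) {
 *     while (_InterlockedExchange_HLEAcquire(&lock_word, 1) != 0)
 *       ;                      // spin until the holder releases the lock
 *   }
 *
 *   static void spin_unlock(void) {
 *     _InterlockedExchange_HLERelease(&lock_word, 0);
 *   }
 *
 * The compare-exchange variants follow the usual CAS contract and return the
 * value previously stored at the destination:
 *
 *   static int try_lock(void) {
 *     return _InterlockedCompareExchange_HLEAcquire(&lock_word, 1, 0) == 0;
 *   }
 */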