13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
19#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
24#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
29#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
34#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
39#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
44#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
45 (defined(__SSE4_2__) || defined(__SSE4_1__))
49#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
50 (defined(__AES__) || defined(__PCLMUL__))
54#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
55 defined(__CLFLUSHOPT__)
59#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
64#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
69#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
74#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
82#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
87#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
92#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
97#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
102#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
107#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
108 defined(__AVX512VL__)
112#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
113 defined(__AVX512BW__)
117#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
118 defined(__AVX512BITALG__)
122#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
123 defined(__AVX512CD__)
127#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
128 defined(__AVX512VPOPCNTDQ__)
132#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
133 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
137#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
138 defined(__AVX512VNNI__)
142#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
143 (defined(__AVX512VL__) && defined(__AVX512VNNI__))
147#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
152#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
153 defined(__AVX512DQ__)
157#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
158 (defined(__AVX512VL__) && defined(__AVX512BITALG__))
162#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
163 (defined(__AVX512VL__) && defined(__AVX512BW__))
167#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
168 (defined(__AVX512VL__) && defined(__AVX512CD__))
172#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
173 (defined(__AVX512VL__) && defined(__AVX512DQ__))
177#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
178 defined(__AVX512ER__)
182#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
183 defined(__AVX512IFMA__)
187#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
188 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
192#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
197#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
198 defined(__AVX512VBMI__)
202#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
203 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
207#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
208 defined(__AVX512VBMI2__)
212#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
213 (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
217#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
218 defined(__AVX512PF__)
222#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
223 defined(__AVX512FP16__)
227#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
228 (defined(__AVX512VL__) && defined(__AVX512FP16__))
232#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
233 defined(__AVX512BF16__)
237#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
238 (defined(__AVX512VL__) && defined(__AVX512BF16__))
242#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
247#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
248 defined(__VPCLMULQDQ__)
252#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
257#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
262#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
263 defined(__AVXVNNIINT8__)
267#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
268 defined(__AVXNECONVERT__)
272#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
/// Reads the IA32_TSC_AUX MSR (0xC0000103H) using the RDPID instruction.
///
/// NOTE(review): the function-name line was lost in this copy; name
/// reconstructed from the upstream clang <immintrin.h> header — confirm.
///
/// \returns The 32-bit contents of the MSR.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__,
                                              __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
285#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
/// Returns a 16-bit hardware-generated random value via the RDRAND
/// instruction.
///
/// \param __p
///    A pointer to a 16-bit memory location where the random value is stored.
/// \returns 1 if the value was generated successfully; otherwise 0.
static __inline__ int __attribute__((__always_inline__, __nodebug__,
                                     __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  return (int)__builtin_ia32_rdrand16_step(__p);
}
/// Returns a 32-bit hardware-generated random value via the RDRAND
/// instruction.
///
/// \param __p
///    A pointer to a 32-bit memory location where the random value is stored.
/// \returns 1 if the value was generated successfully; otherwise 0.
static __inline__ int __attribute__((__always_inline__, __nodebug__,
                                     __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  return (int)__builtin_ia32_rdrand32_step(__p);
}
/// Returns a 64-bit hardware-generated random value via the RDRAND
/// instruction (64-bit targets only; guarded by __x86_64__ upstream).
///
/// \param __p
///    A pointer to a 64-bit memory location where the random value is stored.
/// \returns 1 if the value was generated successfully; otherwise 0.
static __inline__ int __attribute__((__always_inline__, __nodebug__,
                                     __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
  return (int)__builtin_ia32_rdrand64_step(__p);
}
/// 32-bit-target fallback for _rdrand64_step: synthesizes a 64-bit random
/// value from two 32-bit RDRAND results, since RDRAND with a 64-bit operand
/// is unavailable in 32-bit mode.
///
/// NOTE(review): the trailing return paths were lost in this copy;
/// reconstructed from the upstream clang <immintrin.h> header — confirm.
///
/// \param __p
///    A pointer to a 64-bit memory location where the random value is stored.
/// \returns 1 only if BOTH 32-bit steps succeeded; otherwise 0 and *__p is
///    left unmodified.
static __inline__ int __attribute__((__always_inline__, __nodebug__,
                                     __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
  unsigned int __lo, __hi;
  unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
  unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
  if (__res_lo && __res_hi) {
    *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
    return 1;
  } else {
    return 0;
  }
}
325#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
326 defined(__FSGSBASE__)
/// Reads the low 32 bits of the FS segment base register (RDFSBASE).
///
/// NOTE(review): function-name line lost in this copy; name reconstructed
/// from the builtin it wraps and the upstream clang header — confirm.
///
/// \returns The lower 32 bits of the FS base.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__,
                                              __target__("fsgsbase")))
_readfsbase_u32(void) {
  return __builtin_ia32_rdfsbase32();
}
/// Reads the full 64-bit FS segment base register (RDFSBASE).
///
/// NOTE(review): function-name line lost in this copy; name reconstructed
/// from the builtin it wraps and the upstream clang header — confirm.
///
/// \returns The 64-bit FS base.
static __inline__ unsigned long long __attribute__((__always_inline__,
                                                    __nodebug__,
                                                    __target__("fsgsbase")))
_readfsbase_u64(void) {
  return __builtin_ia32_rdfsbase64();
}
/// Reads the low 32 bits of the GS segment base register (RDGSBASE).
///
/// NOTE(review): function-name line lost in this copy; name reconstructed
/// from the builtin it wraps and the upstream clang header — confirm.
///
/// \returns The lower 32 bits of the GS base.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__,
                                              __target__("fsgsbase")))
_readgsbase_u32(void) {
  return __builtin_ia32_rdgsbase32();
}
/// Reads the full 64-bit GS segment base register (RDGSBASE).
///
/// NOTE(review): function-name line lost in this copy; name reconstructed
/// from the builtin it wraps and the upstream clang header — confirm.
///
/// \returns The 64-bit GS base.
static __inline__ unsigned long long __attribute__((__always_inline__,
                                                    __nodebug__,
                                                    __target__("fsgsbase")))
_readgsbase_u64(void) {
  return __builtin_ia32_rdgsbase64();
}
/// Writes the low 32 bits of the FS segment base register (WRFSBASE).
///
/// \param __V
///    The 32-bit value to write to the FS base.
static __inline__ void __attribute__((__always_inline__, __nodebug__,
                                      __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
}
/// Writes the full 64-bit FS segment base register (WRFSBASE).
///
/// \param __V
///    The 64-bit value to write to the FS base.
static __inline__ void __attribute__((__always_inline__, __nodebug__,
                                      __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
}
/// Writes the low 32 bits of the GS segment base register (WRGSBASE).
///
/// \param __V
///    The 32-bit value to write to the GS base.
static __inline__ void __attribute__((__always_inline__, __nodebug__,
                                      __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
}
/// Writes the full 64-bit GS segment base register (WRGSBASE).
///
/// \param __V
///    The 64-bit value to write to the GS base.
static __inline__ void __attribute__((__always_inline__, __nodebug__,
                                      __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrgsbase64(__V);
}
379#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
388static __inline__
short __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
389_loadbe_i16(
void const *
__P) {
393 return (
short)__builtin_bswap16(((
const struct __loadu_i16*)
__P)->
__v);
396static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
398 struct __storeu_i16 {
401 ((
struct __storeu_i16*)
__P)->__v = __builtin_bswap16((
unsigned short)
__D);
404static __inline__
int __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
405_loadbe_i32(
void const *
__P) {
409 return (
int)__builtin_bswap32(((
const struct __loadu_i32*)
__P)->
__v);
412static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
413_storebe_i32(
void *
__P,
int __D) {
414 struct __storeu_i32 {
417 ((
struct __storeu_i32*)
__P)->__v = __builtin_bswap32((
unsigned int)
__D);
421static __inline__
long long __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
422_loadbe_i64(
void const *
__P) {
424 unsigned long long __v;
426 return (
long long)__builtin_bswap64(((
const struct __loadu_i64*)
__P)->
__v);
429static __inline__
void __attribute__((__always_inline__, __nodebug__, __target__(
"movbe")))
430_storebe_i64(
void *
__P,
long long __D) {
431 struct __storeu_i64 {
432 unsigned long long __v;
434 ((
struct __storeu_i64*)
__P)->__v = __builtin_bswap64((
unsigned long long)
__D);
439#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
445#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
450#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
458#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
459 defined(__XSAVEOPT__)
463#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
468#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
473#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
482#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
487#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
488 defined(__WBNOINVD__)
492#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
493 defined(__CLDEMOTE__)
497#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
502#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
503 defined(__MOVDIRI__) || defined(__MOVDIR64B__)
507#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
512#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
517#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
522#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
526#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
527 defined(__AMX_FP16__)
531#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
532 defined(__KL__) || defined(__WIDEKL__)
536#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
537 defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__)
541#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
542 defined(__AVX512VP2INTERSECT__)
546#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
547 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
551#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
556#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
557 defined(__SERIALIZE__)
561#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
562 defined(__TSXLDTRK__)
566#if defined(_MSC_VER) && __has_extension(gnu_asm)
568#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
575#if defined(__i386__) || defined(__x86_64__)
577_InterlockedExchange_HLEAcquire(
long volatile *_Target,
long _Value) {
578 __asm__ __volatile__(
".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
579 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
583_InterlockedExchange_HLERelease(
long volatile *_Target,
long _Value) {
584 __asm__ __volatile__(
".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
585 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
589#if defined(__x86_64__)
591_InterlockedExchange64_HLEAcquire(__int64
volatile *_Target, __int64 _Value) {
592 __asm__ __volatile__(
".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
593 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
597_InterlockedExchange64_HLERelease(__int64
volatile *_Target, __int64 _Value) {
598 __asm__ __volatile__(
".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
599 :
"+r" (_Value),
"+m" (*_Target) ::
"memory");
606#if defined(__i386__) || defined(__x86_64__)
608_InterlockedCompareExchange_HLEAcquire(
long volatile *_Destination,
609 long _Exchange,
long _Comparand) {
610 __asm__ __volatile__(
".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
611 :
"+a" (_Comparand),
"+m" (*_Destination)
612 :
"r" (_Exchange) :
"memory");
616_InterlockedCompareExchange_HLERelease(
long volatile *_Destination,
617 long _Exchange,
long _Comparand) {
618 __asm__ __volatile__(
".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
619 :
"+a" (_Comparand),
"+m" (*_Destination)
620 :
"r" (_Exchange) :
"memory");
624#if defined(__x86_64__)
626_InterlockedCompareExchange64_HLEAcquire(__int64
volatile *_Destination,
627 __int64 _Exchange, __int64 _Comparand) {
628 __asm__ __volatile__(
".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
629 :
"+a" (_Comparand),
"+m" (*_Destination)
630 :
"r" (_Exchange) :
"memory");
634_InterlockedCompareExchange64_HLERelease(__int64
volatile *_Destination,
635 __int64 _Exchange, __int64 _Comparand) {
636 __asm__ __volatile__(
".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
637 :
"+a" (_Comparand),
"+m" (*_Destination)
638 :
"r" (_Exchange) :
"memory");
646#undef __DEFAULT_FN_ATTRS
_Float16 __2f16 __attribute__((ext_vector_type(2)))
/// Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
#define __DEFAULT_FN_ATTRS
static __inline unsigned char unsigned int unsigned int unsigned int * __p
static __inline__ void short __D
struct __storeu_i16 *__P __v
__inline unsigned int unsigned int unsigned int * __P