doxygen/immintrin_8h_source.html

/*===---- immintrin.h - Intel intrinsics -----------------------------------===

 *

 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 * See https://llvm.org/LICENSE.txt for license information.

 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 *

 *===-----------------------------------------------------------------------===

 */


#ifndef __IMMINTRIN_H

#define __IMMINTRIN_H


#if !defined(__i386__) && !defined(__x86_64__)

#error "This header is only meant to be used on x86 and x64 architecture"

#endif


#include <x86gprintrin.h>


#if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)

#include <mmintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)

#include <xmmintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)

#include <emmintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)

#include <pmmintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)

#include <tmmintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__SSE4_2__) || defined(__SSE4_1__))

#include <smmintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AES__) || defined(__PCLMUL__))

#include <wmmintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)

#include <clflushoptintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)

#include <clwbintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)

#include <avxintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)

#include <avx2intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)

#include <f16cintrin.h>

#endif


/* No feature check desired due to internal checks */

#include <bmiintrin.h>


#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)

#include <bmi2intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)

#include <lzcntintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)

#include <popcntintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)

#include <fmaintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)

#include <avx512fintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)

#include <avx512vlintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)

#include <avx512bwintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)

#include <avx512bitalgintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)

#include <avx512cdintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)

#include <avx512vpopcntdqintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))

#include <avx512vpopcntdqvlintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)

#include <avx512vnniintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VL__) && defined(__AVX512VNNI__))

#include <avx512vlvnniintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)

#include <avxvnniintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)

#include <avx512dqintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VL__) && defined(__AVX512BITALG__))

#include <avx512vlbitalgintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VL__) && defined(__AVX512BW__))

#include <avx512vlbwintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VL__) && defined(__AVX512CD__))

#include <avx512vlcdintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VL__) && defined(__AVX512DQ__))

#include <avx512vldqintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512ER__)

#include <avx512erintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)

#include <avx512ifmaintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512IFMA__) && defined(__AVX512VL__))

#include <avx512ifmavlintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)

#include <avxifmaintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)

#include <avx512vbmiintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VBMI__) && defined(__AVX512VL__))

#include <avx512vbmivlintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)

#include <avx512vbmi2intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))

#include <avx512vlvbmi2intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512PF__)

#include <avx512pfintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)

#include <avx512fp16intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VL__) && defined(__AVX512FP16__))

#include <avx512vlfp16intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)

#include <avx512bf16intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VL__) && defined(__AVX512BF16__))

#include <avx512vlbf16intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)

#include <pkuintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)

#include <vpclmulqdqintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)

#include <vaesintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)

#include <gfniintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)

#include <avxvnniint8intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)

#include <avxneconvertintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)

#include <sha512intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)

#include <sm3intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)

#include <sm4intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)

#include <avxvnniint16intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)

/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> RDPID </c> instruction.

///

/// \returns The 32-bit contents of the MSR.

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))

_rdpid_u32(void) {

  // Forwards directly to the compiler builtin; its result is returned as-is.
  return __builtin_ia32_rdpid();

}

#endif // __RDPID__


#if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)

/// Returns a 16-bit hardware-generated random value.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> RDRAND </c> instruction.

///

/// \param [out] __p

///    A pointer to a 16-bit memory location to place the random value.

/// \returns 1 if the value was successfully generated, 0 otherwise.
///    Check this result before consuming the value written through \a __p.

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))

_rdrand16_step(unsigned short *__p)

{

  // The builtin's status result is converted to the declared int return type.
  return (int)__builtin_ia32_rdrand16_step(__p);

}


/// Returns a 32-bit hardware-generated random value.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> RDRAND </c> instruction.

///

/// \param [out] __p

///    A pointer to a 32-bit memory location to place the random value.

/// \returns 1 if the value was successfully generated, 0 otherwise.
///    Check this result before consuming the value written through \a __p.

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))

_rdrand32_step(unsigned int *__p)

{

  // The builtin's status result is converted to the declared int return type.
  return (int)__builtin_ia32_rdrand32_step(__p);

}


/// Produces a 64-bit random value from the hardware random number generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction. When not
/// compiling for x86-64, the result is assembled from two 32-bit
/// <c> RDRAND </c> reads instead.
///
/// \param __p
///    A pointer to a 64-bit memory location that receives the random value.
///    When the value is built from two 32-bit reads and either read fails,
///    0 is stored instead.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
#ifdef __x86_64__
  return (int)__builtin_ia32_rdrand64_step(__p);
#else
  // No 64-bit rdrand is used here: draw the two halves separately. Both
  // draws are always issued, low half first.
  unsigned int __low, __high;
  unsigned int __ok_low = __builtin_ia32_rdrand32_step(&__low);
  unsigned int __ok_high = __builtin_ia32_rdrand32_step(&__high);
  if (!__ok_low || !__ok_high) {
    *__p = 0;
    return 0;
  }
  *__p = (unsigned long long)__low | ((unsigned long long)__high << 32);
  return 1;
#endif
}

#endif /* __RDRND__ */


#if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)

#ifdef __x86_64__

/// Reads the lower 32 bits of the FS base register.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
/// It is only defined when compiling for x86-64.

///

/// \returns The lower 32 bits of the FS base register.

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))

_readfsbase_u32(void)

{

  return __builtin_ia32_rdfsbase32();

}


/// Reads the full contents of the FS base register.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
/// It is only defined when compiling for x86-64.

///

/// \returns The contents of the FS base register.

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))

_readfsbase_u64(void)

{

  return __builtin_ia32_rdfsbase64();

}


/// Reads the lower 32 bits of the GS base register.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
/// It is only defined when compiling for x86-64.

///

/// \returns The lower 32 bits of the GS base register.

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))

_readgsbase_u32(void)

{

  return __builtin_ia32_rdgsbase32();

}


/// Reads the full contents of the GS base register.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
/// It is only defined when compiling for x86-64.

///

/// \returns The contents of the GS base register.

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))

_readgsbase_u64(void)

{

  return __builtin_ia32_rdgsbase64();

}


/// Modifies the FS base register using a 32-bit value.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
/// It is only defined when compiling for x86-64.

///

/// \param __V

///    Value to use for the lower 32 bits of the FS base register.

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))

_writefsbase_u32(unsigned int __V)

{

  __builtin_ia32_wrfsbase32(__V);

}


/// Modifies the FS base register using a 64-bit value.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
/// It is only defined when compiling for x86-64.

///

/// \param __V

///    Value to use for the FS base register.

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))

_writefsbase_u64(unsigned long long __V)

{

  __builtin_ia32_wrfsbase64(__V);

}


/// Modifies the GS base register using a 32-bit value.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
/// It is only defined when compiling for x86-64.

///

/// \param __V

///    Value to use for the lower 32 bits of the GS base register.

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))

_writegsbase_u32(unsigned int __V)

{

  __builtin_ia32_wrgsbase32(__V);

}


/// Modifies the GS base register using a 64-bit value.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
/// It is only defined when compiling for x86-64.

///

/// \param __V

///    Value to use for the GS base register.

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))

_writegsbase_u64(unsigned long long __V)

{

  __builtin_ia32_wrgsbase64(__V);

}


#endif

#endif /* __FSGSBASE__ */


#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)


/* The structs used below force each load/store to be treated as unaligned;

 * this is accomplished with the __packed__ attribute. The __may_alias__

 * attribute prevents TBAA metadata from being generated based on the struct

 * and the type of the field inside of it.

 */


/// Load a 16-bit value from memory and swap its bytes.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
/// The access goes through a packed struct, so \a __P is not required to be
/// aligned.

///

/// \param __P

///    A pointer to the 16-bit value to load.

/// \returns The byte-swapped value.

static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))

_loadbe_i16(void const * __P) {

  // Packed wrapper makes the load unaligned; __may_alias__ keeps TBAA
  // metadata from being generated for the access.
  struct __loadu_i16 {

    unsigned short __v;

  } __attribute__((__packed__, __may_alias__));

  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);

}


/// Swap the bytes of a 16-bit value and store it to memory.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
/// The access goes through a packed struct, so \a __P is not required to be
/// aligned.

///

/// \param __P

///    A pointer to the memory for storing the swapped value.

/// \param __D

///    The 16-bit value to be byte-swapped.

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))

_storebe_i16(void * __P, short __D) {

  // Packed wrapper makes the store unaligned; __may_alias__ keeps TBAA
  // metadata from being generated for the access.
  struct __storeu_i16 {

    unsigned short __v;

  } __attribute__((__packed__, __may_alias__));

  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);

}


/// Load a 32-bit value from memory and swap its bytes.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
/// The access goes through a packed struct, so \a __P is not required to be
/// aligned.

///

/// \param __P

///    A pointer to the 32-bit value to load.

/// \returns The byte-swapped value.

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))

_loadbe_i32(void const * __P) {

  // Packed wrapper makes the load unaligned; __may_alias__ keeps TBAA
  // metadata from being generated for the access.
  struct __loadu_i32 {

    unsigned int __v;

  } __attribute__((__packed__, __may_alias__));

  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);

}


/// Swap the bytes of a 32-bit value and store it to memory.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
/// The access goes through a packed struct, so \a __P is not required to be
/// aligned.

///

/// \param __P

///    A pointer to the memory for storing the swapped value.

/// \param __D

///    The 32-bit value to be byte-swapped.

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))

_storebe_i32(void * __P, int __D) {

  // Packed wrapper makes the store unaligned; __may_alias__ keeps TBAA
  // metadata from being generated for the access.
  struct __storeu_i32 {

    unsigned int __v;

  } __attribute__((__packed__, __may_alias__));

  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D);

}


#ifdef __x86_64__

/// Load a 64-bit value from memory and swap its bytes.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
/// It is only defined when compiling for x86-64. The access goes through a
/// packed struct, so \a __P is not required to be aligned.

///

/// \param __P

///    A pointer to the 64-bit value to load.

/// \returns The byte-swapped value.

static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))

_loadbe_i64(void const * __P) {

  // Packed wrapper makes the load unaligned; __may_alias__ keeps TBAA
  // metadata from being generated for the access.
  struct __loadu_i64 {

    unsigned long long __v;

  } __attribute__((__packed__, __may_alias__));

  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);

}


/// Swap the bytes of a 64-bit value and store it to memory.

///

/// \headerfile <immintrin.h>

///

/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
/// It is only defined when compiling for x86-64. The access goes through a
/// packed struct, so \a __P is not required to be aligned.

///

/// \param __P

///    A pointer to the memory for storing the swapped value.

/// \param __D

///    The 64-bit value to be byte-swapped.

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))

_storebe_i64(void * __P, long long __D) {

  // Packed wrapper makes the store unaligned; __may_alias__ keeps TBAA
  // metadata from being generated for the access.
  struct __storeu_i64 {

    unsigned long long __v;

  } __attribute__((__packed__, __may_alias__));

  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D);

}

#endif

#endif /* __MOVBE__ */


#if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)

#include <rtmintrin.h>

#include <xtestintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)

#include <shaintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)

#include <fxsrintrin.h>

#endif


/* No feature check desired due to internal MSC_VER checks */

#include <xsaveintrin.h>


#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)

#include <xsaveoptintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)

#include <xsavecintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)

#include <xsavesintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)

#include <cetintrin.h>

#endif


/* Intrinsics inside adcintrin.h are available at all times. */

#include <adcintrin.h>


#if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)

#include <adxintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)

#include <rdseedintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)

#include <wbnoinvdintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)

#include <cldemoteintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)

#include <waitpkgintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) ||     \

    defined(__MOVDIR64B__)

#include <movdirintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)

#include <pconfigintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)

#include <sgxintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)

#include <ptwriteintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)

#include <invpcidintrin.h>

#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)

#include <amxfp16intrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) ||          \

    defined(__WIDEKL__)

#include <keylockerintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) ||    \

    defined(__AMX_INT8__) || defined(__AMX_BF16__)

#include <amxintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)

#include <amxcomplexintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    defined(__AVX512VP2INTERSECT__)

#include <avx512vp2intersectintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) ||                             \

    (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))

#include <avx512vlvp2intersectintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)

#include <enqcmdintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)

#include <serializeintrin.h>

#endif


#if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)

#include <tsxldtrkintrin.h>

#endif


#if defined(_MSC_VER) && __has_extension(gnu_asm)

/* Define the default attributes for these intrinsics */

#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))

#ifdef __cplusplus

extern "C" {

#endif

/*----------------------------------------------------------------------------*\

|* Interlocked Exchange HLE

\*----------------------------------------------------------------------------*/

#if defined(__i386__) || defined(__x86_64__)

/// Atomically exchanges \a _Value with the value stored at \a _Target.
///
/// The locked <c> XCHG </c> is emitted behind a 0xf2 prefix byte, which is
/// the HLE XACQUIRE hint (matching the _HLEAcquire suffix).
///
/// \param _Target
///    Pointer to the value to exchange; it is both read and written.
/// \param _Value
///    The value to store into \a *_Target.
/// \returns The value that was held at \a *_Target before the exchange.
static __inline__ long __DEFAULT_FN_ATTRS

_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {

  // The {a|b} braces give the operand order for each assembler dialect.
  // _Value is an in/out operand: after the swap it holds the old contents.
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"

                       : "+r" (_Value), "+m" (*_Target) :: "memory");

  return _Value;

}

/// Atomically exchanges \a _Value with the value stored at \a _Target.
///
/// The locked <c> XCHG </c> is emitted behind a 0xf3 prefix byte, which is
/// the HLE XRELEASE hint (matching the _HLERelease suffix).
///
/// \param _Target
///    Pointer to the value to exchange; it is both read and written.
/// \param _Value
///    The value to store into \a *_Target.
/// \returns The value that was held at \a *_Target before the exchange.
static __inline__ long __DEFAULT_FN_ATTRS

_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {

  // The {a|b} braces give the operand order for each assembler dialect.
  // _Value is an in/out operand: after the swap it holds the old contents.
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"

                       : "+r" (_Value), "+m" (*_Target) :: "memory");

  return _Value;

}

#endif

#if defined(__x86_64__)

/// Atomically exchanges the 64-bit \a _Value with the value stored at
/// \a _Target. Only defined when compiling for x86-64.
///
/// The locked <c> XCHG </c> is emitted behind a 0xf2 prefix byte, which is
/// the HLE XACQUIRE hint (matching the _HLEAcquire suffix).
///
/// \param _Target
///    Pointer to the value to exchange; it is both read and written.
/// \param _Value
///    The value to store into \a *_Target.
/// \returns The value that was held at \a *_Target before the exchange.
static __inline__ __int64 __DEFAULT_FN_ATTRS

_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {

  // The {a|b} braces give the operand order for each assembler dialect.
  // _Value is an in/out operand: after the swap it holds the old contents.
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"

                       : "+r" (_Value), "+m" (*_Target) :: "memory");

  return _Value;

}

/// Atomically exchanges the 64-bit \a _Value with the value stored at
/// \a _Target. Only defined when compiling for x86-64.
///
/// The locked <c> XCHG </c> is emitted behind a 0xf3 prefix byte, which is
/// the HLE XRELEASE hint (matching the _HLERelease suffix).
///
/// \param _Target
///    Pointer to the value to exchange; it is both read and written.
/// \param _Value
///    The value to store into \a *_Target.
/// \returns The value that was held at \a *_Target before the exchange.
static __inline__ __int64 __DEFAULT_FN_ATTRS

_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {

  // The {a|b} braces give the operand order for each assembler dialect.
  // _Value is an in/out operand: after the swap it holds the old contents.
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"

                       : "+r" (_Value), "+m" (*_Target) :: "memory");

  return _Value;

}

#endif

/*----------------------------------------------------------------------------*\

|* Interlocked Compare Exchange HLE

\*----------------------------------------------------------------------------*/

#if defined(__i386__) || defined(__x86_64__)

/// Atomically compares the value at \a _Destination with \a _Comparand and,
/// if they are equal, stores \a _Exchange there.
///
/// The locked <c> CMPXCHG </c> is emitted behind a 0xf2 prefix byte, which is
/// the HLE XACQUIRE hint (matching the _HLEAcquire suffix).
///
/// \param _Destination
///    Pointer to the value to compare and conditionally replace.
/// \param _Exchange
///    The value to store when the comparison succeeds.
/// \param _Comparand
///    The value expected to be found at \a *_Destination.
/// \returns The value read from \a *_Destination; it equals \a _Comparand
///    exactly when the exchange took place.
static __inline__ long __DEFAULT_FN_ATTRS

_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,

                              long _Exchange, long _Comparand) {

  // CMPXCHG compares against the accumulator, hence the "+a" constraint on
  // _Comparand; afterwards it holds the destination's prior value.
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"

                       : "+a" (_Comparand), "+m" (*_Destination)

                       : "r" (_Exchange) : "memory");

  return _Comparand;

}

/// Atomically compares the value at \a _Destination with \a _Comparand and,
/// if they are equal, stores \a _Exchange there.
///
/// The locked <c> CMPXCHG </c> is emitted behind a 0xf3 prefix byte, which is
/// the HLE XRELEASE hint (matching the _HLERelease suffix).
///
/// \param _Destination
///    Pointer to the value to compare and conditionally replace.
/// \param _Exchange
///    The value to store when the comparison succeeds.
/// \param _Comparand
///    The value expected to be found at \a *_Destination.
/// \returns The value read from \a *_Destination; it equals \a _Comparand
///    exactly when the exchange took place.
static __inline__ long __DEFAULT_FN_ATTRS

_InterlockedCompareExchange_HLERelease(long volatile *_Destination,

                              long _Exchange, long _Comparand) {

  // CMPXCHG compares against the accumulator, hence the "+a" constraint on
  // _Comparand; afterwards it holds the destination's prior value.
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"

                       : "+a" (_Comparand), "+m" (*_Destination)

                       : "r" (_Exchange) : "memory");

  return _Comparand;

}

#endif

#if defined(__x86_64__)

/// Atomically compares the 64-bit value at \a _Destination with
/// \a _Comparand and, if they are equal, stores \a _Exchange there. Only
/// defined when compiling for x86-64.
///
/// The locked <c> CMPXCHG </c> is emitted behind a 0xf2 prefix byte, which is
/// the HLE XACQUIRE hint (matching the _HLEAcquire suffix).
///
/// \param _Destination
///    Pointer to the value to compare and conditionally replace.
/// \param _Exchange
///    The value to store when the comparison succeeds.
/// \param _Comparand
///    The value expected to be found at \a *_Destination.
/// \returns The value read from \a *_Destination; it equals \a _Comparand
///    exactly when the exchange took place.
static __inline__ __int64 __DEFAULT_FN_ATTRS

_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,

                              __int64 _Exchange, __int64 _Comparand) {

  // CMPXCHG compares against the accumulator, hence the "+a" constraint on
  // _Comparand; afterwards it holds the destination's prior value.
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"

                       : "+a" (_Comparand), "+m" (*_Destination)

                       : "r" (_Exchange) : "memory");

  return _Comparand;

}

/// Atomically compares the 64-bit value at \a _Destination with
/// \a _Comparand and, if they are equal, stores \a _Exchange there. Only
/// defined when compiling for x86-64.
///
/// The locked <c> CMPXCHG </c> is emitted behind a 0xf3 prefix byte, which is
/// the HLE XRELEASE hint (matching the _HLERelease suffix).
///
/// \param _Destination
///    Pointer to the value to compare and conditionally replace.
/// \param _Exchange
///    The value to store when the comparison succeeds.
/// \param _Comparand
///    The value expected to be found at \a *_Destination.
/// \returns The value read from \a *_Destination; it equals \a _Comparand
///    exactly when the exchange took place.
static __inline__ __int64 __DEFAULT_FN_ATTRS

_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,

                              __int64 _Exchange, __int64 _Comparand) {

  // CMPXCHG compares against the accumulator, hence the "+a" constraint on
  // _Comparand; afterwards it holds the destination's prior value.
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"

                       : "+a" (_Comparand), "+m" (*_Destination)

                       : "r" (_Exchange) : "memory");

  return _Comparand;

}

#endif

#ifdef __cplusplus

}

#endif


#undef __DEFAULT_FN_ATTRS


#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */


#endif /* __IMMINTRIN_H */

__attribute__
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
Definition: __clang_hip_libdevice_declares.h:293

__DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS
Definition: __wmmintrin_aes.h:18

adcintrin.h

adxintrin.h

amxcomplexintrin.h

amxfp16intrin.h

amxintrin.h

__p
static __inline__ uint32_t volatile uint32_t * __p
Definition: arm_acle.h:80

avx2intrin.h

avx512bf16intrin.h

avx512bitalgintrin.h

avx512bwintrin.h

avx512cdintrin.h

avx512dqintrin.h

avx512erintrin.h

avx512fintrin.h

avx512fp16intrin.h

avx512ifmaintrin.h

avx512ifmavlintrin.h

avx512pfintrin.h

avx512vbmi2intrin.h

avx512vbmiintrin.h

avx512vbmivlintrin.h

avx512vlbf16intrin.h

avx512vlbitalgintrin.h

avx512vlbwintrin.h

avx512vlcdintrin.h

avx512vldqintrin.h

avx512vlfp16intrin.h

avx512vlintrin.h

avx512vlvbmi2intrin.h

avx512vlvnniintrin.h

avx512vlvp2intersectintrin.h

avx512vnniintrin.h

avx512vp2intersectintrin.h

avx512vpopcntdqintrin.h

avx512vpopcntdqvlintrin.h

avxifmaintrin.h

avxintrin.h

avxneconvertintrin.h

avxvnniint16intrin.h

avxvnniint8intrin.h

avxvnniintrin.h

cetintrin.h

cldemoteintrin.h

clflushoptintrin.h

clwbintrin.h

enqcmdintrin.h

f16cintrin.h

fmaintrin.h

fxsrintrin.h

gfniintrin.h

__D
static __inline__ void short __D
Definition: immintrin.h:476

__v
struct __storeu_i16 *__P __v
Definition: immintrin.h:480

invpcidintrin.h

keylockerintrin.h

lzcntintrin.h

movdirintrin.h

pconfigintrin.h

pkuintrin.h

popcntintrin.h

bmi2intrin.h

__P
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25

bmiintrin.h

emmintrin.h

mmintrin.h

pmmintrin.h

ptwriteintrin.h

rdseedintrin.h

rtmintrin.h

serializeintrin.h

sgxintrin.h

sha512intrin.h

shaintrin.h

sm3intrin.h

sm4intrin.h

smmintrin.h

tmmintrin.h

tsxldtrkintrin.h

vaesintrin.h

vpclmulqdqintrin.h

waitpkgintrin.h

wbnoinvdintrin.h

wmmintrin.h

x86gprintrin.h

xmmintrin.h

xsavecintrin.h

xsaveintrin.h

xsaveoptintrin.h

xsavesintrin.h

xtestintrin.h