#ifndef __ARM_ACLE_H
#define __ARM_ACLE_H

#ifndef __ARM_ACLE
#error "ACLE intrinsics support not enabled."
#endif

#include <stdint.h>

#if defined(__cplusplus)
extern "C" {
#endif
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __dbg(t) __builtin_arm_dbg(t)
#endif
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
#define _CHKFEAT_GCS 1
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__chkfeat(uint64_t __features) {
  return __builtin_arm_chkfeat(__features) ^ __features;
}
#endif
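/* Usage sketch (illustrative, not part of the header): probe at run time
   whether Guarded Control Stacks are enabled before relying on them.  As
   reconstructed here, a set bit in the result means the feature is enabled.

     if (__chkfeat(_CHKFEAT_GCS)) {
       // GCS is enabled for this thread.
     }
*/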
 
/* Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
  uint32_t __v;
#if (__ARM_FEATURE_LDREX & 4) || __ARM_ARCH_6M__ || __linux__
  /* Use an atomic exchange where SWP is unavailable, deprecated, or (as on
     Linux) unreliable. */
  __v = __atomic_exchange_n(__p, __x, __ATOMIC_RELAXED);
#else
  /* Fall back to the legacy SWP instruction. */
  __asm__("swp %0, %1, [%2]" : "=r"(__v) : "r"(__x), "r"(__p) : "memory");
#endif
  return __v;
}
 
#define __pld(addr) __pldx(0, 0, 0, addr)

#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, 1)
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif
 
#define __pli(addr) __plix(0, 0, addr)

#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, 0)
#else
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif
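/* Usage sketch (illustrative): prefetch the next element of a linked list
   one step ahead of use; the node layout is an assumption for the example.

     for (struct node *n = head; n; n = n->next) {
       if (n->next)
         __pld(n->next);   // plain data prefetch (read, default level/policy)
       process(n);
     }
*/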
 
#if !defined(_MSC_VER) || (!defined(__aarch64__) && !defined(__arm64ec__))
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
  __builtin_arm_nop();
}
#endif
 
 
/* Rotate right */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
  __y %= 32;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (32 - __y));
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t __x, uint32_t __y) {
  __y %= 64;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (64 - __y));
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ == 4
  return __ror(__x, __y);
#else
  return __rorll(__x, __y);
#endif
}
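/* Usage sketch (illustrative): rotates are the building block of round
   functions such as a ChaCha-style mixing step; this fragment is an
   example only.

     uint32_t mix(uint32_t a, uint32_t b) {
       return __ror(a ^ b, 7);          // rotate right by 7
     }
     // __ror(0x80000001u, 1) == 0xC0000000u
*/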
 
/* Miscellaneous data-processing intrinsics */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t __t) {
  return __builtin_arm_clz(__t);
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_arm_clz(__t);
#else
  return __builtin_arm_clz64(__t);
#endif
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t __t) {
  return __builtin_arm_clz64(__t);
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__cls(uint32_t __t) {
  return __builtin_arm_cls(__t);
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__clsl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_arm_cls(__t);
#else
  return __builtin_arm_cls64(__t);
#endif
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__clsll(uint64_t __t) {
  return __builtin_arm_cls64(__t);
}
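/* Usage sketch (illustrative): integer log2 via count-leading-zeros.  Older
   ACLE revisions left __clz(0) unspecified, hence the guard.

     unsigned ilog2(uint32_t x) {
       return x ? 31u - __clz(x) : 0u;  // e.g. ilog2(0x10) == 4
     }
*/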
 
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
  return __builtin_bswap32(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_bswap32(__t);
#else
  return __builtin_bswap64(__t);
#endif
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t __t) {
  return __builtin_bswap64(__t);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t __t) {
  return __ror(__rev(__t), 16);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t __t) {
  return (((uint64_t)__rev16(__t >> 32)) << 32) | (uint64_t)__rev16((uint32_t)__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
    return __ror(__rev(__t), 16);
#else
    return __rev16ll(__t);
#endif
}

static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t __t) {
  return (int16_t)__builtin_bswap16((uint16_t)__t);
}
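/* Usage sketch (illustrative): byte-order conversion when parsing a
   big-endian wire format on a little-endian target.

     uint32_t be32_to_host(uint32_t be) {
       return __rev(be);                // 0x11223344 -> 0x44332211
     }
     // __rev16 swaps bytes within each halfword: 0xAABBCCDD -> 0xBBAADDCC
*/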
 
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t __t) {
  return __builtin_arm_rbit(__t);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t __t) {
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
  return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |
         __builtin_arm_rbit(__t >> 32);
#else
  return __builtin_arm_rbit64(__t);
#endif
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rbit(__t);
#else
  return __rbitll(__t);
#endif
}
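/* Usage sketch (illustrative): counting trailing zeros with only a
   leading-zero instruction, a common idiom on 32-bit Arm.

     unsigned ctz32(uint32_t x) {
       return __clz(__rbit(x));         // ctz32(0x8) == 3
     }
*/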
 
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smulbb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbb(__a, __b);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smulbt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbt(__a, __b);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smultb(int32_t __a, int32_t __b) {
  return __builtin_arm_smultb(__a, __b);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smultt(int32_t __a, int32_t __b) {
  return __builtin_arm_smultt(__a, __b);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smulwb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwb(__a, __b);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smulwt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwt(__a, __b);
}
#endif
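/* Usage sketch (illustrative): __smulbb multiplies the low halfwords of its
   operands as signed 16-bit values, so two Q15 samples packed into int32_t
   registers can be multiplied without unpacking.  The packed variables are
   placeholders for the example.

     int32_t prod_lo = __smulbb(a_packed, b_packed);  // lo16 * lo16
     int32_t prod_hi = __smultt(a_packed, b_packed);  // hi16 * hi16
*/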
 
#if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif
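/* Usage sketch (illustrative): clamp an accumulator to 16 signed bits.  The
   second operand must be a compile-time-constant bit width.

     int32_t acc = 40000;
     int32_t s16 = __ssat(acc, 16);     // saturates to 32767
*/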
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qadd(int32_t __t, int32_t __v) {
  return __builtin_arm_qadd(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qsub(int32_t __t, int32_t __v) {
  return __builtin_arm_qsub(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__qdbl(int32_t __t) {
  return __builtin_arm_qadd(__t, __t);
}
#endif
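/* Usage sketch (illustrative): saturating accumulate for audio mixing, where
   wrap-around on overflow would be audible.

     int32_t mixed = __qadd(sample_a, sample_b);  // clamps at INT32_MAX/MIN
*/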
 
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabb(__a, __b, __c);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabt(__a, __b, __c);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatb(__a, __b, __c);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatt(__a, __b, __c);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawb(__a, __b, __c);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp")))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawt(__a, __b, __c);
}
#endif
 
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
typedef uint32_t uint8x4_t;
typedef uint32_t uint16x2_t;

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_sxtab16(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtb16(int8x4_t __a) {
  return __builtin_arm_sxtb16(__a);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_uxtab16(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtb16(int8x4_t __a) {
  return __builtin_arm_uxtb16(__a);
}
#endif
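/* Usage sketch (illustrative): __sxtb16 sign-extends bytes 0 and 2 of a
   packed int8x4_t into two 16-bit lanes, the usual first step when feeding
   int8 data into 16-bit SIMD arithmetic; the rotate trick below extracts the
   odd bytes.

     int16x2_t lanes02 = __sxtb16(packed_bytes);
     int16x2_t lanes13 = __sxtb16(__ror(packed_bytes, 8));  // bytes 1 and 3
*/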
 
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_sel(__a, __b);
}
#endif
 
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qadd8(__a, __b);
}

static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qsub8(__a, __b);
}

static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__sadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_sadd8(__a, __b);
}

static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shadd8(__a, __b);
}

static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shsub8(__a, __b);
}

static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__ssub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_ssub8(__a, __b);
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uadd8(__a, __b);
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhadd8(__a, __b);
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhsub8(__a, __b);
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqadd8(__a, __b);
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqsub8(__a, __b);
}

static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__usub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usub8(__a, __b);
}
#endif
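/* Usage sketch (illustrative): average two RGBA pixels channel-by-channel.
   __uhadd8 computes (a+b)/2 in each byte lane, so no cross-lane carry or
   overflow is possible.

     uint8x4_t blend(uint8x4_t px0, uint8x4_t px1) {
       return __uhadd8(px0, px1);       // per-byte halving add
     }
*/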
 
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usad8(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
  return __builtin_arm_usada8(__a, __b, __c);
}
#endif
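/* Usage sketch (illustrative): sum of absolute differences over a 16-byte
   block, the building block of video motion estimation; the buffers are
   assumptions for the example.

     uint32_t sad = 0;
     for (int i = 0; i < 4; ++i)
       sad = __usada8(ref4[i], cur4[i], sad);  // 4 byte lanes per step
*/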
 
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qadd16(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qasx(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsax(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsub16(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sadd16(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sasx(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shadd16(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shasx(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsax(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsub16(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssax(__a, __b);
}

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssub16(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uadd16(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uasx(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhadd16(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhasx(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsax(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsub16(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqadd16(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqasx(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsax(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsub16(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usax(__a, __b);
}

static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usub16(__a, __b);
}
#endif
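/* Usage sketch (illustrative): add two stereo Q15 frames with per-lane
   saturation; one __qadd16 handles the left and right channels at once.
   The buffers and bound are assumptions for the example.

     for (int i = 0; i < n; ++i)
       dst[i] = __qadd16(a[i], b[i]);   // both lanes saturate independently
*/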
 
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlad(__a, __b, __c);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smladx(__a, __b, __c);
}

static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlald(__a, __b, __c);
}

static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlaldx(__a, __b, __c);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsd(__a, __b, __c);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsdx(__a, __b, __c);
}

static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsld(__a, __b, __c);
}

static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsldx(__a, __b, __c);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuad(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuad(__a, __b);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuadx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuadx(__a, __b);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusd(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusd(__a, __b);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusdx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusdx(__a, __b);
}
#endif
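/* Usage sketch (illustrative): a Q15 dot-product kernel; each __smlad step
   multiplies two 16-bit lane pairs and adds both products to the accumulator.
   n, x, and y are assumed inputs.

     int32_t acc = 0;
     for (int i = 0; i < n; ++i)
       acc = __smlad(x[i], y[i], acc);  // acc += x.lo*y.lo + x.hi*y.hi
*/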
 
#if (defined(__ARM_FEATURE_DIRECTED_ROUNDING) &&                               \
     (__ARM_FEATURE_DIRECTED_ROUNDING)) &&                                     \
    (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ double __attribute__((__always_inline__, __nodebug__))
__rintn(double __a) {
  return __builtin_roundeven(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__))
__rintnf(float __a) {
  return __builtin_roundevenf(__a);
}
#endif
 
#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) ||                   \
    (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32b(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32b(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32h(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32h(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32w(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32w(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32d(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32d(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cb(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32cb(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32ch(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32ch(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cw(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32cw(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cd(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32cd(__a, __b);
}
#endif
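/* Usage sketch (illustrative): CRC-32 over a byte buffer.  The 0xFFFFFFFF
   pre/post conditioning matches the common zlib-style convention; confirm it
   against the polynomial and reflection your protocol expects.

     uint32_t crc32_bytes(const uint8_t *p, size_t n) {
       uint32_t c = 0xFFFFFFFFu;
       while (n--)
         c = __crc32b(c, *p++);
       return c ^ 0xFFFFFFFFu;
     }
*/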
 
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
__jcvt(double __a) {
  return __builtin_arm_jcvt(__a);
}
#endif
 
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32zf(float __a) {
  return __builtin_arm_rint32zf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32z(double __a) {
  return __builtin_arm_rint32z(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64zf(float __a) {
  return __builtin_arm_rint64zf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64z(double __a) {
  return __builtin_arm_rint64z(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32xf(float __a) {
  return __builtin_arm_rint32xf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32x(double __a) {
  return __builtin_arm_rint32x(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64xf(float __a) {
  return __builtin_arm_rint64xf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64x(double __a) {
  return __builtin_arm_rint64x(__a);
}
#endif
 
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
typedef struct {
  uint64_t val[8];
} data512_t;

static __inline__ data512_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_ld64b(const void *__addr) {
  data512_t __value;
  __builtin_arm_ld64b(__addr, __value.val);
  return __value;
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64b(void *__addr, data512_t __value) {
  __builtin_arm_st64b(__addr, __value.val);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64bv(void *__addr, data512_t __value) {
  return __builtin_arm_st64bv(__addr, __value.val);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64bv0(void *__addr, data512_t __value) {
  return __builtin_arm_st64bv0(__addr, __value.val);
}
#endif
 
/* Special register intrinsics: sysreg must be a constant string naming the
   register, e.g. "cntvct_el0". */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
#define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg)
#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)
#define __arm_rsrf(sysreg) __builtin_bit_cast(float, __arm_rsr(sysreg))
#define __arm_rsrf64(sysreg) __builtin_bit_cast(double, __arm_rsr64(sysreg))
#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)
#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
#define __arm_wsr128(sysreg, v) __builtin_arm_wsr128(sysreg, v)
#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))
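/* Usage sketch (illustrative): read counter registers on AArch64.  Which
   registers are accessible depends on the exception level and target.

     uint64_t ticks = __arm_rsr64("cntvct_el0");  // virtual counter value
     uint64_t freq  = __arm_rsr64("cntfrq_el0");  // counter frequency in Hz
*/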
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
#define __arm_mte_exclude_tag(__ptr, __excluded) __builtin_arm_gmi(__ptr, __excluded)
#define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr)
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)

#define __arm_mops_memset_tag(__tagged_address, __value, __size)              \
  __builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
#endif
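/* Usage sketch (illustrative): derive a randomly tagged pointer for a 16-byte
   granule and store the tag.  This assumes memory already mapped with MTE
   enabled (e.g. PROT_MTE on Linux); the allocation is not shown.

     void *tp = __arm_mte_create_random_tag(p, 0);  // no excluded tags
     __arm_mte_set_tag(tp);                         // write tag to memory
     // subsequent accesses must use tp, not the untagged p
*/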
#if defined(__ARM_FEATURE_COPROC)

#if (__ARM_FEATURE_COPROC & 0x1)

#if (__ARM_ARCH < 8)
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)                           \
  __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
#endif /* __ARM_ARCH < 8 */

#define __arm_ldc(coproc, CRd, p) __builtin_arm_ldc(coproc, CRd, p)
#define __arm_stc(coproc, CRd, p) __builtin_arm_stc(coproc, CRd, p)

#define __arm_mcr(coproc, opc1, value, CRn, CRm, opc2)                         \
  __builtin_arm_mcr(coproc, opc1, value, CRn, CRm, opc2)
#define __arm_mrc(coproc, opc1, CRn, CRm, opc2)                                \
  __builtin_arm_mrc(coproc, opc1, CRn, CRm, opc2)

#if (__ARM_ARCH != 4) && (__ARM_ARCH < 8)
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
#endif

#if (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__)
#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)                           \
  __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2)
#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p)
#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p)
#endif

#endif /* __ARM_FEATURE_COPROC & 0x1 */

#if (__ARM_FEATURE_COPROC & 0x2)
#define __arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2)                          \
  __builtin_arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2)
#define __arm_ldc2(coproc, CRd, p) __builtin_arm_ldc2(coproc, CRd, p)
#define __arm_stc2(coproc, CRd, p) __builtin_arm_stc2(coproc, CRd, p)
#define __arm_ldc2l(coproc, CRd, p) __builtin_arm_ldc2l(coproc, CRd, p)
#define __arm_stc2l(coproc, CRd, p) __builtin_arm_stc2l(coproc, CRd, p)
#define __arm_mcr2(coproc, opc1, value, CRn, CRm, opc2)                        \
  __builtin_arm_mcr2(coproc, opc1, value, CRn, CRm, opc2)
#define __arm_mrc2(coproc, opc1, CRn, CRm, opc2)                               \
  __builtin_arm_mrc2(coproc, opc1, CRn, CRm, opc2)
#endif /* __ARM_FEATURE_COPROC & 0x2 */

#if (__ARM_FEATURE_COPROC & 0x4)
#define __arm_mcrr(coproc, opc1, value, CRm)                                   \
  __builtin_arm_mcrr(coproc, opc1, value, CRm)
#define __arm_mrrc(coproc, opc1, CRm) __builtin_arm_mrrc(coproc, opc1, CRm)
#endif /* __ARM_FEATURE_COPROC & 0x4 */

#if (__ARM_FEATURE_COPROC & 0x8)
#define __arm_mcrr2(coproc, opc1, value, CRm)                                  \
  __builtin_arm_mcrr2(coproc, opc1, value, CRm)
#define __arm_mrrc2(coproc, opc1, CRm) __builtin_arm_mrrc2(coproc, opc1, CRm)
#endif /* __ARM_FEATURE_COPROC & 0x8 */

#endif /* __ARM_FEATURE_COPROC */
#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME

#define _TMFAILURE_REASON  0x00007fffu
#define _TMFAILURE_RTRY    0x00008000u
#define _TMFAILURE_CNCL    0x00010000u
#define _TMFAILURE_MEM     0x00020000u
#define _TMFAILURE_IMP     0x00040000u
#define _TMFAILURE_ERR     0x00080000u
#define _TMFAILURE_SIZE    0x00100000u
#define _TMFAILURE_NEST    0x00200000u
#define _TMFAILURE_DBG     0x00400000u
#define _TMFAILURE_INT     0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u

#define __tstart()        __builtin_arm_tstart()
#define __tcommit()       __builtin_arm_tcommit()
#define __tcancel(__arg)  __builtin_arm_tcancel(__arg)
#define __ttest()         __builtin_arm_ttest()

#endif /* __ARM_FEATURE_TME */
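/* Usage sketch (illustrative): the canonical transaction retry pattern.
   __tstart returns 0 when the transaction starts; the fallback path (a
   conventional lock) is an assumption for the example.

     uint64_t status = __tstart();
     if (status == 0) {
       // ... transactional region ...
       __tcommit();
     } else if (!(status & _TMFAILURE_RTRY)) {
       // failure is not transient: fall back to a lock
     }
*/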
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndr(uint64_t *__p) {
  return __builtin_arm_rndr(__p);
}

static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndrrs(uint64_t *__p) {
  return __builtin_arm_rndrrs(__p);
}
#endif
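/* Usage sketch (illustrative): __rndr returns 0 on success and nonzero when
   no random data was available, so the result must be checked before use.

     uint64_t seed;
     while (__rndr(&seed) != 0) {
       // retry; RNDR can transiently fail
     }
*/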
 
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
__gcspr() {
  return (void *)__builtin_arm_rsr64("gcspr_el0");
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("gcs")))
__gcspopm() {
  return __builtin_arm_gcspopm(0);
}

static __inline__ void *__attribute__((__always_inline__, __nodebug__,
                                       target("gcs")))
__gcsss(void *__stack) {
  return __builtin_arm_gcsss(__stack);
}
#endif
 
#if defined(__cplusplus)
} /* extern "C" */
#endif

#endif /* __ARM_ACLE_H */