doxygen/ppc__wrappers_2tmmintrin_8h_source.html

/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------===

 *

 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 * See https://llvm.org/LICENSE.txt for license information.

 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 *

 *===-----------------------------------------------------------------------===

 */


/* Implemented from the specification included in the Intel C++ Compiler

   User Guide and Reference, version 9.0.  */


#ifndef NO_WARN_X86_INTRINSICS

/* This header is distributed to simplify porting x86_64 code that

   makes explicit use of Intel intrinsics to powerpc64le.


   It is the user's responsibility to determine if the results are

   acceptable and make additional changes as necessary.


   Note that much code that uses Intel intrinsics can be rewritten in

   standard C or GNU C extensions, which are more portable and better

   optimized across multiple targets.  */

#endif


#ifndef TMMINTRIN_H_

#define TMMINTRIN_H_


#if defined(__powerpc64__) &&                                                  \

    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))


#include <altivec.h>


/* We need definitions from the SSE header files.  */

#include <pmmintrin.h>


/* Absolute value of each signed 16-bit element of __A (via vec_abs).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi16(__m128i __A) {
  const __v8hi __lanes = (__v8hi)__A;
  const __v8hi __magnitude = vec_abs(__lanes);
  return (__m128i)__magnitude;
}


/* Absolute value of each signed 32-bit element of __A (via vec_abs).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi32(__m128i __A) {
  const __v4si __lanes = (__v4si)__A;
  const __v4si __magnitude = vec_abs(__lanes);
  return (__m128i)__magnitude;
}


/* Absolute value of each signed 8-bit element of __A (via vec_abs).  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi8(__m128i __A) {
  const __v16qi __lanes = (__v16qi)__A;
  const __v16qi __magnitude = vec_abs(__lanes);
  return (__m128i)__magnitude;
}


/* 64-bit variant: absolute value of each signed 16-bit element of __A.
   __A is replicated into both doublewords of a 128-bit vector, vec_abs is
   applied, and doubleword 0 is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi16(__m64 __A) {
  const __v2du __dup = {__A, __A};
  const __v8hi __magnitude = vec_abs((__v8hi)__dup);
  const __v2du __halves = (__v2du)__magnitude;
  return (__m64)__halves[0];
}


/* 64-bit variant: absolute value of each signed 32-bit element of __A.
   __A is replicated into both doublewords of a 128-bit vector, vec_abs is
   applied, and doubleword 0 is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi32(__m64 __A) {
  const __v2du __dup = {__A, __A};
  const __v4si __magnitude = vec_abs((__v4si)__dup);
  const __v2du __halves = (__v2du)__magnitude;
  return (__m64)__halves[0];
}


/* 64-bit variant: absolute value of each signed 8-bit element of __A.
   __A is replicated into both doublewords of a 128-bit vector, vec_abs is
   applied, and doubleword 0 is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi8(__m64 __A) {
  const __v2du __dup = {__A, __A};
  const __v16qi __magnitude = vec_abs((__v16qi)__dup);
  const __v2du __halves = (__v2du)__magnitude;
  return (__m64)__halves[0];
}


/* Combine __A and __B shifted by __count bytes.

   A compile-time-constant __count below 16 is handled with one vec_sld.
   All other counts use the octet shifts vec_slo / vec_sro:
     - __count == 0        : __B is returned unchanged;
     - __count >= 32       : an all-zero vector is returned;
     - 16 <= __count < 32  : only __A is shifted, by (__count - 16) bytes;
     - 0 < __count < 16    : both operands are shifted and OR-ed together.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_epi8(__m128i __A, __m128i __B, const unsigned int __count) {
  if (__builtin_constant_p(__count) && __count < 16) {
    __v16qu __first = (__v16qu)__A;
    __v16qu __second = (__v16qu)__B;
#ifdef __LITTLE_ENDIAN__
    /* On little-endian, byte-reverse the inputs before vec_sld ...  */
    __first = vec_reve(__first);
    __second = vec_reve(__second);
#endif
    __v16qu __joined = vec_sld(__second, __first, __count);
#ifdef __LITTLE_ENDIAN__
    /* ... and byte-reverse the result back afterwards.  */
    __joined = vec_reve(__joined);
#endif
    return (__m128i)__joined;
  }

  if (__count == 0)
    return __B;

  if (__count >= 32) {
    const __v16qu __none = {0};
    return (__m128i)__none;
  }

  if (__count >= 16) {
    /* Shift amount is expressed in bits and splatted to every byte.  */
    const __v16qu __bits = vec_splats((unsigned char)((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
    return (__m128i)vec_sro((__v16qu)__A, __bits);
#else
    return (__m128i)vec_slo((__v16qu)__A, __bits);
#endif
  }

  /* Remaining case: 0 < __count < 16.  */
  const __v16qu __bitsA = vec_splats((unsigned char)((16 - __count) * 8));
  const __v16qu __bitsB = vec_splats((unsigned char)(__count * 8));
#ifdef __LITTLE_ENDIAN__
  const __v16qu __partA = vec_slo((__v16qu)__A, __bitsA);
  const __v16qu __partB = vec_sro((__v16qu)__B, __bitsB);
#else
  const __v16qu __partA = vec_sro((__v16qu)__A, __bitsA);
  const __v16qu __partB = vec_slo((__v16qu)__B, __bitsB);
#endif
  return (__m128i)vec_or(__partA, __partB);
}


/* 64-bit variant of the byte alignment: __B and __A are placed in one
   128-bit vector, that vector is shifted by __count bytes with an octet
   shift, and doubleword 0 is returned.  Counts of 16 or more give zero.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_pi8(__m64 __A, __m64 __B, unsigned int __count) {
  if (__count >= 16) {
    const __m64 __none = {0};
    return __none;
  }

  const __v2du __pair = {__B, __A};
  /* The shift amount (in bits) goes into the word that is the rightmost
     in register order for the current endianness.  */
#ifdef __LITTLE_ENDIAN__
  const __v4su __bits = {__count << 3, 0, 0, 0};
  const __v2du __moved = (__v2du)vec_sro((__v16qu)__pair, (__v16qu)__bits);
#else
  const __v4su __bits = {0, 0, 0, __count << 3};
  const __v2du __moved = (__v2du)vec_slo((__v16qu)__pair, (__v16qu)__bits);
#endif
  return (__m64)__moved[0];
}


/* Horizontal add of adjacent 16-bit pairs.  Two permutes gather the first
   and the second halfword of every pair across __A then __B; the gathered
   vectors are then added element-wise.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi16(__m128i __A, __m128i __B) {
  const __v8hi __lhs = (__v8hi)__A;
  const __v8hi __rhs = (__v8hi)__B;
  /* Byte selectors; indices 16..31 address the second vec_perm operand.  */
  const __v16qu __pick_first = {0,  1,  4,  5,  8,  9,  12, 13,
                                16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __pick_second = {2,  3,  6,  7,  10, 11, 14, 15,
                                 18, 19, 22, 23, 26, 27, 30, 31};
  const __v8hi __first = vec_perm(__lhs, __rhs, __pick_first);
  const __v8hi __second = vec_perm(__lhs, __rhs, __pick_second);
  return (__m128i)vec_add(__first, __second);
}


/* Horizontal add of adjacent 32-bit pairs.  Two permutes gather the first
   and the second word of every pair across __A then __B; the gathered
   vectors are then added element-wise.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi32(__m128i __A, __m128i __B) {
  const __v4si __lhs = (__v4si)__A;
  const __v4si __rhs = (__v4si)__B;
  /* Byte selectors; indices 16..31 address the second vec_perm operand.  */
  const __v16qu __pick_first = {0,  1,  2,  3,  8,  9,  10, 11,
                                16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __pick_second = {4,  5,  6,  7,  12, 13, 14, 15,
                                 20, 21, 22, 23, 28, 29, 30, 31};
  const __v4si __first = vec_perm(__lhs, __rhs, __pick_first);
  const __v4si __second = vec_perm(__lhs, __rhs, __pick_second);
  return (__m128i)vec_add(__first, __second);
}


/* 64-bit horizontal add of adjacent 16-bit pairs.  __A and __B are packed
   into one 128-bit vector; the first and second halfword of every pair are
   gathered (the selectors repeat, so both doublewords get the same data),
   added, and doubleword 1 of the sum is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi16(__m64 __A, __m64 __B) {
  const __v2du __pair = {__A, __B};
  const __v8hi __packed = (__v8hi)__pair;
  const __v16qu __pick_first = {0, 1, 4, 5, 8, 9, 12, 13,
                                0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __pick_second = {2, 3, 6, 7, 10, 11, 14, 15,
                                 2, 3, 6, 7, 10, 11, 14, 15};
  const __v8hi __first = vec_perm(__packed, __packed, __pick_first);
  const __v8hi __second = vec_perm(__packed, __packed, __pick_second);
  const __v8hi __sum = vec_add(__first, __second);
  return (__m64)((__v2du)__sum)[1];
}


/* 64-bit horizontal add of adjacent 32-bit pairs.  __A and __B are packed
   into one 128-bit vector; the first and second word of every pair are
   gathered (the selectors repeat, so both doublewords get the same data),
   added, and doubleword 1 of the sum is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi32(__m64 __A, __m64 __B) {
  const __v2du __pair = {__A, __B};
  const __v4si __packed = (__v4si)__pair;
  const __v16qu __pick_first = {0, 1, 2, 3, 8, 9, 10, 11,
                                0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __pick_second = {4, 5, 6, 7, 12, 13, 14, 15,
                                 4, 5, 6, 7, 12, 13, 14, 15};
  const __v4si __first = vec_perm(__packed, __packed, __pick_first);
  const __v4si __second = vec_perm(__packed, __packed, __pick_second);
  const __v4si __sum = vec_add(__first, __second);
  return (__m64)((__v2du)__sum)[1];
}


/* Saturating horizontal add of adjacent 16-bit pairs.  vec_sum4s (with a
   zero accumulator) sums the halfwords of each operand into 32-bit
   elements; vec_packs then narrows both sum vectors back to 16-bit
   elements.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_epi16(__m128i __A, __m128i __B) {
  const __v4si __none = {0};
  const __v4si __sumsA = vec_sum4s((__v8hi)__A, __none);
  const __v4si __sumsB = vec_sum4s((__v8hi)__B, __none);
  return (__m128i)vec_packs(__sumsA, __sumsB);
}


/* 64-bit saturating horizontal add of adjacent 16-bit pairs.  __A and __B
   are packed into one 128-bit vector, vec_sum4s (zero accumulator) forms
   32-bit sums, vec_packs narrows them, and doubleword 1 is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_pi16(__m64 __A, __m64 __B) {
  const __v4si __none = {0};
  const __v2du __pair = {__A, __B};
  const __v4si __sums = vec_sum4s((__v8hi)__pair, __none);
  /* Packing the sums with themselves puts the same narrowed values in
     both doublewords.  */
  const __v8hi __narrow = vec_packs(__sums, __sums);
  return (__m64)((__v2du)__narrow)[1];
}


/* Horizontal subtract of adjacent 16-bit pairs.  Two permutes gather the
   first and the second halfword of every pair across __A then __B; the
   second is subtracted from the first element-wise.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi16(__m128i __A, __m128i __B) {
  const __v8hi __lhs = (__v8hi)__A;
  const __v8hi __rhs = (__v8hi)__B;
  /* Byte selectors; indices 16..31 address the second vec_perm operand.  */
  const __v16qu __pick_first = {0,  1,  4,  5,  8,  9,  12, 13,
                                16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __pick_second = {2,  3,  6,  7,  10, 11, 14, 15,
                                 18, 19, 22, 23, 26, 27, 30, 31};
  const __v8hi __minuend = vec_perm(__lhs, __rhs, __pick_first);
  const __v8hi __subtrahend = vec_perm(__lhs, __rhs, __pick_second);
  return (__m128i)vec_sub(__minuend, __subtrahend);
}


/* Horizontal subtract of adjacent 32-bit pairs.  Two permutes gather the
   first and the second word of every pair across __A then __B; the second
   is subtracted from the first element-wise.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi32(__m128i __A, __m128i __B) {
  const __v4si __lhs = (__v4si)__A;
  const __v4si __rhs = (__v4si)__B;
  /* Byte selectors; indices 16..31 address the second vec_perm operand.  */
  const __v16qu __pick_first = {0,  1,  2,  3,  8,  9,  10, 11,
                                16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __pick_second = {4,  5,  6,  7,  12, 13, 14, 15,
                                 20, 21, 22, 23, 28, 29, 30, 31};
  const __v4si __minuend = vec_perm(__lhs, __rhs, __pick_first);
  const __v4si __subtrahend = vec_perm(__lhs, __rhs, __pick_second);
  return (__m128i)vec_sub(__minuend, __subtrahend);
}


/* 64-bit horizontal subtract of adjacent 16-bit pairs.  __A and __B are
   packed into one 128-bit vector; the first and second halfword of every
   pair are gathered, the second is subtracted from the first, and
   doubleword 1 of the difference is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi16(__m64 __A, __m64 __B) {
  const __v2du __pair = {__A, __B};
  const __v8hi __packed = (__v8hi)__pair;
  const __v16qu __pick_first = {0, 1, 4, 5, 8, 9, 12, 13,
                                0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __pick_second = {2, 3, 6, 7, 10, 11, 14, 15,
                                 2, 3, 6, 7, 10, 11, 14, 15};
  const __v8hi __minuend = vec_perm(__packed, __packed, __pick_first);
  const __v8hi __subtrahend = vec_perm(__packed, __packed, __pick_second);
  const __v8hi __diff = vec_sub(__minuend, __subtrahend);
  return (__m64)((__v2du)__diff)[1];
}


/* 64-bit horizontal subtract of adjacent 32-bit pairs.  __A and __B are
   packed into one 128-bit vector; the first and second word of every pair
   are gathered, the second is subtracted from the first, and doubleword 1
   of the difference is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi32(__m64 __A, __m64 __B) {
  const __v2du __pair = {__A, __B};
  const __v4si __packed = (__v4si)__pair;
  const __v16qu __pick_first = {0, 1, 2, 3, 8, 9, 10, 11,
                                0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __pick_second = {4, 5, 6, 7, 12, 13, 14, 15,
                                 4, 5, 6, 7, 12, 13, 14, 15};
  const __v4si __minuend = vec_perm(__packed, __packed, __pick_first);
  const __v4si __subtrahend = vec_perm(__packed, __packed, __pick_second);
  const __v4si __diff = vec_sub(__minuend, __subtrahend);
  return (__m64)((__v2du)__diff)[1];
}


/* Saturating horizontal subtract of adjacent 16-bit pairs.  Two permutes
   gather the first and the second halfword of every pair across __A then
   __B; vec_subs subtracts the second from the first.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_epi16(__m128i __A, __m128i __B) {
  const __v8hi __lhs = (__v8hi)__A;
  const __v8hi __rhs = (__v8hi)__B;
  /* Byte selectors; indices 16..31 address the second vec_perm operand.  */
  const __v16qu __pick_first = {0,  1,  4,  5,  8,  9,  12, 13,
                                16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __pick_second = {2,  3,  6,  7,  10, 11, 14, 15,
                                 18, 19, 22, 23, 26, 27, 30, 31};
  const __v8hi __minuend = vec_perm(__lhs, __rhs, __pick_first);
  const __v8hi __subtrahend = vec_perm(__lhs, __rhs, __pick_second);
  return (__m128i)vec_subs(__minuend, __subtrahend);
}


/* 64-bit saturating horizontal subtract of adjacent 16-bit pairs.  __A and
   __B are packed into one 128-bit vector; the first and second halfword of
   every pair are gathered, vec_subs subtracts the second from the first,
   and doubleword 1 of the result is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_pi16(__m64 __A, __m64 __B) {
  const __v2du __pair = {__A, __B};
  const __v8hi __packed = (__v8hi)__pair;
  const __v16qu __pick_first = {0, 1, 4, 5, 8, 9, 12, 13,
                                0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __pick_second = {2, 3, 6, 7, 10, 11, 14, 15,
                                 2, 3, 6, 7, 10, 11, 14, 15};
  const __v8hi __minuend = vec_perm(__packed, __packed, __pick_first);
  const __v8hi __subtrahend = vec_perm(__packed, __packed, __pick_second);
  const __v8hi __diff = vec_subs(__minuend, __subtrahend);
  return (__m64)((__v2du)__diff)[1];
}


/* Byte shuffle of __A controlled by __B.  vec_perm picks bytes of __A using
   __B as the selector; every result byte whose control byte in __B is
   negative (as a signed char) is then replaced with zero.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_epi8(__m128i __A, __m128i __B) {
  const __v16qi __none = {0};
  const __v16qi __data = (__v16qi)__A;
  const __v16qi __picked = vec_perm(__data, __data, (__v16qu)__B);
  /* All-ones in lanes where the control byte is below zero.  */
  const __vector __bool char __clear = vec_cmplt((__v16qi)__B, __none);
  return (__m128i)vec_sel(__picked, __none, __clear);
}


/* 64-bit byte shuffle of __A controlled by __B.  Both operands are
   replicated into the two doublewords of a 128-bit vector, vec_perm picks
   bytes using the control vector, bytes whose control byte is negative are
   zeroed, and doubleword 0 is returned.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_pi8(__m64 __A, __m64 __B) {
  const __v16qi __none = {0};
  const __v2du __dupA = {__A, __A};
  const __v2du __dupB = {__B, __B};
  const __v16qi __data = (__v16qi)__dupA;
  const __v16qi __ctl = (__v16qi)__dupB;
  /* All-ones in lanes where the control byte is below zero.  */
  const __vector __bool char __clear = vec_cmplt(__ctl, __none);
  const __v16qi __picked = vec_perm(__data, __data, (__v16qu)__ctl);
  const __v16qi __masked = vec_sel(__picked, __none, __clear);
  return (__m64)((__v2du)__masked)[0];
}


#ifdef _ARCH_PWR8

/* Apply the sign of each 8-bit element of __B to the matching element of
   __A.  A per-element factor of -1, 0 or +1 is derived from __B (negative,
   zero, positive) and multiplied into __A.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi8(__m128i __A, __m128i __B) {
  const __v16qi __none = {0};
  const __v16qi __ctl = (__v16qi)__B;
  /* Compare mask reinterpreted as signed: -1 where __B < 0, else 0.  */
  const __v16qi __minus = (__v16qi)vec_cmplt(__ctl, __none);
  /* Negated compare mask: +1 where __B > 0, else 0.  */
  const __v16qi __plus = (__v16qi)vec_neg((__v16qi)vec_cmpgt(__ctl, __none));
  const __v16qi __factor = vec_add(__minus, __plus);
  return (__m128i)vec_mul((__v16qi)__A, __factor);
}

#endif


#ifdef _ARCH_PWR8

/* Apply the sign of each 16-bit element of __B to the matching element of
   __A.  A per-element factor of -1, 0 or +1 is derived from __B (negative,
   zero, positive) and multiplied into __A.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi16(__m128i __A, __m128i __B) {
  const __v8hi __none = {0};
  const __v8hi __ctl = (__v8hi)__B;
  /* Compare mask reinterpreted as signed: -1 where __B < 0, else 0.  */
  const __v8hi __minus = (__v8hi)vec_cmplt(__ctl, __none);
  /* Negated compare mask: +1 where __B > 0, else 0.  */
  const __v8hi __plus = (__v8hi)vec_neg((__v8hi)vec_cmpgt(__ctl, __none));
  const __v8hi __factor = vec_add(__minus, __plus);
  return (__m128i)vec_mul((__v8hi)__A, __factor);
}

#endif


#ifdef _ARCH_PWR8

/* Apply the sign of each 32-bit element of __B to the matching element of
   __A.  A per-element factor of -1, 0 or +1 is derived from __B (negative,
   zero, positive) and multiplied into __A.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi32(__m128i __A, __m128i __B) {
  const __v4si __none = {0};
  const __v4si __ctl = (__v4si)__B;
  /* Compare mask reinterpreted as signed: -1 where __B < 0, else 0.  */
  const __v4si __minus = (__v4si)vec_cmplt(__ctl, __none);
  /* Negated compare mask: +1 where __B > 0, else 0.  */
  const __v4si __plus = (__v4si)vec_neg((__v4si)vec_cmpgt(__ctl, __none));
  const __v4si __factor = vec_add(__minus, __plus);
  return (__m128i)vec_mul((__v4si)__A, __factor);
}

#endif


#ifdef _ARCH_PWR8

/* 64-bit variant of _mm_sign_epi8: applies the sign of each 8-bit element
   of __B to the matching element of __A.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi8(__m64 __A, __m64 __B) {
  /* Replicate each operand into both doublewords so the 128-bit
     implementation can be reused.  */
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __C = (__v16qi)_mm_sign_epi8((__m128i)__C, (__m128i)__D);
  /* Doubleword 0 of the 128-bit result is the 64-bit answer.  */
  return (__m64)((__v2du)(__C))[0];
}

#endif


#ifdef _ARCH_PWR8

/* 64-bit variant of _mm_sign_epi16: applies the sign of each 16-bit element
   of __B to the matching element of __A.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi16(__m64 __A, __m64 __B) {
  /* Replicate each operand into both doublewords so the 128-bit
     implementation can be reused.  */
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __C = (__v8hi)_mm_sign_epi16((__m128i)__C, (__m128i)__D);
  /* Doubleword 0 of the 128-bit result is the 64-bit answer.  */
  return (__m64)((__v2du)(__C))[0];
}

#endif


#ifdef _ARCH_PWR8

/* 64-bit variant of _mm_sign_epi32: applies the sign of each 32-bit element
   of __B to the matching element of __A.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi32(__m64 __A, __m64 __B) {
  /* Replicate each operand into both doublewords so the 128-bit
     implementation can be reused.  */
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __C = (__v4si)_mm_sign_epi32((__m128i)__C, (__m128i)__D);
  /* Doubleword 0 of the 128-bit result is the 64-bit answer.  */
  return (__m64)((__v2du)(__C))[0];
}

#endif


/* Multiply the bytes of __A (treated as unsigned) by the bytes of __B
   (treated as signed) and add adjacent 16-bit products with saturation.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_epi16(__m128i __A, __m128i __B) {
  const __v8hi __low8 = vec_splats((signed short)0x00ff);
  const __v16qi __bytesA = (__v16qi)__A;
  const __v16qi __bytesB = (__v16qi)__B;

  /* The unpack widens signed bytes; masking with 0x00ff discards the sign
     extension so __A's bytes become zero-extended halfwords.  */
  const __v8hi __wideAh = vec_and(vec_unpackh(__bytesA), __low8);
  const __v8hi __wideAl = vec_and(vec_unpackl(__bytesA), __low8);

  /* __B's bytes keep their sign extension.  */
  const __v8hi __prodh = vec_mul(__wideAh, vec_unpackh(__bytesB));
  const __v8hi __prodl = vec_mul(__wideAl, vec_unpackl(__bytesB));

  /* Gather the first and second product of every adjacent pair; indices
     16..31 address the second vec_perm operand.  */
  const __v16qu __pick_first = {0,  1,  4,  5,  8,  9,  12, 13,
                                16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __pick_second = {2,  3,  6,  7,  10, 11, 14, 15,
                                 18, 19, 22, 23, 26, 27, 30, 31};
  const __v8hi __first = vec_perm(__prodh, __prodl, __pick_first);
  const __v8hi __second = vec_perm(__prodh, __prodl, __pick_second);
  return (__m128i)vec_adds(__first, __second);
}


/* 64-bit variant: multiply the bytes of __A (treated as unsigned) by the
   bytes of __B (treated as signed), add adjacent 16-bit products with
   saturation, and return doubleword 0 of the result.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_pi16(__m64 __A, __m64 __B) {
  const __v2du __dupA = {__A, __A};
  const __v2du __dupB = {__B, __B};
  const __v8hi __low8 = vec_splats((signed short)0x00ff);

  /* Widen __A's bytes and mask off the sign extension; __B's bytes stay
     sign-extended.  */
  const __v8hi __wideA = vec_and(vec_unpackl((__v16qi)__dupA), __low8);
  const __v8hi __wideB = vec_unpackl((__v16qi)__dupB);
  const __v8hi __prod = vec_mul(__wideA, __wideB);

  /* Gather the first and second product of every adjacent pair; both
     vec_perm operands are the same product vector.  */
  const __v16qu __pick_first = {0,  1,  4,  5,  8,  9,  12, 13,
                                16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __pick_second = {2,  3,  6,  7,  10, 11, 14, 15,
                                 18, 19, 22, 23, 26, 27, 30, 31};
  const __v8hi __first = vec_perm(__prod, __prod, __pick_first);
  const __v8hi __second = vec_perm(__prod, __prod, __pick_second);
  const __v8hi __sum = vec_adds(__first, __second);
  return (__m64)((__v2du)__sum)[0];
}


/* Multiply signed 16-bit elements, then compute ((product >> 14) + 1) >> 1
   on the 32-bit products and pack the results back to 16-bit elements.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_epi16(__m128i __A, __m128i __B) {
  const __v8hi __lhs = (__v8hi)__A;
  const __v8hi __rhs = (__v8hi)__B;
  const __v4su __by14 = vec_splats((unsigned int)14);
  const __v4si __one = vec_splats((signed int)1);
  const __v4su __by1 = (__v4su)__one;

  /* 32-bit products of the widened halfwords, one vector per half.  */
  __v4si __prodh = vec_mul(vec_unpackh(__lhs), vec_unpackh(__rhs));
  __v4si __prodl = vec_mul(vec_unpackl(__lhs), vec_unpackl(__rhs));

  /* Shift right by 14, add the rounding bit, shift right once more.  */
  __prodh = vec_sr(vec_add(vec_sr(__prodh, __by14), __one), __by1);
  __prodl = vec_sr(vec_add(vec_sr(__prodl, __by14), __one), __by1);

  return (__m128i)vec_pack(__prodh, __prodl);
}


/* 64-bit variant: multiply signed 16-bit elements, compute
   ((product >> 14) + 1) >> 1 on the 32-bit products, pack to 16-bit
   elements and return doubleword 0.  */
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_pi16(__m64 __A, __m64 __B) {
  const __v2du __dupA = {__A, __A};
  const __v2du __dupB = {__B, __B};
  const __v4si __wideA = vec_unpackh((__v8hi)__dupA);
  const __v4si __wideB = vec_unpackh((__v8hi)__dupB);
  const __v4su __by14 = vec_splats((unsigned int)14);
  const __v4si __one = vec_splats((signed int)1);

  __v4si __prod = vec_mul(__wideA, __wideB);
  /* Shift right by 14, add the rounding bit, shift right once more.  */
  __prod = vec_sr(vec_add(vec_sr(__prod, __by14), __one), (__v4su)__one);

  /* NOTE(review): only doubleword 0 of the packed vector is returned, so
     the second vec_pack operand appears not to influence the result.  */
  const __v8hi __packed = vec_pack(__prod, __wideB);
  return (__m64)((__v2du)__packed)[0];
}


#else

#include_next <tmmintrin.h>

#endif /* defined(__powerpc64__) &&                                            \

        *   (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */


#endif /* TMMINTRIN_H_ */

__attribute__
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
Definition __clang_hip_libdevice_declares.h:285

altivec.h

vec_sr
static __inline__ vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, vector unsigned char __b)
Definition altivec.h:10393

vec_sro
static __inline__ vector signed char __ATTRS_o_ai vec_sro(vector signed char __a, vector signed char __b)
Definition altivec.h:10979

vec_reve
static __inline__ __ATTRS_o_ai vector bool char vec_reve(vector bool char __a)
Definition altivec.h:17528

vec_splats
static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a)
Definition altivec.h:14737

vec_sld
static __inline__ vector signed int __ATTRS_o_ai vec_sld(vector signed int, vector signed int, unsigned const int __c)
Definition altivec.h:9149

vec_unpackl
static __inline__ vector short __ATTRS_o_ai vec_unpackl(vector signed char __a)
Definition altivec.h:12781

vec_sum4s
static __inline__ vector int __ATTRS_o_ai vec_sum4s(vector signed char __a, vector int __b)
Definition altivec.h:12487

vec_mul
static __inline__ vector signed char __ATTRS_o_ai vec_mul(vector signed char __a, vector signed char __b)
Definition altivec.h:6205

vec_and
static __inline__ vector signed char __ATTRS_o_ai vec_and(vector signed char __a, vector signed char __b)
Definition altivec.h:882

vec_subs
static __inline__ vector signed char __ATTRS_o_ai vec_subs(vector signed char __a, vector signed char __b)
Definition altivec.h:12149

vec_adds
static __inline__ vector signed char __ATTRS_o_ai vec_adds(vector signed char __a, vector signed char __b)
Definition altivec.h:626

vec_perm
static __inline__ vector signed char __ATTRS_o_ai vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c)
Definition altivec.h:7962

vec_sel
static __inline__ vector signed char __ATTRS_o_ai vec_sel(vector signed char __a, vector signed char __b, vector unsigned char __c)
Definition altivec.h:8588

vec_cmplt
static __inline__ vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a, vector signed char __b)
Definition altivec.h:2435

vec_slo
static __inline__ vector signed char __ATTRS_o_ai vec_slo(vector signed char __a, vector signed char __b)
Definition altivec.h:9884

vec_pack
static __inline__ vector signed char __ATTRS_o_ai vec_pack(vector signed short __a, vector signed short __b)
Definition altivec.h:7389

vec_neg
static vector float __ATTRS_o_ai vec_neg(vector float __a)
Definition altivec.h:18238

vec_or
static __inline__ vector signed char __ATTRS_o_ai vec_or(vector signed char __a, vector signed char __b)
Definition altivec.h:6865

vec_unpackh
static __inline__ vector short __ATTRS_o_ai vec_unpackh(vector signed char __a)
Definition altivec.h:12642

vec_add
static __inline__ vector signed char __ATTRS_o_ai vec_add(vector signed char __a, vector signed char __b)
Definition altivec.h:200

vec_abs
static __inline__ vector signed char __ATTRS_o_ai vec_abs(vector signed char __a)
Definition altivec.h:117

vec_cmpgt
static __inline__ vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a, vector signed char __b)
Definition altivec.h:2131

vec_packs
static __inline__ vector signed char __ATTRS_o_ai vec_packs(vector short __a, vector short __b)
Definition altivec.h:7715

vec_sub
static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector signed char __a, vector signed char __b)
Definition altivec.h:11869

__D
static __inline__ void short __D
Definition immintrin.h:342

__conv
#define __conv
Definition opencl-c.h:36

__P
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25

pmmintrin.h

_mm_hsub_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32]...
Definition tmmintrin.h:410

_mm_abs_pi8
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi8(__m64 __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition tmmintrin.h:48

_mm_sign_epi8
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_epi8(__m128i __a, __m128i __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition tmmintrin.h:642

_mm_abs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition tmmintrin.h:98

_mm_alignr_epi8
#define _mm_alignr_epi8(a, b, n)
Concatenates the two 128-bit integer vector operands, and right-shifts the result by the number of by...
Definition tmmintrin.h:155

_mm_maddubs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition tmmintrin.h:496

_mm_hsub_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_epi32(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32...
Definition tmmintrin.h:366

_mm_hsubs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a, __m64 __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 64-bit ...
Definition tmmintrin.h:461

_mm_sign_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a, __m64 __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:742

_mm_abs_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi32(__m64 __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition tmmintrin.h:114

_mm_hadd_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16].
Definition tmmintrin.h:246

_mm_sign_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_epi32(__m128i __a, __m128i __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:692

_mm_alignr_pi8
#define _mm_alignr_pi8(a, b, n)
Concatenates the two 64-bit integer vector operands, and right-shifts the result by the number of byt...
Definition tmmintrin.h:178

_mm_abs_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition tmmintrin.h:131

_mm_abs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi16(__m64 __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition tmmintrin.h:81

_mm_sign_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a, __m64 __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:768

_mm_maddubs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_maddubs_pi16(__m64 __a, __m64 __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition tmmintrin.h:525

_mm_hadd_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16].
Definition tmmintrin.h:203

_mm_sign_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_epi16(__m128i __a, __m128i __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:667

_mm_hadds_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_epi16(__m128i __a, __m128i __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 128-bit vect...
Definition tmmintrin.h:296

_mm_hadds_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a, __m64 __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 64-bit vecto...
Definition tmmintrin.h:320

_mm_hadd_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_epi32(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32].
Definition tmmintrin.h:225

_mm_mulhrs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition tmmintrin.h:545

_mm_hsub_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition tmmintrin.h:387

_mm_shuffle_pi8
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_shuffle_pi8(__m64 __a, __m64 __b)
Copies the 8-bit integers from a 64-bit integer vector to the destination or clears 8-bit values in t...
Definition tmmintrin.h:614

_mm_hadd_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32].
Definition tmmintrin.h:269

_mm_mulhrs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_pi16(__m64 __a, __m64 __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition tmmintrin.h:564

_mm_shuffle_epi8
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
Definition tmmintrin.h:590

_mm_hsub_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition tmmintrin.h:344

_mm_hsubs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_epi16(__m128i __a, __m128i __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 128-bit...
Definition tmmintrin.h:437

_mm_sign_pi8
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a, __m64 __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition tmmintrin.h:716

_mm_abs_epi8
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition tmmintrin.h:65