10#ifndef __RISCV_PACKED_SIMD_H
11#define __RISCV_PACKED_SIMD_H
15#if defined(__cplusplus)
/* Attribute set attached to every function generated by the helper macros in
 * this header: __always_inline__ requests unconditional inlining and
 * __nodebug__ asks the compiler not to emit debug info for the function. */
33#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
/* Build a compound literal of type `ty` whose 2, 4 or 8 initializers are all
 * `x` (presumably `ty` is a vector type with that many lanes -- confirm at the
 * use sites). `x` is expanded once per initializer, so an argument with side
 * effects would be evaluated multiple times. */
35#define __packed_splat2(ty, x) ((ty){(x), (x)})
36#define __packed_splat4(ty, x) ((ty){(x), (x), (x), (x)})
37#define __packed_splat8(ty, x) ((ty){(x), (x), (x), (x), (x), (x), (x), (x)})
/* Define `__riscv_<name>`, a static inline function taking one `scalar_ty`
 * value and returning `splat(ty, __x)`, where `splat` is a macro or function
 * accepting (type, value).
 * NOTE(review): the line closing the function body is not visible in this
 * excerpt. */
39#define __packed_splat(name, ty, scalar_ty, splat) \
40 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(scalar_ty __x) { \
41 return splat(ty, __x); \
/* Define `__riscv_<name>`, which applies the operator `op` to `__rs1` (of type
 * `ty`) and to `__rs2` ANDed with `mask`, so only the bits selected by `mask`
 * contribute to the right-hand operand.
 * NOTE(review): the line declaring `__rs2` and the closing brace are not
 * visible in this excerpt; the type of `__rs2` cannot be confirmed from here. */
44#define __packed_shift(name, ty, op, mask) \
45 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
47 return __rs1 op(__rs2 & (mask)); \
/* Convenience wrappers around __packed_shift with a fixed mask of 0x7, 0xf and
 * 0x1f respectively (7, 15 and 31 -- presumably lane width minus one for
 * 8-, 16- and 32-bit elements, as the macro names suggest). */
49#define __packed_shift8(name, ty, op) __packed_shift(name, ty, op, 0x7)
50#define __packed_shift16(name, ty, op) __packed_shift(name, ty, op, 0xf)
51#define __packed_shift32(name, ty, op) __packed_shift(name, ty, op, 0x1f)
/* Define `__riscv_<name>`, which returns `__rs1 op splat(ty, __rs2)`: the
 * second operand is first expanded through `splat` into a value of type `ty`,
 * then combined with `__rs1` using `op`.
 * NOTE(review): the line declaring `__rs2` (presumably of type `scalar_ty`)
 * and the closing brace are not visible in this excerpt. */
53#define __packed_scalar_binary_op(name, ty, scalar_ty, op, splat) \
54 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
56 return __rs1 op splat(ty, __rs2); \
/* Define `__riscv_<name>`, a static inline function taking two `ty` operands
 * and returning `__rs1 op __rs2`.
 * NOTE(review): the closing brace line is not visible in this excerpt. */
59#define __packed_binary_op(name, ty, op) \
60 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
61 return __rs1 op __rs2; \
/* Define `__riscv_<name>`, a static inline function taking a single `ty`
 * operand and returning `ty`.
 * NOTE(review): the function body is not visible in this excerpt, so how `op`
 * is applied to `__rs1` cannot be confirmed from here. */
64#define __packed_unary_op(name, ty, op) \
65 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1) { \
/* Define `__riscv_<name>`, a static inline function taking two `ty` operands
 * and returning `builtin(__rs1, __rs2)` unchanged.
 * NOTE(review): the closing brace line is not visible in this excerpt. */
69#define __packed_binary_builtin(name, ty, builtin) \
70 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
71 return builtin(__rs1, __rs2); \
/* Define `__riscv_<name>`, which shifts `__rs1` left by one bit and adds
 * `__rs2`, i.e. returns `(__rs1 << 1) + __rs2` using the plain `<<` and `+`
 * operators on `ty`.
 * NOTE(review): the closing brace line is not visible in this excerpt. */
74#define __packed_sh1add(name, ty) \
75 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
76 return (__rs1 << 1) + __rs2; \
/* Define `__riscv_<name>`, the saturating counterpart of the shift-by-one-and-
 * add helper: `__rs1` is doubled with __builtin_elementwise_add_sat(__rs1,
 * __rs1) and `__rs2` is then added with a second
 * __builtin_elementwise_add_sat, so both steps go through the saturating-add
 * builtin rather than `<<` and `+`.
 * NOTE(review): the closing brace line is not visible in this excerpt. */
83#define __packed_sh1sadd(name, ty) \
84 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
85 return __builtin_elementwise_add_sat( \
86 __builtin_elementwise_add_sat(__rs1, __rs1), __rs2); \
/* Define `__riscv_<name>`, which evaluates `__rs1 op __rs2` and casts the
 * result to the return type `rty`.
 * NOTE(review): the line declaring `__rs2` and the closing brace are not
 * visible in this excerpt. */
89#define __packed_cmp(name, ty, rty, op) \
90 static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
92 return (rty)(__rs1 op __rs2); \
/* Define `__riscv_<name>`, which passes its single `ty` operand to
 * __builtin_elementwise_abs and casts the result to the return type `rty`.
 * NOTE(review): the closing brace line is not visible in this excerpt. */
95#define __packed_pabs(name, ty, rty) \
96 static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1) { \
97 return (rty)__builtin_elementwise_abs(__rs1); \
/* Define `__riscv_<name>`, which calls `builtin(__rs1, __rs2)` and casts the
 * result to the return type `rty`.
 * NOTE(review): the line declaring `__rs2` and the closing brace are not
 * visible in this excerpt. */
100#define __packed_binary_builtin_cast(name, ty, rty, builtin) \
101 static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
103 return (rty)builtin(__rs1, __rs2); \
/* Define `__riscv_<name>`, which returns `builtin(__rs1, __rs2)` as `rty`
 * with no explicit cast. Note the parameter order: the return type `rty`
 * comes BEFORE the operand type `ty` here, unlike the other helpers that take
 * both.
 * NOTE(review): the line declaring `__rs2` and the closing brace are not
 * visible in this excerpt. */
106#define __packed_reduction(name, rty, ty, builtin) \
107 static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
109 return builtin(__rs1, __rs2); \
/* Remove the internal helper macros defined earlier in this header so they do
 * not remain defined for code that includes it. */
453#undef __packed_splat2
454#undef __packed_splat4
455#undef __packed_splat8
458#undef __packed_shift8
459#undef __packed_shift16
460#undef __packed_shift32
461#undef __packed_scalar_binary_op
462#undef __packed_binary_op
463#undef __packed_unary_op
464#undef __packed_binary_builtin
465#undef __packed_sh1add
466#undef __packed_sh1sadd
469#undef __packed_binary_builtin_cast
470#undef __packed_reduction
471#undef __DEFAULT_FN_ATTRS
473#if defined(__cplusplus)
#define __packed_cmp(name, ty, rty, op)
__packed_splat4 int16x2_t
#define __packed_binary_op(name, ty, op)
#define __packed_splat2(ty, x)
int8_t int8x4_t __attribute__((__vector_size__(4)))
#define __packed_reduction(name, rty, ty, builtin)
#define __packed_shift8(name, ty, op)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint16x4_t
#define __packed_splat8(ty, x)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 int32_t
#define __packed_scalar_binary_op(name, ty, scalar_ty, op, splat)
#define __packed_shift16(name, ty, op)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint8_t
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 uint16_t
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint32x2_t
#define __packed_binary_builtin(name, ty, builtin)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 uint32_t
#define __packed_unary_op(name, ty, op)
#define __packed_sh1add(name, ty)
#define __packed_pabs(name, ty, rty)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint16x2_t
#define __packed_sh1sadd(name, ty)
#define __packed_shift32(name, ty, op)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 int32x2_t
#define __packed_splat(name, ty, scalar_ty, splat)
__packed_splat4 __packed_splat2 int8x8_t
#define __packed_binary_builtin_cast(name, ty, rty, builtin)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint8x4_t
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint8x8_t
#define __packed_splat4(ty, x)
__packed_splat4 __packed_splat2 __packed_splat8 int16x4_t