10#ifndef __RISCV_PACKED_SIMD_H
11#define __RISCV_PACKED_SIMD_H
15#if defined(__cplusplus)
/* Attribute list applied to every function generated by the helper macros in
 * this header: __always_inline__ and __nodebug__. */
33#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
/* Splat helpers: each expands to a compound literal of type `ty` whose 2, 4 or
 * 8 initializers are all `x`.
 *
 * `x` is expanded once per initializer, so an argument with side effects is
 * evaluated that many times.
 * NOTE(review): `ty` is presumably one of the packed vector typedefs with a
 * matching lane count -- confirm against the typedefs in this header. */
35#define __packed_splat2(ty, x) ((ty){(x), (x)})
36#define __packed_splat4(ty, x) ((ty){(x), (x), (x), (x)})
37#define __packed_splat8(ty, x) ((ty){(x), (x), (x), (x), (x), (x), (x), (x)})
39#define __packed_splat(name, ty, scalar_ty, splat) \
40 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(scalar_ty __x) { \
41 return splat(ty, __x); \
44#define __packed_shift(name, ty, op, mask) \
45 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
47 return __rs1 op(__rs2 & (mask)); \
49#define __packed_shift8(name, ty, op) __packed_shift(name, ty, op, 0x7) /* forwards to __packed_shift with mask 0x7; that macro ANDs the shift operand with the mask before applying op */
50#define __packed_shift16(name, ty, op) __packed_shift(name, ty, op, 0xf) /* same, mask 0xf */
51#define __packed_shift32(name, ty, op) __packed_shift(name, ty, op, 0x1f) /* same, mask 0x1f; NOTE(review): masks look like element width minus one -- confirm */
53#define __packed_scalar_binary_op(name, ty, scalar_ty, op, splat) \
54 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
56 return __rs1 op splat(ty, __rs2); \
59#define __packed_binary_op(name, ty, op) \
60 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
61 return __rs1 op __rs2; \
64#define __packed_unary_op(name, ty, op) \
65 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1) { \
69#define __packed_binary_builtin(name, ty, builtin) \
70 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
71 return builtin(__rs1, __rs2); \
74#define __packed_sh1add(name, ty) \
75 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
76 return (__rs1 << 1) + __rs2; \
83#define __packed_sh1sadd(name, ty) \
84 static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
85 return __builtin_elementwise_add_sat( \
86 __builtin_elementwise_add_sat(__rs1, __rs1), __rs2); \
286#undef __packed_splat2 /* from here on: undefine the helper macros defined earlier in this header */
287#undef __packed_splat4
288#undef __packed_splat8
/* NOTE(review): no #undef for __packed_splat or __packed_shift is visible in
 * this view -- confirm they are undefined as well. */
291#undef __packed_shift8
292#undef __packed_shift16
293#undef __packed_shift32
294#undef __packed_scalar_binary_op
295#undef __packed_binary_op
296#undef __packed_unary_op
297#undef __packed_binary_builtin
298#undef __packed_sh1add
299#undef __packed_sh1sadd
300#undef __DEFAULT_FN_ATTRS
302#if defined(__cplusplus)
#define __packed_binary_op(name, ty, op)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint8x8_t
#define __packed_splat2(ty, x)
int8_t int8x4_t __attribute__((__vector_size__(4)))
#define __packed_shift8(name, ty, op)
__packed_splat4 __packed_splat2 __packed_splat8 int16x4_t
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint16x4_t
#define __packed_splat8(ty, x)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 int32_t
#define __packed_scalar_binary_op(name, ty, scalar_ty, op, splat)
#define __packed_shift16(name, ty, op)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint8_t
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 uint16_t
#define __packed_binary_builtin(name, ty, builtin)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 uint32_t
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint16x2_t
#define __packed_unary_op(name, ty, op)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint8x4_t
__packed_splat4 int16x2_t
#define __packed_sh1add(name, ty)
#define __packed_sh1sadd(name, ty)
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 uint32x2_t
#define __packed_shift32(name, ty, op)
#define __packed_splat(name, ty, scalar_ty, splat)
__packed_splat4 __packed_splat2 int8x8_t
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 int32x2_t
#define __packed_splat4(ty, x)