16 uint32_t block_count[3];
17 uint16_t group_size[3];
20 uint64_t global_offset[3];
30 uint32_t private_base;
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
amdhsa_implicit_kernarg_v5
#define remainder(__x, __y)