clang 20.0.0git
Macros | Functions
gpuintrin.h File Reference
#include <stdint.h>

Go to the source code of this file.

Macros

#define _DEFAULT_FN_ATTRS
 
#define bool   _Bool
 
#define __GPU_X_DIM   0
 
#define __GPU_Y_DIM   1
 
#define __GPU_Z_DIM   2
 
#define __DO_LANE_SUM(__type, __suffix)
 
#define __DO_LANE_SCAN(__type, __bitmask_type, __suffix)
 

Functions

 _Pragma ("push_macro(\"bool\")")
 
 _Pragma ("omp begin declare target device_type(nohost)")
 
 _Pragma ("omp begin declare variant match(device = {kind(gpu)})")
 
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_blocks (int __dim)
 
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_block_id (int __dim)
 
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_threads (int __dim)
 
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_thread_id (int __dim)
 
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_first_lane_id (uint64_t __lane_mask)
 
static _DEFAULT_FN_ATTRS __inline__ bool __gpu_is_first_in_lane (uint64_t __lane_mask)
 
static _DEFAULT_FN_ATTRS __inline__ float __gpu_read_first_lane_f32 (uint64_t __lane_mask, float __x)
 
static _DEFAULT_FN_ATTRS __inline__ double __gpu_read_first_lane_f64 (uint64_t __lane_mask, double __x)
 
static _DEFAULT_FN_ATTRS __inline__ float __gpu_shuffle_idx_f32 (uint64_t __lane_mask, uint32_t __idx, float __x)
 
static _DEFAULT_FN_ATTRS __inline__ double __gpu_shuffle_idx_f64 (uint64_t __lane_mask, uint32_t __idx, double __x)
 
 __DO_LANE_SUM (uint32_t, u32)
 
 __DO_LANE_SUM (uint64_t, u64)
 
 __DO_LANE_SUM (float, f32)
 
 __DO_LANE_SUM (double, f64)
 
 __DO_LANE_SCAN (uint32_t, uint32_t, u32)
 
 __DO_LANE_SCAN (uint64_t, uint64_t, u64)
 
 __DO_LANE_SCAN (float, uint32_t, f32)
 
 __DO_LANE_SCAN (double, uint64_t, f64)
 
 _Pragma ("omp end declare variant")
 
 _Pragma ("omp end declare target")
 
 _Pragma ("pop_macro(\"bool\")")
 

Macro Definition Documentation

◆ __DO_LANE_SCAN

#define __DO_LANE_SCAN (   __type,
  __bitmask_type,
  __suffix 
)
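Value (note: as shown, the generated function returns uint32_t and takes uint32_t __x regardless of __type, unlike __DO_LANE_SUM, which uses __type for both):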
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_scan_##__suffix( \
uint64_t __lane_mask, uint32_t __x) { \
for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { \
uint32_t __index = __gpu_lane_id() - __step; \
__bitmask_type bitmask = __gpu_lane_id() >= __step; \
__x += __builtin_bit_cast( \
__type, \
-bitmask & __builtin_bit_cast(__bitmask_type, \
__gpu_shuffle_idx_##__suffix( \
__lane_mask, __index, __x))); \
} \
return __x; \
}

Symbols referenced in the macro body above (not part of the macro value):
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_lane_id(void)
Definition: amdgpuintrin.h:103
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_lanes(void)
Definition: amdgpuintrin.h:98
#define _DEFAULT_FN_ATTRS
Definition: gpuintrin.h:24

Definition at line 167 of file gpuintrin.h.

◆ __DO_LANE_SUM

#define __DO_LANE_SUM (   __type,
  __suffix 
)
Value:
_DEFAULT_FN_ATTRS static __inline__ __type __gpu_lane_sum_##__suffix( \
uint64_t __lane_mask, __type __x) { \
for (uint32_t __step = __gpu_num_lanes() / 2; __step > 0; __step /= 2) { \
uint32_t __index = __step + __gpu_lane_id(); \
__x += __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x); \
} \
return __gpu_read_first_lane_##__suffix(__lane_mask, __x); \
}

Definition at line 151 of file gpuintrin.h.

◆ __GPU_X_DIM

#define __GPU_X_DIM   0

Definition at line 46 of file gpuintrin.h.

◆ __GPU_Y_DIM

#define __GPU_Y_DIM   1

Definition at line 47 of file gpuintrin.h.

◆ __GPU_Z_DIM

#define __GPU_Z_DIM   2

Definition at line 48 of file gpuintrin.h.

◆ _DEFAULT_FN_ATTRS

#define _DEFAULT_FN_ATTRS

Definition at line 24 of file gpuintrin.h.

◆ bool

#define bool   _Bool

Definition at line 40 of file gpuintrin.h.

Function Documentation

◆ __DO_LANE_SCAN() [1/4]

__DO_LANE_SCAN ( double  ,
uint64_t  ,
f64   
)

◆ __DO_LANE_SCAN() [2/4]

__DO_LANE_SCAN ( float  ,
uint32_t  ,
f32   
)

◆ __DO_LANE_SCAN() [3/4]

__DO_LANE_SCAN ( uint32_t  ,
uint32_t  ,
u32   
)

◆ __DO_LANE_SCAN() [4/4]

__DO_LANE_SCAN ( uint64_t  ,
uint64_t  ,
u64   
)

◆ __DO_LANE_SUM() [1/4]

__DO_LANE_SUM ( double  ,
f64   
)

◆ __DO_LANE_SUM() [2/4]

__DO_LANE_SUM ( float  ,
f32   
)

◆ __DO_LANE_SUM() [3/4]

__DO_LANE_SUM ( uint32_t  ,
u32   
)

◆ __DO_LANE_SUM() [4/4]

__DO_LANE_SUM ( uint64_t  ,
u64   
)

◆ __gpu_block_id()

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_block_id ( int  __dim)
static

Definition at line 65 of file gpuintrin.h.

References __gpu_block_id_x(), __gpu_block_id_y(), and __gpu_block_id_z().

◆ __gpu_first_lane_id()

static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_first_lane_id ( uint64_t  __lane_mask)
static

Definition at line 108 of file gpuintrin.h.

Referenced by __gpu_is_first_in_lane().

◆ __gpu_is_first_in_lane()

static _DEFAULT_FN_ATTRS __inline__ bool __gpu_is_first_in_lane ( uint64_t  __lane_mask)
static

Definition at line 114 of file gpuintrin.h.

References __gpu_first_lane_id(), and __gpu_lane_id().

◆ __gpu_num_blocks()

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_blocks ( int  __dim)
static

Definition at line 51 of file gpuintrin.h.

References __gpu_num_blocks_x(), __gpu_num_blocks_y(), and __gpu_num_blocks_z().

◆ __gpu_num_threads()

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_threads ( int  __dim)
static

Definition at line 79 of file gpuintrin.h.

References __gpu_num_threads_x(), __gpu_num_threads_y(), and __gpu_num_threads_z().

◆ __gpu_read_first_lane_f32()

static _DEFAULT_FN_ATTRS __inline__ float __gpu_read_first_lane_f32 ( uint64_t  __lane_mask,
float  __x 
)
static

Definition at line 120 of file gpuintrin.h.

References __gpu_read_first_lane_u32().

◆ __gpu_read_first_lane_f64()

static _DEFAULT_FN_ATTRS __inline__ double __gpu_read_first_lane_f64 ( uint64_t  __lane_mask,
double  __x 
)
static

Definition at line 128 of file gpuintrin.h.

References __gpu_read_first_lane_u64().

◆ __gpu_shuffle_idx_f32()

static _DEFAULT_FN_ATTRS __inline__ float __gpu_shuffle_idx_f32 ( uint64_t  __lane_mask,
uint32_t  __idx,
float  __x 
)
static

Definition at line 136 of file gpuintrin.h.

References __gpu_shuffle_idx_u32().

◆ __gpu_shuffle_idx_f64()

static _DEFAULT_FN_ATTRS __inline__ double __gpu_shuffle_idx_f64 ( uint64_t  __lane_mask,
uint32_t  __idx,
double  __x 
)
static

Definition at line 144 of file gpuintrin.h.

References __gpu_shuffle_idx_u64().

◆ __gpu_thread_id()

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_thread_id ( int  __dim)
static

Definition at line 93 of file gpuintrin.h.

References __gpu_thread_id_x(), __gpu_thread_id_y(), and __gpu_thread_id_z().

◆ _Pragma() [1/6]

_Pragma ( "omp begin declare target device_type(nohost)"  )

◆ _Pragma() [2/6]

_Pragma ( "omp begin declare variant match(device = {kind(gpu)})"  )

◆ _Pragma() [3/6]

_Pragma ( "omp end declare target"  )

◆ _Pragma() [4/6]

_Pragma ( "omp end declare variant"  )

◆ _Pragma() [5/6]

_Pragma ( "pop_macro(\"bool\")"  )

◆ _Pragma() [6/6]

_Pragma ( "push_macro(\"bool\")"  )