clang 23.0.0git
gpuintrin.h File Reference
#include <stdint.h>

Go to the source code of this file.

Macros

#define _DEFAULT_FN_ATTRS
#define bool   _Bool
#define __gpu_kernel   __attribute__((device_kernel, visibility("protected")))
#define __GPU_X_DIM   0
#define __GPU_Y_DIM   1
#define __GPU_Z_DIM   2
#define __DO_LANE_OPS(__type, __op, __identity, __prefix, __suffix)
#define __GPU_OP(__x, __y)
#define __GPU_OP(__x, __y)
#define __GPU_OP(__x, __y)
#define __GPU_OP(__x, __y)
#define __GPU_OP(__x, __y)
#define __GPU_OP(__x, __y)
#define __GPU_OP(__x, __y)
#define __GPU_OP(__x, __y)

Functions

 _Pragma ("push_macro(\"bool\")")
 _Pragma ("omp begin declare target device_type(nohost)")
 _Pragma ("omp begin declare variant match(device = {kind(gpu)})")
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_any_u32_impl (uint64_t __lane_mask, uint32_t __x)
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_any_u64_impl (uint64_t __lane_mask, uint64_t __x)
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_all_u32_impl (uint64_t __lane_mask, uint32_t __x)
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_all_u64_impl (uint64_t __lane_mask, uint64_t __x)
 _Pragma ("omp end declare variant")
 _Pragma ("omp end declare target")
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_blocks (int __dim)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_block_id (int __dim)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_threads (int __dim)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_thread_id (int __dim)
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_first_lane_id (uint64_t __lane_mask)
static _DEFAULT_FN_ATTRS __inline__ bool __gpu_is_first_in_lane (uint64_t __lane_mask)
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_read_first_lane_u64 (uint64_t __lane_mask, uint64_t __x)
static _DEFAULT_FN_ATTRS __inline__ float __gpu_read_first_lane_f32 (uint64_t __lane_mask, float __x)
static _DEFAULT_FN_ATTRS __inline__ double __gpu_read_first_lane_f64 (uint64_t __lane_mask, double __x)
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_shuffle_idx_u64 (uint64_t __lane_mask, uint32_t __idx, uint64_t __x, uint32_t __width)
static _DEFAULT_FN_ATTRS __inline__ float __gpu_shuffle_idx_f32 (uint64_t __lane_mask, uint32_t __idx, float __x, uint32_t __width)
static _DEFAULT_FN_ATTRS __inline__ double __gpu_shuffle_idx_f64 (uint64_t __lane_mask, uint32_t __idx, double __x, uint32_t __width)
 __DO_LANE_OPS (uint32_t, __GPU_OP, 0, sum, u32)
 __DO_LANE_OPS (uint64_t, __GPU_OP, 0, sum, u64)
 __DO_LANE_OPS (float, __GPU_OP, 0, sum, f32)
 __DO_LANE_OPS (double, __GPU_OP, 0, sum, f64)
 __DO_LANE_OPS (uint32_t, __GPU_OP, UINT32_MAX, and, u32)
 __DO_LANE_OPS (uint64_t, __GPU_OP, UINT64_MAX, and, u64)
 __DO_LANE_OPS (uint32_t, __GPU_OP, 0, or, u32)
 __DO_LANE_OPS (uint64_t, __GPU_OP, 0, or, u64)
 __DO_LANE_OPS (uint32_t, __GPU_OP, 0, xor, u32)
 __DO_LANE_OPS (uint64_t, __GPU_OP, 0, xor, u64)
 __DO_LANE_OPS (uint32_t, __GPU_OP, UINT32_MAX, min, u32)
 __DO_LANE_OPS (uint64_t, __GPU_OP, UINT64_MAX, min, u64)
 __DO_LANE_OPS (uint32_t, __GPU_OP, 0, max, u32)
 __DO_LANE_OPS (uint64_t, __GPU_OP, 0, max, u64)
 __DO_LANE_OPS (float, __GPU_OP, __builtin_inff(), minnum, f32)
 __DO_LANE_OPS (double, __GPU_OP, __builtin_inf(), minnum, f64)
 __DO_LANE_OPS (float, __GPU_OP, -__builtin_inff(), maxnum, f32)
 __DO_LANE_OPS (double, __GPU_OP, -__builtin_inf(), maxnum, f64)
 _Pragma ("pop_macro(\"bool\")")

Macro Definition Documentation

◆ __DO_LANE_OPS

#define __DO_LANE_OPS ( __type,
__op,
__identity,
__prefix,
__suffix )

Definition at line 210 of file gpuintrin.h.

◆ __gpu_kernel

#define __gpu_kernel   __attribute__((device_kernel, visibility("protected")))

Definition at line 73 of file gpuintrin.h.

◆ __GPU_OP [1/8]

#define __GPU_OP ( __x,
__y )
Value:
((__x) + (__y))

◆ __GPU_OP [2/8]

#define __GPU_OP ( __x,
__y )
Value:
((__x) & (__y))

Definition at line 249 of file gpuintrin.h.

◆ __GPU_OP [3/8]

#define __GPU_OP ( __x,
__y )
Value:
((__x) | (__y))

Definition at line 249 of file gpuintrin.h.

◆ __GPU_OP [4/8]

#define __GPU_OP ( __x,
__y )
Value:
((__x) ^ (__y))

Definition at line 249 of file gpuintrin.h.

◆ __GPU_OP [5/8]

#define __GPU_OP ( __x,
__y )
Value:
((__x) < (__y) ? (__x) : (__y))

Definition at line 249 of file gpuintrin.h.

◆ __GPU_OP [6/8]

#define __GPU_OP ( __x,
__y )
Value:
((__x) > (__y) ? (__x) : (__y))

Definition at line 249 of file gpuintrin.h.

◆ __GPU_OP [7/8]

#define __GPU_OP ( __x,
__y )
Value:
__builtin_elementwise_minnum((__x), (__y))

Definition at line 249 of file gpuintrin.h.

◆ __GPU_OP [8/8]

#define __GPU_OP ( __x,
__y )
Value:
__builtin_elementwise_maxnum((__x), (__y))

Definition at line 249 of file gpuintrin.h.

◆ __GPU_X_DIM

#define __GPU_X_DIM   0

Definition at line 75 of file gpuintrin.h.

◆ __GPU_Y_DIM

#define __GPU_Y_DIM   1

Definition at line 76 of file gpuintrin.h.

◆ __GPU_Z_DIM

#define __GPU_Z_DIM   2

Definition at line 77 of file gpuintrin.h.

◆ _DEFAULT_FN_ATTRS

#define _DEFAULT_FN_ATTRS

Definition at line 24 of file gpuintrin.h.

◆ bool

#define bool   _Bool

Function Documentation

◆ __DO_LANE_OPS() [1/18]

__DO_LANE_OPS ( double ,
__GPU_OP ,
-__builtin_inf() ,
maxnum ,
f64  )

References __GPU_OP and _DEFAULT_FN_ATTRS.

◆ __DO_LANE_OPS() [2/18]

__DO_LANE_OPS ( double ,
__GPU_OP ,
0 ,
sum ,
f64  )

References __GPU_OP.

◆ __DO_LANE_OPS() [3/18]

__DO_LANE_OPS ( double ,
__GPU_OP ,
__builtin_inf() ,
minnum ,
f64  )

References __GPU_OP.

◆ __DO_LANE_OPS() [4/18]

__DO_LANE_OPS ( float ,
__GPU_OP ,
-__builtin_inff() ,
maxnum ,
f32  )

References __GPU_OP.

◆ __DO_LANE_OPS() [5/18]

__DO_LANE_OPS ( float ,
__GPU_OP ,
0 ,
sum ,
f32  )

References __GPU_OP.

◆ __DO_LANE_OPS() [6/18]

__DO_LANE_OPS ( float ,
__GPU_OP ,
__builtin_inff() ,
minnum ,
f32  )

References __GPU_OP.

◆ __DO_LANE_OPS() [7/18]

__DO_LANE_OPS ( uint32_t ,
__GPU_OP ,
0 ,
max ,
u32  )

References __GPU_OP and max().

◆ __DO_LANE_OPS() [8/18]

__DO_LANE_OPS ( uint32_t ,
__GPU_OP ,
0 ,
or ,
u32  )

References __GPU_OP, and or.

◆ __DO_LANE_OPS() [9/18]

__DO_LANE_OPS ( uint32_t ,
__GPU_OP ,
0 ,
sum ,
u32  )

References __GPU_OP.

◆ __DO_LANE_OPS() [10/18]

__DO_LANE_OPS ( uint32_t ,
__GPU_OP ,
0 ,
xor ,
u32  )

References __GPU_OP, and xor.

◆ __DO_LANE_OPS() [11/18]

__DO_LANE_OPS ( uint32_t ,
__GPU_OP ,
UINT32_MAX ,
and ,
u32  )

References __GPU_OP, and and.

◆ __DO_LANE_OPS() [12/18]

__DO_LANE_OPS ( uint32_t ,
__GPU_OP ,
UINT32_MAX ,
min ,
u32  )

References __GPU_OP and min().

◆ __DO_LANE_OPS() [13/18]

__DO_LANE_OPS ( uint64_t ,
__GPU_OP ,
0 ,
max ,
u64  )

References __GPU_OP and max().

◆ __DO_LANE_OPS() [14/18]

__DO_LANE_OPS ( uint64_t ,
__GPU_OP ,
0 ,
or ,
u64  )

References __GPU_OP, and or.

◆ __DO_LANE_OPS() [15/18]

__DO_LANE_OPS ( uint64_t ,
__GPU_OP ,
0 ,
sum ,
u64  )

References __GPU_OP.

◆ __DO_LANE_OPS() [16/18]

__DO_LANE_OPS ( uint64_t ,
__GPU_OP ,
0 ,
xor ,
u64  )

References __GPU_OP, and xor.

◆ __DO_LANE_OPS() [17/18]

__DO_LANE_OPS ( uint64_t ,
__GPU_OP ,
UINT64_MAX ,
and ,
u64  )

References __GPU_OP, and and.

◆ __DO_LANE_OPS() [18/18]

__DO_LANE_OPS ( uint64_t ,
__GPU_OP ,
UINT64_MAX ,
min ,
u64  )

References __GPU_OP and min().

◆ __gpu_block_id()

_DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_block_id ( int __dim)
static

◆ __gpu_first_lane_id()

_DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_first_lane_id ( uint64_t __lane_mask)
static

Definition at line 137 of file gpuintrin.h.

Referenced by __gpu_is_first_in_lane().

◆ __gpu_is_first_in_lane()

_DEFAULT_FN_ATTRS __inline__ bool __gpu_is_first_in_lane ( uint64_t __lane_mask)
static

Definition at line 143 of file gpuintrin.h.

References __gpu_first_lane_id() and __gpu_lane_id().

◆ __gpu_match_all_u32_impl()

_DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_all_u32_impl ( uint64_t __lane_mask,
uint32_t __x )
static

◆ __gpu_match_all_u64_impl()

_DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_all_u64_impl ( uint64_t __lane_mask,
uint64_t __x )
static

◆ __gpu_match_any_u32_impl()

_DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_any_u32_impl ( uint64_t __lane_mask,
uint32_t __x )
static

◆ __gpu_match_any_u64_impl()

_DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_any_u64_impl ( uint64_t __lane_mask,
uint64_t __x )
static

◆ __gpu_num_blocks()

_DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_blocks ( int __dim)
static

◆ __gpu_num_threads()

_DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_threads ( int __dim)
static

◆ __gpu_read_first_lane_f32()

_DEFAULT_FN_ATTRS __inline__ float __gpu_read_first_lane_f32 ( uint64_t __lane_mask,
float __x )
static

Definition at line 159 of file gpuintrin.h.

References __gpu_read_first_lane_u32().

◆ __gpu_read_first_lane_f64()

_DEFAULT_FN_ATTRS __inline__ double __gpu_read_first_lane_f64 ( uint64_t __lane_mask,
double __x )
static

Definition at line 167 of file gpuintrin.h.

References __gpu_read_first_lane_u64().

◆ __gpu_read_first_lane_u64()

_DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_read_first_lane_u64 ( uint64_t __lane_mask,
uint64_t __x )
static

Definition at line 149 of file gpuintrin.h.

References __gpu_read_first_lane_u32().

Referenced by __gpu_read_first_lane_f64().

◆ __gpu_shuffle_idx_f32()

_DEFAULT_FN_ATTRS __inline__ float __gpu_shuffle_idx_f32 ( uint64_t __lane_mask,
uint32_t __idx,
float __x,
uint32_t __width )
static

Definition at line 187 of file gpuintrin.h.

References __gpu_shuffle_idx_u32().

◆ __gpu_shuffle_idx_f64()

_DEFAULT_FN_ATTRS __inline__ double __gpu_shuffle_idx_f64 ( uint64_t __lane_mask,
uint32_t __idx,
double __x,
uint32_t __width )
static

Definition at line 196 of file gpuintrin.h.

References __gpu_shuffle_idx_u64().

◆ __gpu_shuffle_idx_u64()

_DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_shuffle_idx_u64 ( uint64_t __lane_mask,
uint32_t __idx,
uint64_t __x,
uint32_t __width )
static

◆ __gpu_thread_id()

_DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_thread_id ( int __dim)
static

◆ _Pragma() [1/6]

_Pragma ( "omp begin declare target device_type(nohost)" )

◆ _Pragma() [2/6]

_Pragma ( "omp begin declare variant match(device = {kind(gpu)})" )

References _DEFAULT_FN_ATTRS.

◆ _Pragma() [3/6]

_Pragma ( "omp end declare target" )

◆ _Pragma() [4/6]

_Pragma ( "omp end declare variant" )

◆ _Pragma() [5/6]

_Pragma ( "pop_macro(\"bool\")" )

◆ _Pragma() [6/6]

_Pragma ( "push_macro(\"bool\")" )