10#define __NVPTXINTRIN_H
13#error "This file is intended for NVPTX targets or offloading to NVPTX"
18#if !defined(__cplusplus)
// Compile everything up to the matching 'end declare target' as device-only
// code, selected only when offloading to an nvptx64 device under OpenMP.
_Pragma("omp begin declare target device_type(nohost)");
_Pragma("omp begin declare variant match(device = {arch(nvptx64)})");
// Type aliases to the address spaces used by the NVPTX backend.
#define __gpu_private __attribute__((address_space(5)))
#define __gpu_constant __attribute__((address_space(4)))
#define __gpu_local __attribute__((address_space(3)))
#define __gpu_global __attribute__((address_space(1)))
#define __gpu_generic __attribute__((address_space(0)))

// Attribute to declare a function as a GPU kernel entry point.
#define __gpu_kernel __attribute__((nvptx_kernel, visibility("protected")))
38 return __nvvm_read_ptx_sreg_nctaid_x();
43 return __nvvm_read_ptx_sreg_nctaid_y();
48 return __nvvm_read_ptx_sreg_nctaid_z();
53 return __nvvm_read_ptx_sreg_ctaid_x();
58 return __nvvm_read_ptx_sreg_ctaid_y();
63 return __nvvm_read_ptx_sreg_ctaid_z();
68 return __nvvm_read_ptx_sreg_ntid_x();
73 return __nvvm_read_ptx_sreg_ntid_y();
78 return __nvvm_read_ptx_sreg_ntid_z();
83 return __nvvm_read_ptx_sreg_tid_x();
88 return __nvvm_read_ptx_sreg_tid_y();
93 return __nvvm_read_ptx_sreg_tid_z();
98 return __nvvm_read_ptx_sreg_warpsize();
103 return __nvvm_read_ptx_sreg_laneid();
108 return __nvvm_activemask();
114 uint32_t __mask = (uint32_t)__lane_mask;
115 uint32_t __id = __builtin_ffs(__mask) - 1;
116 return __nvvm_shfl_sync_idx_i32(__mask, __x, __id,
__gpu_num_lanes() - 1);
122 uint32_t __hi = (uint32_t)(__x >> 32ull);
123 uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
124 uint32_t __mask = (uint32_t)__lane_mask;
125 uint32_t __id = __builtin_ffs(__mask) - 1;
126 return ((uint64_t)__nvvm_shfl_sync_idx_i32(__mask, __hi, __id,
129 ((uint64_t)__nvvm_shfl_sync_idx_i32(__mask, __lo, __id,
136 uint32_t __mask = (uint32_t)__lane_mask;
137 return __nvvm_vote_ballot_sync(__mask, __x);
147 __nvvm_bar_warp_sync((uint32_t)__lane_mask);
153 uint32_t __mask = (uint32_t)__lane_mask;
154 uint32_t __bitmask = (__mask >> __idx) & 1u;
162 uint32_t __hi = (uint32_t)(__x >> 32ull);
163 uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
164 uint32_t __mask = (uint32_t)__lane_mask;
165 uint64_t __bitmask = (__mask >> __idx) & 1u;
166 return -__bitmask & ((uint64_t)__nvvm_shfl_sync_idx_i32(
169 ((uint64_t)__nvvm_shfl_sync_idx_i32(__mask, __lo, __idx,
175 return __nvvm_isspacep_shared(ptr);
180 return __nvvm_isspacep_local(ptr);
190 if (__nvvm_reflect(
"__CUDA_ARCH") >= 700)
191 asm(
"nanosleep.u32 64;" :::
"memory");
197#if !defined(__cplusplus)
#define _DEFAULT_FN_ATTRS
static _DEFAULT_FN_ATTRS __inline__ void __gpu_thread_suspend(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_thread_id_x(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_lane_id(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_threads_z(void)
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_lane_mask(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_read_first_lane_u32(uint64_t __lane_mask, uint32_t __x)
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_block_id_y(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_thread_id_z(void)
static _DEFAULT_FN_ATTRS __inline__ void __gpu_sync_lane(uint64_t __lane_mask)
static _DEFAULT_FN_ATTRS __inline__ bool __gpu_is_ptr_private(void *ptr)
static _DEFAULT_FN_ATTRS __inline__ bool __gpu_is_ptr_local(void *ptr)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_blocks_x(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_block_id_z(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_threads_y(void)
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_threads_x(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_lanes(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_thread_id_y(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_blocks_z(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_block_id_x(void)
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_ballot(uint64_t __lane_mask, bool __x)
static _DEFAULT_FN_ATTRS __inline__ void __gpu_exit(void)
static _DEFAULT_FN_ATTRS __inline__ void __gpu_sync_threads(void)
static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_blocks_y(void)
_Pragma("push_macro(\"bool\")")