14#ifndef __CLANG_CUDA_TEXTURE_INTRINSICS_H__
15#define __CLANG_CUDA_TEXTURE_INTRINSICS_H__
17#error "This file is for CUDA compilation only."
21#define __nv_tex_surf_handler(__op, __ptr, ...) \
22 ::__cuda_tex::__tex_fetch< \
23 ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash(__op)>>(__ptr, \
26#pragma push_macro("__ASM_OUT")
27#pragma push_macro("__ASM_OUTP")
28#pragma push_macro("__Args")
29#pragma push_macro("__ID")
30#pragma push_macro("__IDV")
31#pragma push_macro("__IMPL_2DGATHER")
32#pragma push_macro("__IMPL_ALIAS")
33#pragma push_macro("__IMPL_ALIASI")
34#pragma push_macro("__IMPL_F1")
35#pragma push_macro("__IMPL_F3")
36#pragma push_macro("__IMPL_F3N")
37#pragma push_macro("__IMPL_F3S")
38#pragma push_macro("__IMPL_S")
39#pragma push_macro("__IMPL_S3")
40#pragma push_macro("__IMPL_S3I")
41#pragma push_macro("__IMPL_S3N")
42#pragma push_macro("__IMPL_S3NI")
43#pragma push_macro("__IMPL_S3S")
44#pragma push_macro("__IMPL_S3SI")
45#pragma push_macro("__IMPL_SI")
46#pragma push_macro("__L")
47#pragma push_macro("__STRIP_PARENS")
72constexpr int __tex_len(
const char *
s) {
73 return (
s[0] == 0) ? 0
108constexpr int __tex_hash_map(
int c) {
109 return (
c == 49) ? 10
133constexpr int __tex_op_hash(
const char *str) {
134 return __tex_len(str) + __tex_hash_map(str[7] + 1) + __tex_hash_map(str[6]) +
135 __tex_hash_map(str[5]) + __tex_hash_map(str[__tex_len(str) - 1]);
139template <
int N>
struct __Tag;
140#define __ID(__op) __Tag<__tex_op_hash(__op)>
143#define __IDV(__op, __variant) \
144 __Tag<10000 + __tex_op_hash(__op) * 100 + __variant>
148template <
class>
struct __TypeInfoT;
150template <>
struct __TypeInfoT<
float> {
151 using __base_t =
float;
152 using __fetch_t = float4;
154template <>
struct __TypeInfoT<char> {
155 using __base_t = char;
156 using __fetch_t =
int4;
158template <>
struct __TypeInfoT<signed char> {
159 using __base_t =
signed char;
160 using __fetch_t =
int4;
162template <>
struct __TypeInfoT<
unsigned char> {
163 using __base_t =
unsigned char;
164 using __fetch_t =
uint4;
166template <>
struct __TypeInfoT<short> {
167 using __base_t = short;
168 using __fetch_t =
int4;
170template <>
struct __TypeInfoT<
unsigned short> {
171 using __base_t =
unsigned short;
172 using __fetch_t =
uint4;
174template <>
struct __TypeInfoT<
int> {
175 using __base_t =
int;
176 using __fetch_t =
int4;
179 using __base_t =
unsigned int;
180 using __fetch_t =
uint4;
184template <
class __T>
struct __TypeInfoT {
185 using __base_t =
decltype(__T::x);
186 using __fetch_t =
typename __TypeInfoT<__base_t>::__fetch_t;
190template <
class __op>
struct __tex_fetch_v4;
193#define __Args(...) __VA_ARGS__
194#define __STRIP_PARENS(__X) __X
195#define __L(__X) __STRIP_PARENS(__Args __X)
201#define __ASM_OUT(__t) \
202 ("=" __t(__r.x), "=" __t(__r.y), "=" __t(__r.z), "=" __t(__r.w))
204#define __ASM_OUTP(__t) (__L(__ASM_OUT(__t)), "=h"(*__ir))
207#define __IMPL_F1(__rt, __dt, __args, __asm_op, __asm_outs, __asm_args) \
209 __device__ __rt __run<__dt>(cudaTextureObject_t __obj, __L(__args)) { \
211 asm(__asm_op : __L(__asm_outs) : "l"(__obj), __L(__asm_args)); \
216#define __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
217 __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \
218 __ASM_OUT("r"), __asm_args) \
219 __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \
220 __ASM_OUT("r"), __asm_args) \
221 __IMPL_F1(float4, float4, __args, \
222 __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUT("f"), \
227#define __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
228 __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \
229 __ASM_OUTP("r"), __asm_args) \
230 __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \
231 __ASM_OUTP("r"), __asm_args) \
232 __IMPL_F1(float4, float4, __args, \
233 __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUTP("f"), \
238#define __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
239 __IMPL_F1(float4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \
240 __ASM_OUT("r"), __asm_args) \
241 __IMPL_F1(float4, uint4, __args, \
242 __asm_op ".u32." __ctype "\t" __asm_op_args, __ASM_OUT("r"), \
246#define __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
247 template <> struct __tex_fetch_v4<__op> { \
249 __device__ static T __run(cudaTextureObject_t __obj, __L(__args)); \
250 __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
254#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
255#define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, \
257 template <> struct __tex_fetch_v4<__op> { \
259 __device__ static T __run(cudaTextureObject_t __obj, __L(__args)); \
260 __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
263#define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args)
267#define __IMPL_S3NI(__op, __args, __asm_op, __ctype, __asm_op_args, \
269 template <> struct __tex_fetch_v4<__op> { \
271 __device__ static float4 __run(cudaTextureObject_t __obj, __L(__args)); \
272 __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
277#define __IMPL_SI(__op, __opn, __args, __asm_op, __ctype, __asm_op_args, \
279 __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args); \
280 __IMPL_S3NI(__opn, __args, __asm_op, __ctype, __asm_op_args, __asm_args)
283#define __IMPL_S3(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
284 __IMPL_S3I(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
285#define __IMPL_S3S(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
286 __IMPL_S3SI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
287#define __IMPL_S3N(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
288 __IMPL_S3NI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
289#define __IMPL_S(__op, __opn, __args, __asm_op, __ctype, __asm_op_args, \
291 __IMPL_SI(__ID(__op), __ID(__opn), __args, __asm_op, __ctype, __asm_op_args, \
296#define __IMPL_ALIASI(__op, __opn) \
297 template <> struct __tex_fetch_v4<__op> : __tex_fetch_v4<__opn> {}
298#define __IMPL_ALIAS(__op, __opn) __IMPL_ALIASI(__ID(__op), __ID(__opn))
302__IMPL_S(
"__tex1D_v2",
"__tex1D_rmnf_v2", (
float __x),
"tex.1d.v4",
"f32",
303 "{%0, %1, %2, %3}, [%4, {%5}];", (
"f"(__x)));
304__IMPL_S(
"__tex1Dfetch_v2",
"__tex1Dfetch_rmnf_v2", (
int __x),
"tex.1d.v4",
305 "s32",
"{%0, %1, %2, %3}, [%4, {%5}];", (
"r"(__x)));
309__IMPL_S(
"__tex1DGrad_v2",
"__tex1DGrad_rmnf_v2",
310 (
float __x,
float __dPdx,
float __dPdy),
"tex.grad.1d.v4",
"f32",
311 "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};",
312 (
"f"(__x),
"f"(__dPdx),
"f"(__dPdy)));
315__IMPL_S(
"__tex1DLayered_v2",
"__tex1DLayered_rmnf_v2",
316 (
float __x,
int __layer),
"tex.a1d.v4",
"f32",
317 "{%0, %1, %2, %3}, [%4, {%5, %6}];", (
"r"(__layer),
"f"(__x)));
320__IMPL_S(
"__tex1DLayeredGrad_v2",
"__tex1DLayeredGrad_rmnf_v2",
321 (
float __x,
int __layer,
float __dPdx,
float __dPdy),
322 "tex.grad.a1d.v4",
"f32",
323 "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};",
324 (
"r"(__layer),
"f"(__x),
"f"(__dPdx),
"f"(__dPdy)));
325__IMPL_ALIAS(
"__itex1DLayeredGrad",
"__tex1DLayeredGrad_v2");
327__IMPL_S(
"__tex1DLayeredLod_v2",
"__tex1DLayeredLod_rmnf_v2",
328 (
float __x,
int __layer,
float __level),
"tex.level.a1d.v4",
"f32",
329 "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;",
330 (
"r"(__layer),
"f"(__x),
"f"(__level)));
331__IMPL_ALIAS(
"__itex1DLayeredLod",
"__tex1DLayeredLod_v2");
333__IMPL_S(
"__tex1DLod_v2",
"__tex1DLod_rmnf_v2", (
float __x,
float __level),
334 "tex.level.1d.v4",
"f32",
"{%0, %1, %2, %3}, [%4, {%5}], %6;",
335 (
"f"(__x),
"f"(__level)));
339__IMPL_S(
"__tex2D_v2",
"__tex2D_rmnf_v2", (
float __x,
float __y),
"tex.2d.v4",
340 "f32",
"{%0, %1, %2, %3}, [%4, {%5, %6}];", (
"f"(__x),
"f"(
__y)));
343__IMPL_S3S(
"__itex2D_sparse", (
float __x,
float __y,
unsigned char *__ir),
344 "{.reg .pred %%p0;\n\t"
347 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
348 " selp.u16 %4, 1, 0, %%p0; }",
349 (
"f"(__x),
"f"(
__y)));
351__IMPL_S(
"__tex2DGrad_v2",
"__tex2DGrad_rmnf_v2",
352 (
float __x,
float __y,
const float2 *__dPdx,
const float2 *__dPdy),
353 "tex.grad.2d.v4",
"f32",
354 "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};",
355 (
"f"(__x),
"f"(
__y),
"f"(__dPdx->x),
"f"(__dPdx->y),
"f"(__dPdy->x),
360 (
float __x,
float __y,
const float2 *__dPdx,
const float2 *__dPdy,
361 unsigned char *__ir),
362 "{.reg .pred %%p0;\n\t"
365 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t"
366 "selp.u16 %4, 1, 0, %%p0; }",
367 (
"f"(__x),
"f"(
__y),
"f"(__dPdx->x),
"f"(__dPdx->y),
"f"(__dPdy->x),
370__IMPL_S(
"__tex2DLayered_v2",
"__tex2DLayered_rmnf_v2",
371 (
float __x,
float __y,
int __layer),
"tex.a2d.v4",
"f32",
372 "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
373 (
"r"(__layer),
"f"(__x),
"f"(
__y)));
377 (
float __x,
float __y,
int __layer,
unsigned char *__ir),
378 "{.reg .pred %%p0;\n\t"
381 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
382 "selp.u16 %4, 1, 0, %%p0; }",
383 (
"r"(__layer),
"f"(__x),
"f"(
__y)));
385__IMPL_S(
"__tex2DLayeredGrad_v2",
"__tex2DLayeredGrad_rmnf_v2",
386 (
float __x,
float __y,
int __layer,
const float2 *__dPdx,
387 const float2 *__dPdy),
388 "tex.grad.a2d.v4",
"f32",
389 "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};",
390 (
"r"(__layer),
"f"(__x),
"f"(
__y),
"f"(__dPdx->x),
"f"(__dPdx->y),
391 "f"(__dPdy->x),
"f"(__dPdy->y)));
392__IMPL_ALIAS(
"__itex2DLayeredGrad_v2",
"__tex2DLayeredGrad_v2");
395 "__itex2DLayeredGrad_sparse",
396 (
float __x,
float __y,
int __layer,
const float2 *__dPdx,
397 const float2 *__dPdy,
unsigned char *__ir),
398 "{.reg .pred %%p0;\n\t"
401 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], {%9, %10}, {%11, %12};\n\t"
402 "selp.u16 %4, 1, 0, %%p0; }",
403 (
"r"(__layer),
"f"(__x),
"f"(
__y),
"f"(__dPdx->x),
"f"(__dPdx->y),
404 "f"(__dPdy->x),
"f"(__dPdy->y)));
406__IMPL_S(
"__tex2DLayeredLod_v2",
"__tex2DLayeredLod_rmnf_v2",
407 (
float __x,
float __y,
int __layer,
float __level),
"tex.level.a2d.v4",
408 "f32",
"{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
409 (
"r"(__layer),
"f"(__x),
"f"(
__y),
"f"(__level)));
410__IMPL_ALIAS(
"__itex2DLayeredLod",
"__tex2DLayeredLod_v2");
413 (
float __x,
float __y,
int __layer,
float __level,
414 unsigned char *__ir),
415 "{.reg .pred %%p0;\n\t"
418 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
419 "selp.u16 %4, 1, 0, %%p0; }",
420 (
"r"(__layer),
"f"(__x),
"f"(
__y),
"f"(__level)));
422__IMPL_S(
"__tex2DLod_v2",
"__tex2DLod_rmnf_v2",
423 (
float __x,
float __y,
float __level),
"tex.level.2d.v4",
"f32",
424 "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;",
425 (
"f"(__x),
"f"(
__y),
"f"(__level)));
429 (
float __x,
float __y,
float __level,
unsigned char *__ir),
430 "{.reg .pred %%p0;\n\t"
433 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
434 "selp.u16 %4, 1, 0, %%p0; }",
435 (
"f"(__x),
"f"(
__y),
"f"(__level)));
441#define __IMPL_2DGATHER(variant, instr) \
442 __IMPL_SI(__IDV("__tex2Dgather_v2", variant), \
443 __IDV("__tex2Dgather_rmnf_v2", variant), \
444 (float __x, float __y, int __comp), instr, "f32", \
445 "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y))); \
446 __IMPL_ALIASI(__IDV("__itex2Dgather", variant), \
447 __IDV("__tex2Dgather_v2", variant)); \
448 __IMPL_S3SI(__IDV("__itex2Dgather_sparse", variant), \
449 (float __x, float __y, unsigned char *__ir, int __comp), \
450 "{.reg .pred %%p0;\n\t" instr, "f32", \
451 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t" \
452 "selp.u16 %4, 1, 0, %%p0; }", \
453 ("f"(__x), "f"(__y)));
460template <>
struct __tex_fetch_v4<
__ID(
"__tex2Dgather_v2")> {
462 __device__ static __T __run(cudaTextureObject_t __obj,
float __x,
float __y,
466 return __tex_fetch_v4<
__IDV(
"__tex2Dgather_v2", 0)>::__run<__T>(
467 __obj, __x,
__y, __comp);
469 return __tex_fetch_v4<
__IDV(
"__tex2Dgather_v2", 1)>::__run<__T>(
470 __obj, __x,
__y, __comp);
472 return __tex_fetch_v4<
__IDV(
"__tex2Dgather_v2", 2)>::__run<__T>(
473 __obj, __x,
__y, __comp);
475 return __tex_fetch_v4<
__IDV(
"__tex2Dgather_v2", 3)>::__run<__T>(
476 __obj, __x,
__y, __comp);
482template <>
struct __tex_fetch_v4<
__ID(
"__tex2Dgather_rmnf_v2")> {
485 float __y,
int __comp) {
488 return __tex_fetch_v4<
__IDV(
"__tex2Dgather_rmnf_v2", 0)>::__run<__T>(
489 __obj, __x,
__y, __comp);
491 return __tex_fetch_v4<
__IDV(
"__tex2Dgather_rmnf_v2", 1)>::__run<__T>(
492 __obj, __x,
__y, __comp);
494 return __tex_fetch_v4<
__IDV(
"__tex2Dgather_rmnf_v2", 2)>::__run<__T>(
495 __obj, __x,
__y, __comp);
497 return __tex_fetch_v4<
__IDV(
"__tex2Dgather_rmnf_v2", 3)>::__run<__T>(
498 __obj, __x,
__y, __comp);
503#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
504template <>
struct __tex_fetch_v4<
__ID(
"__itex2Dgather_sparse")> {
506 __device__ static __T __run(cudaTextureObject_t __obj,
float __x,
float __y,
507 unsigned char *__ir,
int __comp) {
510 return __tex_fetch_v4<
__IDV(
"__itex2Dgather_sparse", 0)>::__run<__T>(
511 __obj, __x,
__y, __ir, __comp);
513 return __tex_fetch_v4<
__IDV(
"__itex2Dgather_sparse", 1)>::__run<__T>(
514 __obj, __x,
__y, __ir, __comp);
516 return __tex_fetch_v4<
__IDV(
"__itex2Dgather_sparse", 2)>::__run<__T>(
517 __obj, __x,
__y, __ir, __comp);
519 return __tex_fetch_v4<
__IDV(
"__itex2Dgather_sparse", 3)>::__run<__T>(
520 __obj, __x,
__y, __ir, __comp);
527__IMPL_S(
"__tex3D_v2",
"__tex3D_rmnf_v2", (
float __x,
float __y,
float __z),
528 "tex.3d.v4",
"f32",
"{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
529 (
"f"(__x),
"f"(
__y),
"f"(__z)));
533 (
float __x,
float __y,
float __z,
unsigned char *__ir),
534 "{.reg .pred %%p0;\n\t"
537 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
538 "selp.u16 %4, 1, 0, %%p0; }",
539 (
"f"(__x),
"f"(
__y),
"f"(__z)));
541__IMPL_S(
"__tex3DGrad_v2",
"__tex3DGrad_rmnf_v2",
542 (
float __x,
float __y,
float __z,
const float4 *__dPdx,
543 const float4 *__dPdy),
544 "tex.grad.3d.v4",
"f32",
545 "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
546 "{%8, %9, %10, %10}, {%11, %12, %13, %13};",
547 (
"f"(__x),
"f"(
__y),
"f"(__z),
"f"(__dPdx->x),
"f"(__dPdx->y),
548 "f"(__dPdx->z),
"f"(__dPdy->x),
"f"(__dPdy->y),
"f"(__dPdy->z)));
552 (
float __x,
float __y,
float __z,
const float4 *__dPdx,
553 const float4 *__dPdy,
unsigned char *__ir),
554 "{.reg .pred %%p0;\n\t"
557 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
558 "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
559 "selp.u16 %4, 1, 0, %%p0; }",
560 (
"f"(__x),
"f"(
__y),
"f"(__z),
"f"(__dPdx->x),
"f"(__dPdx->y),
561 "f"(__dPdx->z),
"f"(__dPdy->x),
"f"(__dPdy->y),
"f"(__dPdy->z)));
563__IMPL_S(
"__tex3DLod_v2",
"__tex3DLod_rmnf_v2",
564 (
float __x,
float __y,
float __z,
float __level),
"tex.level.3d.v4",
565 "f32",
"{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
566 (
"f"(__x),
"f"(
__y),
"f"(__z),
"f"(__level)));
570 (
float __x,
float __y,
float __z,
float __level,
571 unsigned char *__ir),
572 "{.reg .pred %%p0;\n\t"
575 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
576 "selp.u16 %4, 1, 0, %%p0; }",
577 (
"f"(__x),
"f"(
__y),
"f"(__z),
"f"(__level)));
580__IMPL_S(
"__texCubemap_v2",
"__texCubemap_rmnf_v2",
581 (
float __x,
float __y,
float __z),
"tex.cube.v4",
"f32",
582 "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
583 (
"f"(__x),
"f"(
__y),
"f"(__z)));
587 (
float __x,
float __y,
float __z,
unsigned char *__ir),
588 "{.reg .pred %%p0;\n\t"
591 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
592 "selp.u16 %4, 1, 0, %%p0; }",
593 (
"f"(__x),
"f"(
__y),
"f"(__z)));
595__IMPL_S(
"__texCubemapGrad_v2",
"__texCubemapGrad_rmnf_v2",
596 (
float __x,
float __y,
float __z,
const float4 *__dPdx,
597 const float4 *__dPdy),
598 "tex.grad.cube.v4",
"f32",
599 "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
600 "{%8, %9, %10, %10}, {%11, %12, %13, %13};",
601 (
"f"(__x),
"f"(
__y),
"f"(__z),
"f"(__dPdx->x),
"f"(__dPdx->y),
602 "f"(__dPdx->z),
"f"(__dPdy->x),
"f"(__dPdy->y),
"f"(__dPdy->z)));
603__IMPL_ALIAS(
"__itexCubemapGrad_v2",
"__texCubemapGrad_v2");
605__IMPL_S(
"__texCubemapLayered_v2",
"__texCubemapLayered_rmnf_v2",
606 (
float __x,
float __y,
float __z,
int __layer),
"tex.acube.v4",
"f32",
607 "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];",
608 (
"r"(__layer),
"f"(__x),
"f"(
__y),
"f"(__z)));
609__IMPL_ALIAS(
"__itexCubemapLayered",
"__texCubemapLayered_v2");
611__IMPL_S(
"__texCubemapLayeredGrad_v2",
"__texCubemapLayeredGrad_rmnf_v2",
612 (
float __x,
float __y,
float __z,
int __layer,
const float4 *__dPdx,
613 const float4 *__dPdy),
614 "tex.grad.acube.v4",
"f32",
615 "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
616 "{%9, %10, %11, %11}, {%12, %13, %14, %14};",
617 (
"r"(__layer),
"f"(__x),
"f"(
__y),
"f"(__z),
"f"(__dPdx->x),
618 "f"(__dPdx->y),
"f"(__dPdx->z),
"f"(__dPdy->x),
"f"(__dPdy->y),
620__IMPL_ALIAS(
"__itexCubemapLayeredGrad_v2",
"__texCubemapLayeredGrad_v2");
622__IMPL_S(
"__texCubemapLayeredLod_v2",
"__texCubemapLayeredLod_rmnf_v2",
623 (
float __x,
float __y,
float __z,
int __layer,
float __level),
624 "tex.level.acube.v4",
"f32",
625 "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;",
626 (
"r"(__layer),
"f"(__x),
"f"(
__y),
"f"(__z),
"f"(__level)));
627__IMPL_ALIAS(
"__itexCubemapLayeredLod",
"__texCubemapLayeredLod_v2");
629__IMPL_S(
"__texCubemapLod_v2",
"__texCubemapLod_rmnf_v2",
630 (
float __x,
float __y,
float __z,
float __level),
"tex.level.cube.v4",
631 "f32",
"{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
632 (
"f"(__x),
"f"(
__y),
"f"(__z),
"f"(__level)));
636template <
class __DestT,
class __SrcT>
struct __convert {
637 template <
int __NElements =
sizeof(__DestT) /
638 sizeof(
typename __TypeInfoT<__DestT>::__base_t)>
640 template <>
__device__ static __DestT __run<1>(__SrcT
__v) {
return {
__v.x}; }
661template <
class __op,
class __T,
class...
__Args>
662__device__ static void __tex_fetch(__T *__ptr, cudaTextureObject_t __handle,
664 using __FetchT =
typename __TypeInfoT<__T>::__fetch_t;
665 *__ptr = __convert<__T, __FetchT>::__run(
666 __tex_fetch_v4<__op>::template __run<__FetchT>(__handle, __args...));
669#if CUDA_VERSION < 12000
675__device__ cudaTextureObject_t __tex_handle_to_obj(__T __handle) {
676 cudaTextureObject_t __obj;
677 asm(
"mov.b64 %0, %1; " :
"=l"(__obj) :
"l"(__handle));
683template <
class __op,
class __T,
class __HandleT,
class...
__Args>
684__device__ static void __tex_fetch(__T *__ptr, __HandleT __handle,
686 using __FetchT =
typename __TypeInfoT<__T>::__fetch_t;
687 *__ptr = __convert<__T, __FetchT>::__run(
688 __tex_fetch_v4<__op>::template __run<__FetchT>(
689 __tex_handle_to_obj(__handle), __args...));
694template <
class __op,
class __DataT,
class __RetT,
int __TexT,
class...
__Args>
696__tex_fetch(__DataT *, __RetT *__ptr,
697 texture<__DataT, __TexT, cudaReadModeNormalizedFloat> __handle,
699 using __FetchT =
typename __TypeInfoT<__DataT>::__fetch_t;
700 *__ptr = __convert<__RetT, float4>::__run(
701 __tex_fetch_v4<__op>::template __run<__FetchT>(
702 __tex_handle_to_obj(__handle), __args...));
707template <
class __op,
class __DataT,
class __RetT,
int __TexT,
class...
__Args>
709__tex_fetch(__DataT *, __RetT *__ptr,
710 texture<__DataT, __TexT, cudaReadModeElementType> __handle,
712 using __FetchT =
typename __TypeInfoT<__DataT>::__fetch_t;
713 *__ptr = __convert<__RetT, __FetchT>::__run(
714 __tex_fetch_v4<__op>::template __run<__FetchT>(
715 __tex_handle_to_obj(__handle), __args...));
720#pragma pop_macro("__ASM_OUT")
721#pragma pop_macro("__ASM_OUTP")
722#pragma pop_macro("__Args")
723#pragma pop_macro("__ID")
724#pragma pop_macro("__IDV")
725#pragma pop_macro("__IMPL_2DGATHER")
726#pragma pop_macro("__IMPL_ALIAS")
727#pragma pop_macro("__IMPL_ALIASI")
728#pragma pop_macro("__IMPL_F1")
729#pragma pop_macro("__IMPL_F3")
730#pragma pop_macro("__IMPL_F3N")
731#pragma pop_macro("__IMPL_F3S")
732#pragma pop_macro("__IMPL_S")
733#pragma pop_macro("__IMPL_S3")
734#pragma pop_macro("__IMPL_S3I")
735#pragma pop_macro("__IMPL_S3N")
736#pragma pop_macro("__IMPL_S3NI")
737#pragma pop_macro("__IMPL_S3S")
738#pragma pop_macro("__IMPL_S3SI")
739#pragma pop_macro("__IMPL_SI")
740#pragma pop_macro("__L")
741#pragma pop_macro("__STRIP_PARENS")
#define __IMPL_S3S(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#define __IMPL_ALIAS(__op, __opn)
#define __IMPL_2DGATHER(variant, instr)
#define __IDV(__op, __variant)
#define __IMPL_S(__op, __opn, __args, __asm_op, __ctype, __asm_op_args, __asm_args)
__device__ __2f16 float __ockl_bool s
__device__ __2f16 float c
static __inline__ uint32_t uint32_t __y
struct __storeu_i16 *__P __v
vector< float, 4 > float4