constexpr int __tex_len(const char *s) {
  return (s[0] == 0) ? 0

constexpr int __tex_hash_map(int c) {
  return (c == 49) ? 10

constexpr int __tex_op_hash(const char *str) {
  return __tex_len(str) + __tex_hash_map(str[7] + 1) + __tex_hash_map(str[6]) +
         __tex_hash_map(str[5]) + __tex_hash_map(str[__tex_len(str) - 1]);
}
template <int N> struct __Tag;
#define __ID(__op) __Tag<__tex_op_hash(__op)>

#define __IDV(__op, __variant)                                                 \
  __Tag<10000 + __tex_op_hash(__op) * 100 + __variant>
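// __Tag<N> turns the hash of an operation-name string literal into a distinct
// type, so every texture/surface operation gets its own template
// specialization. __ID maps an op name to its tag; __IDV additionally folds a
// small integer variant (e.g. a gather component) into the tag value.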
template <class> struct __TypeInfoT;
template <> struct __TypeInfoT<float> {
  using __base_t = float;
  using __fetch_t = float4;
};
template <> struct __TypeInfoT<char> {
  using __base_t = char;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<signed char> {
  using __base_t = signed char;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<unsigned char> {
  using __base_t = unsigned char;
  using __fetch_t = uint4;
};
template <> struct __TypeInfoT<short> {
  using __base_t = short;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<unsigned short> {
  using __base_t = unsigned short;
  using __fetch_t = uint4;
};
template <> struct __TypeInfoT<int> {
  using __base_t = int;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<unsigned int> {
  using __base_t = unsigned int;
  using __fetch_t = uint4;
};

template <class __T> struct __TypeInfoT {
  using __base_t = decltype(__T::x);
  using __fetch_t = typename __TypeInfoT<__base_t>::__fetch_t;
};
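// __TypeInfoT maps a user-visible element type to the four-component vector
// type (__fetch_t) returned by the underlying PTX fetch: signed integral
// types fetch as int4, unsigned ones as uint4, and float as float4. The
// unspecialized template handles CUDA vector types (char2, float4, ...) by
// recursing on the type of their .x member.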
struct __texture_op_tag {};
struct __surface_op_tag {};

template <class __op> struct __op_type_traits {
  using type = __texture_op_tag;
};

#define __OP_TYPE_SURFACE(__op)                                                \
  template <> struct __op_type_traits<__op> {                                  \
    using type = __surface_op_tag;                                             \
  }
template <class __op> struct __tex_fetch_v4;

#define __Args(...) __VA_ARGS__
#define __STRIP_PARENS(__X) __X
#define __L(__X) __STRIP_PARENS(__Args __X)

#define __ASM_OUT(__t)                                                         \
  ("=" __t(__r.x), "=" __t(__r.y), "=" __t(__r.z), "=" __t(__r.w))
#define __ASM_OUTP(__t) (__L(__ASM_OUT(__t)), "=h"(*__ir))
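// __L strips one level of parentheses from a macro argument, which lets the
// argument and constraint lists below be passed around as single
// parenthesized tokens. __ASM_OUT builds the inline-asm output constraint
// list for the four components of the result vector __r; __ASM_OUTP appends
// an extra 16-bit output that records the sparse-texture residency bit in
// *__ir.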
#define __IMPL_F1(__rt, __dt, __args, __asm_op, __asm_outs, __asm_args)        \
  template <>                                                                  \
  __device__ __rt __run<__dt>(cudaTextureObject_t __obj, __L(__args)) {        \
    __rt __r;                                                                  \
    asm(__asm_op : __L(__asm_outs) : "l"(__obj), __L(__asm_args));             \
    return __r;                                                                \
  }
#define __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args)        \
  __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args,   \
            __ASM_OUT("r"), __asm_args)                                        \
  __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \
            __ASM_OUT("r"), __asm_args)                                        \
  __IMPL_F1(float4, float4, __args,                                            \
            __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUT("f"),       \
            __asm_args)

#define __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args)       \
  __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args,   \
            __ASM_OUTP("r"), __asm_args)                                       \
  __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \
            __ASM_OUTP("r"), __asm_args)                                       \
  __IMPL_F1(float4, float4, __args,                                            \
            __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUTP("f"),      \
            __asm_args)

#define __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args)       \
  __IMPL_F1(float4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \
            __ASM_OUT("r"), __asm_args)                                        \
  __IMPL_F1(float4, uint4, __args,                                             \
            __asm_op ".u32." __ctype "\t" __asm_op_args, __ASM_OUT("r"),       \
            __asm_args)
#define __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
  template <> struct __tex_fetch_v4<__op> {                                    \
    template <class T>                                                         \
    __device__ static T __run(cudaTextureObject_t __obj, __L(__args));         \
    __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args)            \
  }

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
#define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args,            \
                    __asm_args)                                                \
  template <> struct __tex_fetch_v4<__op> {                                    \
    template <class T>                                                         \
    __device__ static T __run(cudaTextureObject_t __obj, __L(__args));         \
    __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args)           \
  }
#else
#define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#endif

#define __IMPL_S3NI(__op, __args, __asm_op, __ctype, __asm_op_args,            \
                    __asm_args)                                                \
  template <> struct __tex_fetch_v4<__op> {                                    \
    template <class T>                                                         \
    __device__ static float4 __run(cudaTextureObject_t __obj, __L(__args));    \
    __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args)           \
  }

#define __IMPL_SI(__op, __opn, __args, __asm_op, __ctype, __asm_op_args,       \
                  __asm_args)                                                  \
  __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args);      \
  __IMPL_S3NI(__opn, __args, __asm_op, __ctype, __asm_op_args, __asm_args)

#define __IMPL_S3(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args)  \
  __IMPL_S3I(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#define __IMPL_S3S(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
  __IMPL_S3SI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#define __IMPL_S3N(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
  __IMPL_S3NI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#define __IMPL_S(__op, __opn, __args, __asm_op, __ctype, __asm_op_args,        \
                 __asm_args)                                                   \
  __IMPL_SI(__ID(__op), __ID(__opn), __args, __asm_op, __ctype, __asm_op_args, \
            __asm_args)

#define __IMPL_ALIASI(__op, __opn)                                             \
  template <> struct __tex_fetch_v4<__op> : __tex_fetch_v4<__opn> {}
#define __IMPL_ALIAS(__op, __opn) __IMPL_ALIASI(__ID(__op), __ID(__opn))
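// __IMPL_S(op, op_rmnf, args, ...) instantiates __tex_fetch_v4 for a texture
// op and its read-mode-normalized-float counterpart; __IMPL_S3S adds the
// sparse (residency-predicate) form, which is only compiled for sm_60 and
// later, and __IMPL_ALIAS lets one op name reuse another op's implementation.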
__IMPL_S("__tex1D_v2", "__tex1D_rmnf_v2", (float __x), "tex.1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5}];", ("f"(__x)));
__IMPL_S("__tex1Dfetch_v2", "__tex1Dfetch_rmnf_v2", (int __x), "tex.1d.v4",
         "s32", "{%0, %1, %2, %3}, [%4, {%5}];", ("r"(__x)));

__IMPL_S("__tex1DGrad_v2", "__tex1DGrad_rmnf_v2",
         (float __x, float __dPdx, float __dPdy), "tex.grad.1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};",
         ("f"(__x), "f"(__dPdx), "f"(__dPdy)));

__IMPL_S("__tex1DLayered_v2", "__tex1DLayered_rmnf_v2",
         (float __x, int __layer), "tex.a1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("r"(__layer), "f"(__x)));

__IMPL_S("__tex1DLayeredGrad_v2", "__tex1DLayeredGrad_rmnf_v2",
         (float __x, int __layer, float __dPdx, float __dPdy),
         "tex.grad.a1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};",
         ("r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)));
__IMPL_ALIAS("__itex1DLayeredGrad", "__tex1DLayeredGrad_v2");

__IMPL_S("__tex1DLayeredLod_v2", "__tex1DLayeredLod_rmnf_v2",
         (float __x, int __layer, float __level), "tex.level.a1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;",
         ("r"(__layer), "f"(__x), "f"(__level)));
__IMPL_ALIAS("__itex1DLayeredLod", "__tex1DLayeredLod_v2");

__IMPL_S("__tex1DLod_v2", "__tex1DLod_rmnf_v2", (float __x, float __level),
         "tex.level.1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5}], %6;",
         ("f"(__x), "f"(__level)));
__IMPL_S("__tex2D_v2", "__tex2D_rmnf_v2", (float __x, float __y), "tex.2d.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y)));
__IMPL_S3S("__itex2D_sparse", (float __x, float __y, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.2d.v4",
           "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
           " selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y)));
__IMPL_S("__tex2DGrad_v2", "__tex2DGrad_rmnf_v2",
         (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy),
         "tex.grad.2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};",
         ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x),
          "f"(__dPdy->y)));

__IMPL_S3S("__itex2DGrad_sparse",
           (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy,
            unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.grad.2d.v4",
           "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x),
            "f"(__dPdy->y)));
__IMPL_S("__tex2DLayered_v2", "__tex2DLayered_rmnf_v2",
         (float __x, float __y, int __layer), "tex.a2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
         ("r"(__layer), "f"(__x), "f"(__y)));
__IMPL_S3S("__itex2DLayered_sparse",
           (float __x, float __y, int __layer, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.a2d.v4",
           "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("r"(__layer), "f"(__x), "f"(__y)));
__IMPL_S("__tex2DLayeredGrad_v2", "__tex2DLayeredGrad_rmnf_v2",
         (float __x, float __y, int __layer, const float2 *__dPdx,
          const float2 *__dPdy),
         "tex.grad.a2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y)));
__IMPL_ALIAS("__itex2DLayeredGrad_v2", "__tex2DLayeredGrad_v2");
468 "__itex2DLayeredGrad_sparse",
469 (
float __x,
float __y,
int __layer,
const float2 *__dPdx,
470 const float2 *__dPdy,
unsigned char *__ir),
471 "{.reg .pred %%p0;\n\t"
474 "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], {%9, %10}, {%11, %12};\n\t"
475 "selp.u16 %4, 1, 0, %%p0; }",
476 (
"r"(__layer),
"f"(__x),
"f"(
__y),
"f"(__dPdx->x),
"f"(__dPdx->y),
477 "f"(__dPdy->x),
"f"(__dPdy->y)));
__IMPL_S("__tex2DLayeredLod_v2", "__tex2DLayeredLod_rmnf_v2",
         (float __x, float __y, int __layer, float __level),
         "tex.level.a2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__level)));
__IMPL_ALIAS("__itex2DLayeredLod", "__tex2DLayeredLod_v2");
__IMPL_S3S("__itex2DLayeredLod_sparse",
           (float __x, float __y, int __layer, float __level,
            unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.level.a2d.v4",
           "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("r"(__layer), "f"(__x), "f"(__y), "f"(__level)));
__IMPL_S("__tex2DLod_v2", "__tex2DLod_rmnf_v2",
         (float __x, float __y, float __level), "tex.level.2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;",
         ("f"(__x), "f"(__y), "f"(__level)));
__IMPL_S3S("__itex2DLod_sparse",
           (float __x, float __y, float __level, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.level.2d.v4",
           "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__level)));
#define __IMPL_2DGATHER(variant, instr)                                        \
  __IMPL_SI(__IDV("__tex2Dgather_v2", variant),                                \
            __IDV("__tex2Dgather_rmnf_v2", variant),                           \
            (float __x, float __y, int __comp), instr, "f32",                  \
            "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y)));        \
  __IMPL_ALIASI(__IDV("__itex2Dgather", variant),                              \
                __IDV("__tex2Dgather_v2", variant));                           \
  __IMPL_S3SI(__IDV("__itex2Dgather_sparse", variant),                         \
              (float __x, float __y, unsigned char *__ir, int __comp),         \
              "{.reg .pred %%p0;\n\t" instr, "f32",                            \
              "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"                     \
              "selp.u16 %4, 1, 0, %%p0; }",                                    \
              ("f"(__x), "f"(__y)));
template <> struct __tex_fetch_v4<__ID("__tex2Dgather_v2")> {
  template <class __T>
  __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y,
                              int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 0)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 1:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 1)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 2:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 2)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 3:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 3)>::__run<__T>(
          __obj, __x, __y, __comp);
    }
  }
};
template <> struct __tex_fetch_v4<__ID("__tex2Dgather_rmnf_v2")> {
  template <class __T>
  __device__ static __T __run(cudaTextureObject_t __obj, float __x,
                              float __y, int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 0)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 1:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 1)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 2:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 2)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 3:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 3)>::__run<__T>(
          __obj, __x, __y, __comp);
    }
  }
};
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
template <> struct __tex_fetch_v4<__ID("__itex2Dgather_sparse")> {
  template <class __T>
  __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y,
                              unsigned char *__ir, int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 0)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    case 1:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 1)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    case 2:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 2)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    case 3:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 3)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    }
  }
};
#endif
__IMPL_S("__tex3D_v2", "__tex3D_rmnf_v2", (float __x, float __y, float __z),
         "tex.3d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
         ("f"(__x), "f"(__y), "f"(__z)));
__IMPL_S3S("__itex3D_sparse",
           (float __x, float __y, float __z, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.3d.v4",
           "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z)));
__IMPL_S("__tex3DGrad_v2", "__tex3DGrad_rmnf_v2",
         (float __x, float __y, float __z, const float4 *__dPdx,
          const float4 *__dPdy),
         "tex.grad.3d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
         "{%8, %9, %10, %10}, {%11, %12, %13, %13};",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
__IMPL_S3S("__itex3DGrad_sparse",
           (float __x, float __y, float __z, const float4 *__dPdx,
            const float4 *__dPdy, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.grad.3d.v4",
           "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
           "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
            "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
__IMPL_S("__tex3DLod_v2", "__tex3DLod_rmnf_v2",
         (float __x, float __y, float __z, float __level), "tex.level.3d.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));
__IMPL_S3S("__itex3DLod_sparse",
           (float __x, float __y, float __z, float __level,
            unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.level.3d.v4",
           "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));
__IMPL_S("__texCubemap_v2", "__texCubemap_rmnf_v2",
         (float __x, float __y, float __z), "tex.cube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
         ("f"(__x), "f"(__y), "f"(__z)));
__IMPL_S3S("__itexCubemap_sparse",
           (float __x, float __y, float __z, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.cube.v4",
           "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z)));
__IMPL_S("__texCubemapGrad_v2", "__texCubemapGrad_rmnf_v2",
         (float __x, float __y, float __z, const float4 *__dPdx,
          const float4 *__dPdy),
         "tex.grad.cube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
         "{%8, %9, %10, %10}, {%11, %12, %13, %13};",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
__IMPL_ALIAS("__itexCubemapGrad_v2", "__texCubemapGrad_v2");
__IMPL_S("__texCubemapLayered_v2", "__texCubemapLayered_rmnf_v2",
         (float __x, float __y, float __z, int __layer), "tex.acube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__z)));
__IMPL_ALIAS("__itexCubemapLayered", "__texCubemapLayered_v2");
__IMPL_S("__texCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_rmnf_v2",
         (float __x, float __y, float __z, int __layer, const float4 *__dPdx,
          const float4 *__dPdy),
         "tex.grad.acube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
         "{%9, %10, %11, %11}, {%12, %13, %14, %14};",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y),
          "f"(__dPdy->z)));
__IMPL_ALIAS("__itexCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_v2");
__IMPL_S("__texCubemapLayeredLod_v2", "__texCubemapLayeredLod_rmnf_v2",
         (float __x, float __y, float __z, int __layer, float __level),
         "tex.level.acube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level)));
__IMPL_ALIAS("__itexCubemapLayeredLod", "__texCubemapLayeredLod_v2");
__IMPL_S("__texCubemapLod_v2", "__texCubemapLod_rmnf_v2",
         (float __x, float __y, float __z, float __level), "tex.level.cube.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));
template <class __DestT, class __SrcT> struct __convert {
  template <int __NElements = sizeof(__DestT) /
                              sizeof(typename __TypeInfoT<__DestT>::__base_t)>
  __device__ static __DestT __run(__SrcT __v);
  template <> __device__ static __DestT __run<1>(__SrcT __v) { return {__v.x}; }
  template <> __device__ static __DestT __run<2>(__SrcT __v) {
    return {__v.x, __v.y};
  }
  template <> __device__ static __DestT __run<4>(__SrcT __v) {
    return {__v.x, __v.y, __v.z, __v.w};
  }
};
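// __convert narrows the 4-element vector returned by a fetch back down to the
// user-visible type; __NElements defaults to the destination's element count
// and selects the matching __run specialization above.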
template <class __op, typename __type> struct __surf_read_write_v2;
#define __SURF_WRITE_V2(__op, __asm_dim, __asmtype, __type, __index_op_args,   \
                        __index_args, __index_asm_args, __asm_op_args,         \
                        __asm_args)                                            \
  template <> struct __surf_read_write_v2<__op, __type> {                      \
    static __device__ void __run(__type *__ptr, cudaSurfaceObject_t obj,       \
                                 __L(__index_args),                            \
                                 cudaSurfaceBoundaryMode mode) {               \
      switch (mode) {                                                          \
      case cudaBoundaryModeZero:                                               \
        asm volatile("sust.b." __asm_dim "." __asmtype                         \
                     ".zero [%0, " __index_op_args "], " __asm_op_args ";"     \
                     :                                                         \
                     : "l"(obj), __L(__index_asm_args), __L(__asm_args));      \
        break;                                                                 \
      case cudaBoundaryModeClamp:                                              \
        asm volatile("sust.b." __asm_dim "." __asmtype                         \
                     ".clamp [%0, " __index_op_args "], " __asm_op_args ";"    \
                     :                                                         \
                     : "l"(obj), __L(__index_asm_args), __L(__asm_args));      \
        break;                                                                 \
      case cudaBoundaryModeTrap:                                               \
        asm volatile("sust.b." __asm_dim "." __asmtype                         \
                     ".trap [%0, " __index_op_args "], " __asm_op_args ";"     \
                     :                                                         \
                     : "l"(obj), __L(__index_asm_args), __L(__asm_args));      \
        break;                                                                 \
      }                                                                        \
    }                                                                          \
  }
#define __SURF_READ_V2(__op, __asm_dim, __asmtype, __type, __asm_op_args,      \
                       __asm_args, __index_args, __index_asm_args)             \
  template <> struct __surf_read_write_v2<__op, __type> {                      \
    static __device__ void __run(__type *__ptr, cudaSurfaceObject_t obj,       \
                                 __L(__index_args),                            \
                                 cudaSurfaceBoundaryMode mode) {               \
      switch (mode) {                                                          \
      case cudaBoundaryModeZero:                                               \
        asm("suld.b." __asm_dim "." __asmtype ".zero " __asm_op_args ";"       \
            : __L(__asm_args)                                                  \
            : "l"(obj), __L(__index_asm_args));                                \
        break;                                                                 \
      case cudaBoundaryModeClamp:                                              \
        asm("suld.b." __asm_dim "." __asmtype ".clamp " __asm_op_args ";"      \
            : __L(__asm_args)                                                  \
            : "l"(obj), __L(__index_asm_args));                                \
        break;                                                                 \
      case cudaBoundaryModeTrap:                                               \
        asm("suld.b." __asm_dim "." __asmtype ".trap " __asm_op_args ";"       \
            : __L(__asm_args)                                                  \
            : "l"(obj), __L(__index_asm_args));                                \
        break;                                                                 \
      }                                                                        \
    }                                                                          \
  }
#define __SW_ASM_ARGS(__type) (__type(*__ptr))
#define __SW_ASM_ARGS1(__type) (__type(__ptr->x))
#define __SW_ASM_ARGS2(__type) (__type(__ptr->x), __type(__ptr->y))
#define __SW_ASM_ARGS4(__type)                                                 \
  (__type(__ptr->x), __type(__ptr->y), __type(__ptr->z), __type(__ptr->w))
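// "h", "r" and "l" map to 16-, 32- and 64-bit registers in the PTX inline
// asm; the read variants pass these constraints with an "=" prefix to mark
// them as outputs, while the write variants pass them as plain inputs.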
#define __SURF_READ1D(__asmtype, __type, __asm_op_args, __asm_args)            \
  __SURF_READ_V2(__ID("__isurf1Dread"), "1d", __asmtype, __type,               \
                 __asm_op_args, __asm_args, (int x), ("r"(x)))
#define __SURF_READ2D(__asmtype, __type, __asm_op_args, __asm_args)            \
  __SURF_READ_V2(__ID("__isurf2Dread"), "2d", __asmtype, __type,               \
                 __asm_op_args, __asm_args, (int x, int y), ("r"(x), "r"(y)))
#define __SURF_READ3D(__asmtype, __type, __asm_op_args, __asm_args)            \
  __SURF_READ_V2(__ID("__isurf3Dread"), "3d", __asmtype, __type,               \
                 __asm_op_args, __asm_args, (int x, int y, int z),             \
                 ("r"(x), "r"(y), "r"(z)))

#define __SURF_READ1DLAYERED(__asmtype, __type, __asm_op_args, __asm_args)     \
  __SURF_READ_V2(__ID("__isurf1DLayeredread"), "a1d", __asmtype, __type,       \
                 __asm_op_args, __asm_args, (int x, int layer),                \
                 ("r"(x), "r"(layer)))
#define __SURF_READ2DLAYERED(__asmtype, __type, __asm_op_args, __asm_args)     \
  __SURF_READ_V2(__ID("__isurf2DLayeredread"), "a2d", __asmtype, __type,       \
                 __asm_op_args, __asm_args, (int x, int y, int layer),         \
                 ("r"(x), "r"(y), "r"(layer)))
#define __SURF_READCUBEMAP(__asmtype, __type, __asm_op_args, __asm_args)       \
  __SURF_READ_V2(__ID("__isurfCubemapread"), "a2d", __asmtype, __type,         \
                 __asm_op_args, __asm_args, (int x, int y, int face),          \
                 ("r"(x), "r"(y), "r"(face)))
#define __SURF_READCUBEMAPLAYERED(__asmtype, __type, __asm_op_args,            \
                                  __asm_args)                                  \
  __SURF_READ_V2(__ID("__isurfCubemapLayeredread"), "a2d", __asmtype, __type,  \
                 __asm_op_args, __asm_args, (int x, int y, int layerface),     \
                 ("r"(x), "r"(y), "r"(layerface)))
#define __1DV1 "{%0}, [%1, {%2}]"
#define __1DV2 "{%0, %1}, [%2, {%3}]"
#define __1DV4 "{%0, %1, %2, %3}, [%4, {%5}]"

#define __2DV1 "{%0}, [%1, {%2, %3}]"
#define __2DV2 "{%0, %1}, [%2, {%3, %4}]"
#define __2DV4 "{%0, %1, %2, %3}, [%4, {%5, %6}]"

#define __1DLAYERV1 "{%0}, [%1, {%3, %2}]"
#define __1DLAYERV2 "{%0, %1}, [%2, {%4, %3}]"
#define __1DLAYERV4 "{%0, %1, %2, %3}, [%4, {%6, %5}]"

#define __3DV1 "{%0}, [%1, {%2, %3, %4, %4}]"
#define __3DV2 "{%0, %1}, [%2, {%3, %4, %5, %5}]"
#define __3DV4 "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}]"

#define __2DLAYERV1 "{%0}, [%1, {%4, %2, %3, %3}]"
#define __2DLAYERV2 "{%0, %1}, [%2, {%5, %3, %4, %4}]"
#define __2DLAYERV4 "{%0, %1, %2, %3}, [%4, {%7, %5, %6, %6}]"

#define __CUBEMAPV1 "{%0}, [%1, {%4, %2, %3, %3}]"
#define __CUBEMAPV2 "{%0, %1}, [%2, {%5, %3, %4, %4}]"
#define __CUBEMAPV4 "{%0, %1, %2, %3}, [%4, {%7, %5, %6, %6}]"

#define __CUBEMAPLAYERV1 "{%0}, [%1, {%4, %2, %3, %3}]"
#define __CUBEMAPLAYERV2 "{%0, %1}, [%2, {%5, %3, %4, %4}]"
#define __CUBEMAPLAYERV4 "{%0, %1, %2, %3}, [%4, {%7, %5, %6, %6}]"
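// Operand templates for the suld instructions above. Layered and cubemap
// variants put the layer/face index first inside the address tuple, and the
// 3D/layered forms repeat the last coordinate to pad the tuple to four
// components, as the hardware expects.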
#define __SURF_READXD_ALL(__xdv1, __xdv2, __xdv4, __surf_readxd_v2)            \
  __surf_readxd_v2("b8", char, __xdv1, __SW_ASM_ARGS("=h"));                   \
  __surf_readxd_v2("b8", signed char, __xdv1, __SW_ASM_ARGS("=h"));            \
  __surf_readxd_v2("b8", char1, __xdv1, __SW_ASM_ARGS1("=h"));                 \
  __surf_readxd_v2("b8", unsigned char, __xdv1, __SW_ASM_ARGS("=h"));          \
  __surf_readxd_v2("b8", uchar1, __xdv1, __SW_ASM_ARGS1("=h"));                \
  __surf_readxd_v2("b16", short, __xdv1, __SW_ASM_ARGS("=h"));                 \
  __surf_readxd_v2("b16", short1, __xdv1, __SW_ASM_ARGS1("=h"));               \
  __surf_readxd_v2("b16", unsigned short, __xdv1, __SW_ASM_ARGS("=h"));        \
  __surf_readxd_v2("b16", ushort1, __xdv1, __SW_ASM_ARGS1("=h"));              \
  __surf_readxd_v2("b32", int, __xdv1, __SW_ASM_ARGS("=r"));                   \
  __surf_readxd_v2("b32", int1, __xdv1, __SW_ASM_ARGS1("=r"));                 \
  __surf_readxd_v2("b32", unsigned int, __xdv1, __SW_ASM_ARGS("=r"));          \
  __surf_readxd_v2("b32", uint1, __xdv1, __SW_ASM_ARGS1("=r"));                \
  __surf_readxd_v2("b64", long long, __xdv1, __SW_ASM_ARGS("=l"));             \
  __surf_readxd_v2("b64", longlong1, __xdv1, __SW_ASM_ARGS1("=l"));            \
  __surf_readxd_v2("b64", unsigned long long, __xdv1, __SW_ASM_ARGS("=l"));    \
  __surf_readxd_v2("b64", ulonglong1, __xdv1, __SW_ASM_ARGS1("=l"));           \
  __surf_readxd_v2("b32", float, __xdv1, __SW_ASM_ARGS("=r"));                 \
  __surf_readxd_v2("b32", float1, __xdv1, __SW_ASM_ARGS1("=r"));               \
                                                                               \
  __surf_readxd_v2("v2.b8", char2, __xdv2, __SW_ASM_ARGS2("=h"));              \
  __surf_readxd_v2("v2.b8", uchar2, __xdv2, __SW_ASM_ARGS2("=h"));             \
  __surf_readxd_v2("v2.b16", short2, __xdv2, __SW_ASM_ARGS2("=h"));            \
  __surf_readxd_v2("v2.b16", ushort2, __xdv2, __SW_ASM_ARGS2("=h"));           \
  __surf_readxd_v2("v2.b32", int2, __xdv2, __SW_ASM_ARGS2("=r"));              \
  __surf_readxd_v2("v2.b32", uint2, __xdv2, __SW_ASM_ARGS2("=r"));             \
  __surf_readxd_v2("v2.b64", longlong2, __xdv2, __SW_ASM_ARGS2("=l"));         \
  __surf_readxd_v2("v2.b64", ulonglong2, __xdv2, __SW_ASM_ARGS2("=l"));        \
  __surf_readxd_v2("v2.b32", float2, __xdv2, __SW_ASM_ARGS2("=r"));            \
                                                                               \
  __surf_readxd_v2("v4.b8", char4, __xdv4, __SW_ASM_ARGS4("=h"));              \
  __surf_readxd_v2("v4.b8", uchar4, __xdv4, __SW_ASM_ARGS4("=h"));             \
  __surf_readxd_v2("v4.b16", short4, __xdv4, __SW_ASM_ARGS4("=h"));            \
  __surf_readxd_v2("v4.b16", ushort4, __xdv4, __SW_ASM_ARGS4("=h"));           \
  __surf_readxd_v2("v4.b32", int4, __xdv4, __SW_ASM_ARGS4("=r"));              \
  __surf_readxd_v2("v4.b32", uint4, __xdv4, __SW_ASM_ARGS4("=r"));             \
  __surf_readxd_v2("v4.b32", float4, __xdv4, __SW_ASM_ARGS4("=r"))
#define __SURF_WRITE1D_V2(__asmtype, __type, __asm_op_args, __asm_args)        \
  __SURF_WRITE_V2(__ID("__isurf1Dwrite_v2"), "1d", __asmtype, __type, "{%1}",  \
                  (int x), ("r"(x)), __asm_op_args, __asm_args)
#define __SURF_WRITE1DLAYERED_V2(__asmtype, __type, __asm_op_args, __asm_args) \
  __SURF_WRITE_V2(__ID("__isurf1DLayeredwrite_v2"), "a1d", __asmtype, __type,  \
                  "{%2, %1}", (int x, int layer), ("r"(x), "r"(layer)),        \
                  __asm_op_args, __asm_args)
#define __SURF_WRITE2D_V2(__asmtype, __type, __asm_op_args, __asm_args)        \
  __SURF_WRITE_V2(__ID("__isurf2Dwrite_v2"), "2d", __asmtype, __type,          \
                  "{%1, %2}", (int x, int y), ("r"(x), "r"(y)), __asm_op_args, \
                  __asm_args)
#define __SURF_WRITE2DLAYERED_V2(__asmtype, __type, __asm_op_args, __asm_args) \
  __SURF_WRITE_V2(__ID("__isurf2DLayeredwrite_v2"), "a2d", __asmtype, __type,  \
                  "{%3, %1, %2, %2}", (int x, int y, int layer),               \
                  ("r"(x), "r"(y), "r"(layer)), __asm_op_args, __asm_args)
#define __SURF_WRITE3D_V2(__asmtype, __type, __asm_op_args, __asm_args)        \
  __SURF_WRITE_V2(__ID("__isurf3Dwrite_v2"), "3d", __asmtype, __type,          \
                  "{%1, %2, %3, %3}", (int x, int y, int z),                   \
                  ("r"(x), "r"(y), "r"(z)), __asm_op_args, __asm_args)

#define __SURF_CUBEMAPWRITE_V2(__asmtype, __type, __asm_op_args, __asm_args)   \
  __SURF_WRITE_V2(__ID("__isurfCubemapwrite_v2"), "a2d", __asmtype, __type,    \
                  "{%3, %1, %2, %2}", (int x, int y, int face),                \
                  ("r"(x), "r"(y), "r"(face)), __asm_op_args, __asm_args)
#define __SURF_CUBEMAPLAYEREDWRITE_V2(__asmtype, __type, __asm_op_args,        \
                                      __asm_args)                              \
  __SURF_WRITE_V2(__ID("__isurfCubemapLayeredwrite_v2"), "a2d", __asmtype,     \
                  __type, "{%3, %1, %2, %2}", (int x, int y, int layerface),   \
                  ("r"(x), "r"(y), "r"(layerface)), __asm_op_args, __asm_args)
#define __SURF_WRITEXD_V2_ALL(__xdv1, __xdv2, __xdv4, __surf_writexd_v2)       \
  __surf_writexd_v2("b8", char, __xdv1, __SW_ASM_ARGS("h"));                   \
  __surf_writexd_v2("b8", signed char, __xdv1, __SW_ASM_ARGS("h"));            \
  __surf_writexd_v2("b8", char1, __xdv1, __SW_ASM_ARGS1("h"));                 \
  __surf_writexd_v2("b8", unsigned char, __xdv1, __SW_ASM_ARGS("h"));          \
  __surf_writexd_v2("b8", uchar1, __xdv1, __SW_ASM_ARGS1("h"));                \
  __surf_writexd_v2("b16", short, __xdv1, __SW_ASM_ARGS("h"));                 \
  __surf_writexd_v2("b16", short1, __xdv1, __SW_ASM_ARGS1("h"));               \
  __surf_writexd_v2("b16", unsigned short, __xdv1, __SW_ASM_ARGS("h"));        \
  __surf_writexd_v2("b16", ushort1, __xdv1, __SW_ASM_ARGS1("h"));              \
  __surf_writexd_v2("b32", int, __xdv1, __SW_ASM_ARGS("r"));                   \
  __surf_writexd_v2("b32", int1, __xdv1, __SW_ASM_ARGS1("r"));                 \
  __surf_writexd_v2("b32", unsigned int, __xdv1, __SW_ASM_ARGS("r"));          \
  __surf_writexd_v2("b32", uint1, __xdv1, __SW_ASM_ARGS1("r"));                \
  __surf_writexd_v2("b64", long long, __xdv1, __SW_ASM_ARGS("l"));             \
  __surf_writexd_v2("b64", longlong1, __xdv1, __SW_ASM_ARGS1("l"));            \
  __surf_writexd_v2("b64", unsigned long long, __xdv1, __SW_ASM_ARGS("l"));    \
  __surf_writexd_v2("b64", ulonglong1, __xdv1, __SW_ASM_ARGS1("l"));           \
  __surf_writexd_v2("b32", float, __xdv1, __SW_ASM_ARGS("r"));                 \
  __surf_writexd_v2("b32", float1, __xdv1, __SW_ASM_ARGS1("r"));               \
                                                                               \
  __surf_writexd_v2("v2.b8", char2, __xdv2, __SW_ASM_ARGS2("h"));              \
  __surf_writexd_v2("v2.b8", uchar2, __xdv2, __SW_ASM_ARGS2("h"));             \
  __surf_writexd_v2("v2.b16", short2, __xdv2, __SW_ASM_ARGS2("h"));            \
  __surf_writexd_v2("v2.b16", ushort2, __xdv2, __SW_ASM_ARGS2("h"));           \
  __surf_writexd_v2("v2.b32", int2, __xdv2, __SW_ASM_ARGS2("r"));              \
  __surf_writexd_v2("v2.b32", uint2, __xdv2, __SW_ASM_ARGS2("r"));             \
  __surf_writexd_v2("v2.b64", longlong2, __xdv2, __SW_ASM_ARGS2("l"));         \
  __surf_writexd_v2("v2.b64", ulonglong2, __xdv2, __SW_ASM_ARGS2("l"));        \
  __surf_writexd_v2("v2.b32", float2, __xdv2, __SW_ASM_ARGS2("r"));            \
                                                                               \
  __surf_writexd_v2("v4.b8", char4, __xdv4, __SW_ASM_ARGS4("h"));              \
  __surf_writexd_v2("v4.b8", uchar4, __xdv4, __SW_ASM_ARGS4("h"));             \
  __surf_writexd_v2("v4.b16", short4, __xdv4, __SW_ASM_ARGS4("h"));            \
  __surf_writexd_v2("v4.b16", ushort4, __xdv4, __SW_ASM_ARGS4("h"));           \
  __surf_writexd_v2("v4.b32", int4, __xdv4, __SW_ASM_ARGS4("r"));              \
  __surf_writexd_v2("v4.b32", uint4, __xdv4, __SW_ASM_ARGS4("r"));             \
  __surf_writexd_v2("v4.b32", float4, __xdv4, __SW_ASM_ARGS4("r"))
#define __1DV2 "{%2, %3}"
#define __1DV4 "{%2, %3, %4, %5}"

#define __2DV2 "{%3, %4}"
#define __2DV4 "{%3, %4, %5, %6}"

#define __3DV1 "{%4}"
#define __3DV2 "{%4, %5}"
#define __3DV4 "{%4, %5, %6, %7}"
template <class __op, class __DataT, class... __Args>
__device__ static void __tex_fetch_impl(__surface_op_tag, __DataT *__ptr,
                                        cudaSurfaceObject_t __handle,
                                        __Args... __args) {
  __surf_read_write_v2<__op, __DataT>::__run(__ptr, __handle, __args...);
}
template <class __op, class __T, class... __Args>
__device__ static void __tex_fetch_impl(__texture_op_tag, __T *__ptr,
                                        cudaTextureObject_t __handle,
                                        __Args... __args) {
  using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
  *__ptr = __convert<__T, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(__handle, __args...));
}
template <class __op, class __T, class... __Args>
__device__ static void __tex_fetch(__T *__ptr, cudaTextureObject_t __handle,
                                   __Args... __args) {
  using op_type = typename __op_type_traits<__op>::type;
  __tex_fetch_impl<__op>(op_type{}, __ptr, __handle, __args...);
}
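// __tex_fetch is the common entry point. __op_type_traits selects the surface
// or texture overload of __tex_fetch_impl at compile time via tag dispatch,
// and the texture path converts the raw four-component fetch into the
// caller's element type. Roughly (illustrative only), a 2D texture read would
// be lowered to something like:
//   float4 __ret;
//   __tex_fetch<__ID("__tex2D_v2")>(&__ret, __obj, __x, __y);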
#if CUDA_VERSION < 12000
template <class __T>
__device__ cudaTextureObject_t __tex_handle_to_obj(__T __handle) {
  cudaTextureObject_t __obj;
  asm("mov.b64 %0, %1; " : "=l"(__obj) : "l"(__handle));
  return __obj;
}
template <class __op, class __T, class __HandleT, class... __Args>
__device__ static void __tex_fetch(__T *__ptr, __HandleT __handle,
                                   __Args... __args) {
  using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
  *__ptr = __convert<__T, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}
template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__device__ static void
__tex_fetch(__DataT *, __RetT *__ptr,
            texture<__DataT, __TexT, cudaReadModeNormalizedFloat> __handle,
            __Args... __args) {
  using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t;
  *__ptr = __convert<__RetT, float4>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}
template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__device__ static void
__tex_fetch(__DataT *, __RetT *__ptr,
            texture<__DataT, __TexT, cudaReadModeElementType> __handle,
            __Args... __args) {
  using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t;
  *__ptr = __convert<__RetT, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}
#endif // CUDA_VERSION < 12000