10#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead."
13#ifndef __AMX_AVX512INTRIN_H
14#define __AMX_AVX512INTRIN_H
15#if defined(__x86_64__) && defined(__SSE2__)
/// Attribute bundle attached to the function definitions below: forces
/// inlining, omits debug info, enables the "amx-avx512" and "avx10.2" target
/// features for the function body, and requests a minimum vector width of 512.
#define __DEFAULT_FN_ATTRS_AVX512                                              \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("amx-avx512,avx10.2"),                             \
                 __min_vector_width__(512)))
/// Expands to a call of __builtin_ia32_tcvtrowd2ps with the same two
/// arguments, in the same order.
///
/// NOTE(review): the builtin name suggests TCVTROWD2PS (one tile row of 32-bit
/// integers converted to single precision); \a tsrc is presumably a
/// compile-time tile index and \a row a run-time row selector -- confirm.
#define _tile_cvtrowd2ps(tsrc, row)                                            \
  __builtin_ia32_tcvtrowd2ps((tsrc), (row))
/// Expands to a call of __builtin_ia32_tcvtrowd2psi with the same two
/// arguments, in the same order.
///
/// NOTE(review): presumably the immediate-operand form of _tile_cvtrowd2ps,
/// with \a imm8 selecting the row at compile time -- confirm.
#define _tile_cvtrowd2psi(tsrc, imm8)                                          \
  __builtin_ia32_tcvtrowd2psi((tsrc), (imm8))
/// Expands to a call of __builtin_ia32_tcvtrowps2bf16h with the same two
/// arguments, in the same order.
///
/// NOTE(review): the builtin name suggests a tile-row fp32 -> bf16 conversion
/// ("h" variant); exact element placement is not visible here -- confirm
/// against the ISA reference.
#define _tile_cvtrowps2bf16h(tsrc, row)                                        \
  __builtin_ia32_tcvtrowps2bf16h((tsrc), (row))
/// Expands to a call of __builtin_ia32_tcvtrowps2bf16hi with the same two
/// arguments, in the same order.
///
/// NOTE(review): presumably the immediate-operand form of
/// _tile_cvtrowps2bf16h, with \a imm8 selecting the row -- confirm.
#define _tile_cvtrowps2bf16hi(tsrc, imm8)                                      \
  __builtin_ia32_tcvtrowps2bf16hi((tsrc), (imm8))
/// Expands to a call of __builtin_ia32_tcvtrowps2bf16l with the same two
/// arguments, in the same order.
///
/// NOTE(review): the builtin name suggests a tile-row fp32 -> bf16 conversion
/// ("l" variant); exact element placement is not visible here -- confirm
/// against the ISA reference.
#define _tile_cvtrowps2bf16l(tsrc, row)                                        \
  __builtin_ia32_tcvtrowps2bf16l((tsrc), (row))
/// Expands to a call of __builtin_ia32_tcvtrowps2bf16li with the same two
/// arguments, in the same order.
///
/// NOTE(review): presumably the immediate-operand form of
/// _tile_cvtrowps2bf16l, with \a imm8 selecting the row -- confirm.
#define _tile_cvtrowps2bf16li(tsrc, imm8)                                      \
  __builtin_ia32_tcvtrowps2bf16li((tsrc), (imm8))
/// Expands to a call of __builtin_ia32_tcvtrowps2phh with the same two
/// arguments, in the same order.
///
/// NOTE(review): the builtin name suggests a tile-row fp32 -> fp16 conversion
/// ("h" variant); exact element placement is not visible here -- confirm
/// against the ISA reference.
#define _tile_cvtrowps2phh(tsrc, row)                                          \
  __builtin_ia32_tcvtrowps2phh((tsrc), (row))
/// Expands to a call of __builtin_ia32_tcvtrowps2phhi with the same two
/// arguments, in the same order.
///
/// NOTE(review): presumably the immediate-operand form of _tile_cvtrowps2phh,
/// with \a imm8 selecting the row -- confirm.
#define _tile_cvtrowps2phhi(tsrc, imm8)                                        \
  __builtin_ia32_tcvtrowps2phhi((tsrc), (imm8))
/// Expands to a call of __builtin_ia32_tcvtrowps2phl with the same two
/// arguments, in the same order.
///
/// NOTE(review): the builtin name suggests a tile-row fp32 -> fp16 conversion
/// ("l" variant); exact element placement is not visible here -- confirm
/// against the ISA reference.
#define _tile_cvtrowps2phl(tsrc, row)                                          \
  __builtin_ia32_tcvtrowps2phl((tsrc), (row))
/// Expands to a call of __builtin_ia32_tcvtrowps2phli with the same two
/// arguments, in the same order.
///
/// NOTE(review): presumably the immediate-operand form of _tile_cvtrowps2phl,
/// with \a imm8 selecting the row -- confirm.
#define _tile_cvtrowps2phli(tsrc, imm8)                                        \
  __builtin_ia32_tcvtrowps2phli((tsrc), (imm8))
/// Expands to a call of __builtin_ia32_tilemovrow with the same two arguments,
/// in the same order, and casts the result to \c __m512i.
///
/// NOTE(review): the builtin name suggests TILEMOVROW (copy one tile row into
/// a 512-bit vector); \a a is presumably the tile and \a b the row selector --
/// confirm.
#define _tile_movrow(a, b)                                                     \
  ((__m512i)__builtin_ia32_tilemovrow((a), (b)))
/// Expands to a call of __builtin_ia32_tilemovrowi with the same two
/// arguments, in the same order, and casts the result to \c __m512i.
///
/// NOTE(review): presumably the immediate-operand form of _tile_movrow, with
/// \a b selecting the row at compile time -- confirm.
#define _tile_movrowi(a, b)                                                    \
  ((__m512i)__builtin_ia32_tilemovrowi((a), (b)))
449static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
450 unsigned short m,
unsigned short n, _tile1024i src,
unsigned u) {
451 return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u);
454static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
455_tile_cvtrowps2bf16h_internal(
unsigned short m,
unsigned short n,
456 _tile1024i src,
unsigned u) {
457 return __builtin_ia32_tcvtrowps2bf16h_internal(m, n, src, u);
460static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
461_tile_cvtrowps2bf16l_internal(
unsigned short m,
unsigned short n,
462 _tile1024i src,
unsigned u) {
463 return __builtin_ia32_tcvtrowps2bf16l_internal(m, n, src, u);
466static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
467 unsigned short m,
unsigned short n, _tile1024i src,
unsigned u) {
468 return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
471static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal(
472 unsigned short m,
unsigned short n, _tile1024i src,
unsigned u) {
473 return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
476static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512 _tile_movrow_internal(
477 unsigned short m,
unsigned short n, _tile1024i src,
unsigned u) {
478 return (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u);
496__DEFAULT_FN_ATTRS_AVX512
497static __m512 __tile_cvtrowd2ps(__tile1024i src0,
unsigned src1) {
498 return _tile_cvtrowd2ps_internal(src0.row, src0.col, src0.tile, src1);
515__DEFAULT_FN_ATTRS_AVX512
516static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0,
unsigned src1) {
517 return _tile_cvtrowps2bf16h_internal(src0.row, src0.col, src0.tile, src1);
534__DEFAULT_FN_ATTRS_AVX512
535static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0,
unsigned src1) {
536 return _tile_cvtrowps2bf16l_internal(src0.row, src0.col, src0.tile, src1);
553__DEFAULT_FN_ATTRS_AVX512
554static __m512h __tile_cvtrowps2phh(__tile1024i src0,
unsigned src1) {
555 return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1);
572__DEFAULT_FN_ATTRS_AVX512
573static __m512h __tile_cvtrowps2phl(__tile1024i src0,
unsigned src1) {
574 return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1);
590__DEFAULT_FN_ATTRS_AVX512
591static __m512i __tile_movrow(__tile1024i src0,
unsigned src1) {
592 return (__m512i)_tile_movrow_internal(src0.row, src0.col, src0.tile, src1);