11#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
18#if defined(__cplusplus) && (__cplusplus >= 201103L)
/* Function attribute set: __always_inline__, __nodebug__, target feature
 * "gfni", and a minimum vector width of 128, followed by the `constexpr`
 * keyword. It sits after the `#if defined(__cplusplus) && (__cplusplus >=
 * 201103L)` test above. A later definition of the same name omits
 * `constexpr`; the directive separating the two is not visible here. */
19#define __DEFAULT_FN_ATTRS \
20 __attribute__((__always_inline__, __nodebug__, __target__("gfni"), \
21 __min_vector_width__(128))) constexpr
/* Function attribute set: __always_inline__, __nodebug__, target features
 * "avx,gfni", and a minimum vector width of 256, followed by `constexpr`.
 * Presumably applied to the 256-bit (__m256i) intrinsics; the use sites
 * are not visible in this excerpt. */
24#define __DEFAULT_FN_ATTRS_Y \
25 __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), \
26 __min_vector_width__(256))) constexpr
/* Function attribute set: __always_inline__, __nodebug__, target features
 * "avx512bw,avx512vl,gfni", and a minimum vector width of 128, followed by
 * `constexpr`. Presumably for the masked 128-bit intrinsics; the use sites
 * are not visible in this excerpt. */
29#define __DEFAULT_FN_ATTRS_VL128 \
30 __attribute__((__always_inline__, __nodebug__, \
31 __target__("avx512bw,avx512vl,gfni"), \
32 __min_vector_width__(128))) constexpr
/* Function attribute set: __always_inline__, __nodebug__, target features
 * "avx512bw,avx512vl,gfni", and a minimum vector width of 256, followed by
 * `constexpr`. Presumably for the masked 256-bit intrinsics; the use sites
 * are not visible in this excerpt. */
33#define __DEFAULT_FN_ATTRS_VL256 \
34 __attribute__((__always_inline__, __nodebug__, \
35 __target__("avx512bw,avx512vl,gfni"), \
36 __min_vector_width__(256))) constexpr
/* Function attribute set: __always_inline__, __nodebug__, target features
 * "avx512f,gfni", and a minimum vector width of 512, followed by
 * `constexpr`. Presumably for the unmasked 512-bit intrinsics; the use
 * sites are not visible in this excerpt. */
39#define __DEFAULT_FN_ATTRS_Z \
40 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), \
41 __min_vector_width__(512))) constexpr
/* Function attribute set: __always_inline__, __nodebug__, target features
 * "avx512bw,gfni", and a minimum vector width of 512, followed by
 * `constexpr`. It differs from __DEFAULT_FN_ATTRS_Z only in requiring
 * "avx512bw" instead of "avx512f". Presumably for the masked 512-bit
 * intrinsics; the use sites are not visible in this excerpt. */
43#define __DEFAULT_FN_ATTRS_Z_MASK \
44 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), \
45 __min_vector_width__(512))) constexpr
/* Definition of __DEFAULT_FN_ATTRS without the trailing `constexpr`:
 * __always_inline__, __nodebug__, target feature "gfni", and a minimum
 * vector width of 128. NOTE(review): presumably the alternative branch of
 * the C++11 test at the top of the file; the separating directive is not
 * visible in this excerpt -- confirm. */
47#define __DEFAULT_FN_ATTRS \
48 __attribute__((__always_inline__, __nodebug__, __target__("gfni"), \
49 __min_vector_width__(128)))
/* Definition of __DEFAULT_FN_ATTRS_Y without the trailing `constexpr`:
 * __always_inline__, __nodebug__, target features "avx,gfni", and a
 * minimum vector width of 256. */
52#define __DEFAULT_FN_ATTRS_Y \
53 __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), \
54 __min_vector_width__(256)))
/* Definition of __DEFAULT_FN_ATTRS_VL128 without the trailing `constexpr`:
 * __always_inline__, __nodebug__, target features
 * "avx512bw,avx512vl,gfni", and a minimum vector width of 128. */
57#define __DEFAULT_FN_ATTRS_VL128 \
58 __attribute__((__always_inline__, __nodebug__, \
59 __target__("avx512bw,avx512vl,gfni"), \
60 __min_vector_width__(128)))
/* Definition of __DEFAULT_FN_ATTRS_VL256 without the trailing `constexpr`:
 * __always_inline__, __nodebug__, target features
 * "avx512bw,avx512vl,gfni", and a minimum vector width of 256. */
61#define __DEFAULT_FN_ATTRS_VL256 \
62 __attribute__((__always_inline__, __nodebug__, \
63 __target__("avx512bw,avx512vl,gfni"), \
64 __min_vector_width__(256)))
/* Definition of __DEFAULT_FN_ATTRS_Z without the trailing `constexpr`:
 * __always_inline__, __nodebug__, target features "avx512f,gfni", and a
 * minimum vector width of 512. */
67#define __DEFAULT_FN_ATTRS_Z \
68 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), \
69 __min_vector_width__(512)))
/* Definition of __DEFAULT_FN_ATTRS_Z_MASK without the trailing
 * `constexpr`: __always_inline__, __nodebug__, target features
 * "avx512bw,gfni", and a minimum vector width of 512. */
71#define __DEFAULT_FN_ATTRS_Z_MASK \
72 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), \
73 __min_vector_width__(512)))
76#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
77 ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
78 (__v16qi)(__m128i)(B), \
81#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
82 ((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
83 (__v16qi)(__m128i)(B), \
89 return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
94#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
95 ((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
96 (__v32qi)(__m256i)(B), \
99#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
100 ((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
101 (__v32qi)(__m256i)(B), \
105_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
107 return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
112#ifdef __AVX512BWINTRIN_H
113#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
114 ((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
115 (__v64qi)(__m512i)(B), \
/* Masked 512-bit form of _mm512_gf2p8affineinv_epi64_epi8.
 *   S - __m512i passed as the last operand of the select builtin
 *   U - mask, cast to __mmask64
 *   A, B, I - forwarded unchanged to _mm512_gf2p8affineinv_epi64_epi8
 * Expands to __builtin_ia32_selectb_512(U, <unmasked result>, S), cast back
 * to __m512i. Presumably a per-byte select between the computed result and
 * S under control of U; the builtin's semantics are not defined in this
 * excerpt -- confirm. */
118#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
119 ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
120 (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
121 (__v64qi)(__m512i)(S)))
123#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
124 _mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \
127#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
128 ((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
129 (__v64qi)(__m512i)(B), \
/* Masked 512-bit form of _mm512_gf2p8affine_epi64_epi8.
 *   S - __m512i passed as the last operand of the select builtin
 *   U - mask, cast to __mmask64
 *   A, B, I - forwarded (each wrapped in parentheses) to
 *             _mm512_gf2p8affine_epi64_epi8
 * Expands to __builtin_ia32_selectb_512(U, <unmasked result>, S), cast back
 * to __m512i. Presumably a per-byte select between the computed result and
 * S under control of U; the builtin's semantics are not defined in this
 * excerpt -- confirm. */
132#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
133 ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
134 (__v64qi)_mm512_gf2p8affine_epi64_epi8((A), (B), (I)), \
135 (__v64qi)(__m512i)(S)))
137#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
138 _mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \
142_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
144 return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,
149_mm512_mask_gf2p8mul_epi8(__m512i __S,
__mmask64 __U, __m512i __A, __m512i __B)
151 return (__m512i) __builtin_ia32_selectb_512(__U,
152 (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),
157_mm512_maskz_gf2p8mul_epi8(
__mmask64 __U, __m512i __A, __m512i __B)
164#ifdef __AVX512VLBWINTRIN_H
/* Masked 128-bit form of _mm_gf2p8affineinv_epi64_epi8.
 *   S - __m128i passed as the last operand of the select builtin
 *   U - mask, cast to __mmask16
 *   A, B, I - forwarded unchanged to _mm_gf2p8affineinv_epi64_epi8
 * Expands to __builtin_ia32_selectb_128(U, <unmasked result>, S), cast back
 * to __m128i. Presumably a per-byte select between the computed result and
 * S under control of U; the builtin's semantics are not defined in this
 * excerpt -- confirm. */
165#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
166 ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
167 (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
168 (__v16qi)(__m128i)(S)))
170#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
171 _mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
/* Masked 256-bit form of _mm256_gf2p8affineinv_epi64_epi8.
 *   S - __m256i passed as the last operand of the select builtin
 *   U - mask, cast to __mmask32
 *   A, B, I - forwarded unchanged to _mm256_gf2p8affineinv_epi64_epi8
 * Expands to __builtin_ia32_selectb_256(U, <unmasked result>, S), cast back
 * to __m256i. Presumably a per-byte select between the computed result and
 * S under control of U; the builtin's semantics are not defined in this
 * excerpt -- confirm. */
174#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
175 ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
176 (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
177 (__v32qi)(__m256i)(S)))
179#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
180 _mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
/* Masked 128-bit form of _mm_gf2p8affine_epi64_epi8.
 *   S - __m128i passed as the last operand of the select builtin
 *   U - mask, cast to __mmask16
 *   A, B, I - forwarded unchanged to _mm_gf2p8affine_epi64_epi8
 * Expands to __builtin_ia32_selectb_128(U, <unmasked result>, S), cast back
 * to __m128i. Presumably a per-byte select between the computed result and
 * S under control of U; the builtin's semantics are not defined in this
 * excerpt -- confirm. */
183#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
184 ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
185 (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
186 (__v16qi)(__m128i)(S)))
/* Zero-source variant of _mm_mask_gf2p8affine_epi64_epi8: forwards U, A, B
 * and I unchanged and supplies _mm_setzero_si128() (a 128-bit integer
 * vector initialized to zero) as the S argument. */
188#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
189 _mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), U, A, B, I)
/* Masked 256-bit form of _mm256_gf2p8affine_epi64_epi8.
 *   S - __m256i passed as the last operand of the select builtin
 *   U - mask, cast to __mmask32
 *   A, B, I - forwarded unchanged to _mm256_gf2p8affine_epi64_epi8
 * Expands to __builtin_ia32_selectb_256(U, <unmasked result>, S), cast back
 * to __m256i. Presumably a per-byte select between the computed result and
 * S under control of U; the builtin's semantics are not defined in this
 * excerpt -- confirm. */
191#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
192 ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
193 (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
194 (__v32qi)(__m256i)(S)))
196#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
197 _mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \
201_mm_mask_gf2p8mul_epi8(__m128i __S,
__mmask16 __U, __m128i __A, __m128i __B)
203 return (__m128i) __builtin_ia32_selectb_128(__U,
209_mm_maskz_gf2p8mul_epi8(
__mmask16 __U, __m128i __A, __m128i __B)
216_mm256_mask_gf2p8mul_epi8(__m256i __S,
__mmask32 __U, __m256i __A, __m256i __B)
218 return (__m256i) __builtin_ia32_selectb_256(__U,
219 (__v32qi) _mm256_gf2p8mul_epi8(__A, __B),
224_mm256_maskz_gf2p8mul_epi8(
__mmask32 __U, __m256i __A, __m256i __B)
/* Remove this header's private attribute macros so they do not leak into
 * code that is processed after it.
 * NOTE(review): __DEFAULT_FN_ATTRS_Z_MASK is defined earlier in this file
 * but has no matching #undef among the lines visible here -- confirm
 * whether one follows or whether the macro is leaked. */
231#undef __DEFAULT_FN_ATTRS
232#undef __DEFAULT_FN_ATTRS_Y
233#undef __DEFAULT_FN_ATTRS_Z
234#undef __DEFAULT_FN_ATTRS_VL128
235#undef __DEFAULT_FN_ATTRS_VL256
#define __DEFAULT_FN_ATTRS
unsigned long long __mmask64
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
#define __DEFAULT_FN_ATTRS_Y
#define __DEFAULT_FN_ATTRS_VL128
#define __DEFAULT_FN_ATTRS_Z_MASK
#define __DEFAULT_FN_ATTRS_VL256
#define __DEFAULT_FN_ATTRS_Z
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_gf2p8mul_epi8(__m128i __A, __m128i __B)