gfniintrin.h
/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __GFNIINTRIN_H
#define __GFNIINTRIN_H

/* Default attributes for simple form (no masking). */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS                                                    \
  __attribute__((__always_inline__, __nodebug__, __target__("gfni"),         \
                 __min_vector_width__(128))) constexpr

/* Default attributes for YMM unmasked form. */
#define __DEFAULT_FN_ATTRS_Y                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"),     \
                 __min_vector_width__(256))) constexpr

/* Default attributes for VLX masked forms. */
#define __DEFAULT_FN_ATTRS_VL128                                              \
  __attribute__((__always_inline__, __nodebug__,                             \
                 __target__("avx512bw,avx512vl,gfni"),                       \
                 __min_vector_width__(128))) constexpr
#define __DEFAULT_FN_ATTRS_VL256                                              \
  __attribute__((__always_inline__, __nodebug__,                             \
                 __target__("avx512bw,avx512vl,gfni"),                       \
                 __min_vector_width__(256))) constexpr

/* Default attributes for ZMM unmasked forms. */
#define __DEFAULT_FN_ATTRS_Z                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), \
                 __min_vector_width__(512))) constexpr
/* Default attributes for ZMM masked forms. */
#define __DEFAULT_FN_ATTRS_Z_MASK                                             \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"),\
                 __min_vector_width__(512))) constexpr
#else
#define __DEFAULT_FN_ATTRS                                                    \
  __attribute__((__always_inline__, __nodebug__, __target__("gfni"),         \
                 __min_vector_width__(128)))

/* Default attributes for YMM unmasked form. */
#define __DEFAULT_FN_ATTRS_Y                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"),     \
                 __min_vector_width__(256)))

/* Default attributes for VLX masked forms. */
#define __DEFAULT_FN_ATTRS_VL128                                              \
  __attribute__((__always_inline__, __nodebug__,                             \
                 __target__("avx512bw,avx512vl,gfni"),                       \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_VL256                                              \
  __attribute__((__always_inline__, __nodebug__,                             \
                 __target__("avx512bw,avx512vl,gfni"),                       \
                 __min_vector_width__(256)))

/* Default attributes for ZMM unmasked forms. */
#define __DEFAULT_FN_ATTRS_Z                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"), \
                 __min_vector_width__(512)))
/* Default attributes for ZMM masked forms. */
#define __DEFAULT_FN_ATTRS_Z_MASK                                             \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"),\
                 __min_vector_width__(512)))
#endif

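/// For each byte x in \a A, computes the affine transformation
/// \a B * inv(x) + \a I in GF(2^8), where inv(x) is the multiplicative
/// inverse of the byte with respect to the reduction polynomial
/// x^8 + x^4 + x^3 + x + 1 (with inv(0) defined as 0), each 64-bit lane of
/// \a B supplies an 8x8 bit matrix, and the immediate byte \a I is XORed
/// into each result.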
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
                                                   (__v16qi)(__m128i)(B), \
                                                   (char)(I)))

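/// For each byte x in \a A, computes the affine transformation
/// \a B * x + \a I, where each 64-bit lane of \a B supplies an 8x8 bit
/// matrix over GF(2) and the immediate byte \a I is XORed into each result.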
#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
                                                (__v16qi)(__m128i)(B), \
                                                (char)(I)))

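/// Multiplies the corresponding bytes of \a __A and \a __B in GF(2^8),
/// reduced by the polynomial x^8 + x^4 + x^3 + x + 1 (the AES field
/// polynomial).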
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
                                                   (__v16qi) __B);
}

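/* 256-bit (YMM) forms, available when AVX is enabled. */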
#ifdef __AVXINTRIN_H
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
                                                   (__v32qi)(__m256i)(B), \
                                                   (char)(I)))

#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
                                                (__v32qi)(__m256i)(B), \
                                                (char)(I)))

static __inline__ __m256i __DEFAULT_FN_ATTRS_Y
_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
                                                   (__v32qi) __B);
}
#endif /* __AVXINTRIN_H */

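/* 512-bit (ZMM) unmasked and masked forms, available when AVX512F/AVX512BW
   are enabled. */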
#ifdef __AVX512BWINTRIN_H
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
                                                   (__v64qi)(__m512i)(B), \
                                                   (char)(I)))

#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
      (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
      (__v64qi)(__m512i)(S)))

#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  _mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \
                                        U, A, B, I)

#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
                                                (__v64qi)(__m512i)(B), \
                                                (char)(I)))

#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
      (__v64qi)_mm512_gf2p8affine_epi64_epi8((A), (B), (I)), \
      (__v64qi)(__m512i)(S)))

#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  _mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \
                                     U, A, B, I)

static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,
                                                   (__v64qi) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_Z_MASK
_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_selectb_512(__U,
              (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),
              (__v64qi) __S);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_Z_MASK
_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(),
                                   __U, __A, __B);
}
#endif /* __AVX512BWINTRIN_H */

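/* 128-bit and 256-bit masked forms, available when AVX512BW and AVX512VL
   are enabled. */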
#ifdef __AVX512VLBWINTRIN_H
#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
      (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
      (__v16qi)(__m128i)(S)))

#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  _mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
                                     U, A, B, I)

#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
      (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
      (__v32qi)(__m256i)(S)))

#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  _mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
                                        U, A, B, I)

#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
      (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
      (__v16qi)(__m128i)(S)))

#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  _mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), U, A, B, I)

#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
      (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
      (__v32qi)(__m256i)(S)))

#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  _mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \
                                     U, A, B, I)

static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_selectb_128(__U,
              (__v16qi) _mm_gf2p8mul_epi8(__A, __B),
              (__v16qi) __S);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)
{
  return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),
                                __U, __A, __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256
_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_selectb_256(__U,
              (__v32qi) _mm256_gf2p8mul_epi8(__A, __B),
              (__v32qi) __S);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256
_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)
{
  return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),
                                   __U, __A, __B);
}
#endif /* __AVX512VLBWINTRIN_H */

#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_Y
#undef __DEFAULT_FN_ATTRS_Z
#undef __DEFAULT_FN_ATTRS_Z_MASK
#undef __DEFAULT_FN_ATTRS_VL128
#undef __DEFAULT_FN_ATTRS_VL256

#endif /* __GFNIINTRIN_H */

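A minimal usage sketch (not part of the header), assuming a GFNI-capable CPU
and compilation with -mgfni; reverse_bits_epi8 is a hypothetical helper name.
It uses the affine transform with the bit-matrix constant 0x8040201008040201,
which maps bit i of each byte to bit 7-i (0x0102040810204080 is the identity
matrix for this instruction), and checks one GF(2^8) product against the
reduction polynomial x^8 + x^4 + x^3 + x + 1.

#include <immintrin.h>
#include <stdio.h>

/* Hypothetical helper: reverse the bits of every byte in a vector. */
static __m128i reverse_bits_epi8(__m128i x) {
  const __m128i m = _mm_set1_epi64x((long long)0x8040201008040201ULL);
  return _mm_gf2p8affine_epi64_epi8(x, m, 0);
}

int main(void) {
  unsigned char out[16];

  /* 0x01 reversed bit-wise within each byte becomes 0x80. */
  _mm_storeu_si128((__m128i *)out, reverse_bits_epi8(_mm_set1_epi8(0x01)));
  printf("0x%02x\n", out[0]); /* 0x80 */

  /* In GF(2^8), x * x^7 = x^8, which reduces to x^4 + x^3 + x + 1 = 0x1b. */
  _mm_storeu_si128((__m128i *)out,
                   _mm_gf2p8mul_epi8(_mm_set1_epi8(0x02),
                                     _mm_set1_epi8((char)0x80)));
  printf("0x%02x\n", out[0]); /* 0x1b */
  return 0;
}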