/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H

/* exp2a23 */
#define _mm512_exp2a23_round_pd(A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)-1, (int)(R)))

#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
                                       (int)(R)))

#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)(M), (int)(R)))

#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(S, M, A) \
  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(M, A) \
  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_round_ps(A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)-1, (int)(R)))

#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
                                      (int)(R)))

#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)(M), (int)(R)))

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(S, M, A) \
  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(M, A) \
  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

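/* Usage sketch (editorial addition, not part of the upstream header): a
 * minimal wrapper computing 2^x per double lane with relative error bounded
 * by 2^-23. The helper name is an illustrative assumption; passing
 * _MM_FROUND_NO_EXC selects the suppress-all-exceptions (SAE) form, the only
 * override these instructions accept besides _MM_FROUND_CUR_DIRECTION. */
static __inline__ __m512d
    __attribute__((__always_inline__, __target__("avx512er")))
__example_exp2a23_pd(__m512d __x) {
  return _mm512_exp2a23_round_pd(__x, _MM_FROUND_NO_EXC);
}
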
/* rsqrt28 */
#define _mm512_rsqrt28_round_pd(A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(S), (__mmask8)(M), \
                                          (int)(R)))

#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(M), (int)(R)))

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(S, M, A) \
  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(M, A) \
  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_round_ps(A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R)))

#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(S), (__mmask16)(M), \
                                         (int)(R)))

#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(M), (int)(R)))

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(M, A) \
  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

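/* Usage sketch (editorial addition): zero-masking keeps non-positive lanes
 * from producing NaN/Inf while the remaining lanes get 1/sqrt(x) with
 * relative error bounded by 2^-28. The helper name is an illustrative
 * assumption; _mm512_cmp_ps_mask and _CMP_GT_OQ come from the AVX512F/AVX
 * headers that <immintrin.h> includes before this one. */
static __inline__ __m512
    __attribute__((__always_inline__, __target__("avx512er")))
__example_safe_rsqrt28_ps(__m512 __x) {
  __mmask16 __pos = _mm512_cmp_ps_mask(__x, _mm512_setzero_ps(), _CMP_GT_OQ);
  return _mm512_maskz_rsqrt28_ps(__pos, __x);
}
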
#define _mm_rsqrt28_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(S), \
                                               (__mmask8)(M), (int)(R)))

#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(M), (int)(R)))

#define _mm_rsqrt28_ss(A, B) \
  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(S, M, A, B) \
  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(M, A, B) \
  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(S), \
                                                (__mmask8)(M), (int)(R)))

#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(M), (int)(R)))

#define _mm_rsqrt28_sd(A, B) \
  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(S, M, A, B) \
  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(M, A, B) \
  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

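/* Usage sketch (editorial addition): the scalar forms take their input from
 * the low element of the second operand and pass the upper elements of the
 * first operand through unchanged, so a plain float can be round-tripped as
 * below. The helper name is an illustrative assumption. */
static __inline__ float
    __attribute__((__always_inline__, __target__("avx512er")))
__example_rsqrt28f(float __x) {
  __m128 __v = _mm_set_ss(__x);
  return _mm_cvtss_f32(_mm_rsqrt28_ss(__v, __v));
}
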
/* rcp28 */
#define _mm512_rcp28_round_pd(A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)))

#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(S), (__mmask8)(M), \
                                        (int)(R)))

#define _mm512_maskz_rcp28_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(M), (int)(R)))

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(S, M, A) \
  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(M, A) \
  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_round_ps(A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)))

#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(S), (__mmask16)(M), \
                                       (int)(R)))

#define _mm512_maskz_rcp28_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(M), (int)(R)))

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(S, M, A) \
  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(M, A) \
  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

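/* Usage sketch (editorial addition): because the 2^-28 relative error bound
 * is below single-precision ULP for normal inputs, a * rcp28(b) can often
 * stand in for a / b without a Newton-Raphson refinement step, though the
 * result is still not guaranteed to be correctly rounded. The helper name is
 * an illustrative assumption. */
static __inline__ __m512
    __attribute__((__always_inline__, __target__("avx512er")))
__example_fast_div_ps(__m512 __a, __m512 __b) {
  return _mm512_mul_ps(__a, _mm512_rcp28_ps(__b));
}
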
#define _mm_rcp28_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(S), \
                                             (__mmask8)(M), (int)(R)))

#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(M), (int)(R)))

#define _mm_rcp28_ss(A, B) \
  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(S, M, A, B) \
  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(M, A, B) \
  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(S), \
                                              (__mmask8)(M), (int)(R)))

#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(M), (int)(R)))

#define _mm_rcp28_sd(A, B) \
  _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_sd(S, M, A, B) \
  _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(M, A, B) \
  _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

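/* Usage sketch (editorial addition): merge-masking with the scalar forms.
 * When the low mask bit is clear, the low element of the result is taken
 * from __s instead of the approximation; the upper element always comes
 * from __a. The helper name is an illustrative assumption. */
static __inline__ __m128d
    __attribute__((__always_inline__, __target__("avx512er")))
__example_rcp28_merge_sd(__m128d __s, __mmask8 __m, __m128d __a, __m128d __b) {
  return _mm_mask_rcp28_sd(__s, __m, __a, __b);
}
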
#endif /* __AVX512ERINTRIN_H */