clang 20.0.0git
bmi2intrin.h
Go to the documentation of this file.
1/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <bmi2intrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __BMI2INTRIN_H
15#define __BMI2INTRIN_H
16
/* Attribute set shared by every intrinsic defined in this header: force
 * inlining, omit debug info, and enable the "bmi2" target feature for the
 * function body. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
19
20/// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
21/// starting at bit number \a __Y.
22///
23/// \code{.operation}
24/// i := __Y[7:0]
25/// result := __X
26/// IF i < 32
27/// result[31:i] := 0
28/// FI
29/// \endcode
30///
31/// \headerfile <immintrin.h>
32///
33/// This intrinsic corresponds to the \c BZHI instruction.
34///
35/// \param __X
36/// The 32-bit source value to copy.
37/// \param __Y
38/// The lower 8 bits specify the bit number of the lowest bit to zero.
39/// \returns The partially zeroed 32-bit value.
40static __inline__ unsigned int __DEFAULT_FN_ATTRS
41_bzhi_u32(unsigned int __X, unsigned int __Y)
42{
43 return __builtin_ia32_bzhi_si(__X, __Y);
44}
45
46/// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X
47/// into the 32-bit result, according to the mask in the unsigned 32-bit
48/// integer \a __Y. All other bits of the result are zero.
49///
50/// \code{.operation}
51/// i := 0
52/// result := 0
53/// FOR m := 0 TO 31
54/// IF __Y[m] == 1
55/// result[m] := __X[i]
56/// i := i + 1
57/// ENDIF
58/// ENDFOR
59/// \endcode
60///
61/// \headerfile <immintrin.h>
62///
63/// This intrinsic corresponds to the \c PDEP instruction.
64///
65/// \param __X
66/// The 32-bit source value to copy.
67/// \param __Y
68/// The 32-bit mask specifying where to deposit source bits.
69/// \returns The 32-bit result.
70static __inline__ unsigned int __DEFAULT_FN_ATTRS
71_pdep_u32(unsigned int __X, unsigned int __Y)
72{
73 return __builtin_ia32_pdep_si(__X, __Y);
74}
75
76/// Extract (gather) bits from the unsigned 32-bit integer \a __X into the
77/// low-order bits of the 32-bit result, according to the mask in the
78/// unsigned 32-bit integer \a __Y. All other bits of the result are zero.
79///
80/// \code{.operation}
81/// i := 0
82/// result := 0
83/// FOR m := 0 TO 31
84/// IF __Y[m] == 1
85/// result[i] := __X[m]
86/// i := i + 1
87/// ENDIF
88/// ENDFOR
89/// \endcode
90///
91/// \headerfile <immintrin.h>
92///
93/// This intrinsic corresponds to the \c PEXT instruction.
94///
95/// \param __X
96/// The 32-bit source value to copy.
97/// \param __Y
98/// The 32-bit mask specifying which source bits to extract.
99/// \returns The 32-bit result.
100static __inline__ unsigned int __DEFAULT_FN_ATTRS
101_pext_u32(unsigned int __X, unsigned int __Y)
102{
103 return __builtin_ia32_pext_si(__X, __Y);
104}
105
106/// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a
107/// 64-bit product. Stores the upper 32 bits of the product in the
108/// memory at \a __P and returns the lower 32 bits.
109///
110/// \code{.operation}
111/// Store32(__P, (__X * __Y)[63:32])
112/// result := (__X * __Y)[31:0]
113/// \endcode
114///
115/// \headerfile <immintrin.h>
116///
117/// This intrinsic corresponds to the \c MULX instruction.
118///
119/// \param __X
120/// An unsigned 32-bit multiplicand.
121/// \param __Y
122/// An unsigned 32-bit multiplicand.
123/// \param __P
124/// A pointer to memory for storing the upper half of the product.
125/// \returns The lower half of the product.
126static __inline__ unsigned int __DEFAULT_FN_ATTRS
127_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
128{
129 unsigned long long __res = (unsigned long long) __X * __Y;
130 *__P = (unsigned int)(__res >> 32);
131 return (unsigned int)__res;
132}
133
134#ifdef __x86_64__
135
136/// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits
137/// starting at bit number \a __Y.
138///
139/// \code{.operation}
140/// i := __Y[7:0]
141/// result := __X
142/// IF i < 64
143/// result[63:i] := 0
144/// FI
145/// \endcode
146///
147/// \headerfile <immintrin.h>
148///
149/// This intrinsic corresponds to the \c BZHI instruction.
150///
151/// \param __X
152/// The 64-bit source value to copy.
153/// \param __Y
154/// The lower 8 bits specify the bit number of the lowest bit to zero.
155/// \returns The partially zeroed 64-bit value.
156static __inline__ unsigned long long __DEFAULT_FN_ATTRS
157_bzhi_u64(unsigned long long __X, unsigned long long __Y)
158{
159 return __builtin_ia32_bzhi_di(__X, __Y);
160}
161
162/// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X
163/// into the 64-bit result, according to the mask in the unsigned 64-bit
164/// integer \a __Y. All other bits of the result are zero.
165///
166/// \code{.operation}
167/// i := 0
168/// result := 0
169/// FOR m := 0 TO 63
170/// IF __Y[m] == 1
171/// result[m] := __X[i]
172/// i := i + 1
173/// ENDIF
174/// ENDFOR
175/// \endcode
176///
177/// \headerfile <immintrin.h>
178///
179/// This intrinsic corresponds to the \c PDEP instruction.
180///
181/// \param __X
182/// The 64-bit source value to copy.
183/// \param __Y
184/// The 64-bit mask specifying where to deposit source bits.
185/// \returns The 64-bit result.
186static __inline__ unsigned long long __DEFAULT_FN_ATTRS
187_pdep_u64(unsigned long long __X, unsigned long long __Y)
188{
189 return __builtin_ia32_pdep_di(__X, __Y);
190}
191
192/// Extract (gather) bits from the unsigned 64-bit integer \a __X into the
193/// low-order bits of the 64-bit result, according to the mask in the
194/// unsigned 64-bit integer \a __Y. All other bits of the result are zero.
195///
196/// \code{.operation}
197/// i := 0
198/// result := 0
199/// FOR m := 0 TO 63
200/// IF __Y[m] == 1
201/// result[i] := __X[m]
202/// i := i + 1
203/// ENDIF
204/// ENDFOR
205/// \endcode
206///
207/// \headerfile <immintrin.h>
208///
209/// This intrinsic corresponds to the \c PEXT instruction.
210///
211/// \param __X
212/// The 64-bit source value to copy.
213/// \param __Y
214/// The 64-bit mask specifying which source bits to extract.
215/// \returns The 64-bit result.
216static __inline__ unsigned long long __DEFAULT_FN_ATTRS
217_pext_u64(unsigned long long __X, unsigned long long __Y)
218{
219 return __builtin_ia32_pext_di(__X, __Y);
220}
221
222/// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a
223/// 128-bit product. Stores the upper 64 bits of the product to the
224/// memory addressed by \a __P and returns the lower 64 bits.
225///
226/// \code{.operation}
227/// Store64(__P, (__X * __Y)[127:64])
228/// result := (__X * __Y)[63:0]
229/// \endcode
230///
231/// \headerfile <immintrin.h>
232///
233/// This intrinsic corresponds to the \c MULX instruction.
234///
235/// \param __X
236/// An unsigned 64-bit multiplicand.
237/// \param __Y
238/// An unsigned 64-bit multiplicand.
239/// \param __P
240/// A pointer to memory for storing the upper half of the product.
241/// \returns The lower half of the product.
242static __inline__ unsigned long long __DEFAULT_FN_ATTRS
243_mulx_u64 (unsigned long long __X, unsigned long long __Y,
244 unsigned long long *__P)
245{
246 unsigned __int128 __res = (unsigned __int128) __X * __Y;
247 *__P = (unsigned long long) (__res >> 64);
248 return (unsigned long long) __res;
249}
250
251#endif /* __x86_64__ */
252
253#undef __DEFAULT_FN_ATTRS
254
255#endif /* __BMI2INTRIN_H */
__device__ int
static __inline__ unsigned int __DEFAULT_FN_ATTRS _pext_u32(unsigned int __X, unsigned int __Y)
Extract (gather) bits from the unsigned 32-bit integer __X into the low-order bits of the 32-bit resu...
Definition: bmi2intrin.h:101
static __inline__ unsigned int __DEFAULT_FN_ATTRS _bzhi_u32(unsigned int __X, unsigned int __Y)
Copies the unsigned 32-bit integer __X and zeroes the upper bits starting at bit number __Y.
Definition: bmi2intrin.h:41
static __inline__ unsigned int __DEFAULT_FN_ATTRS _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
Multiplies the unsigned 32-bit integers __X and __Y to form a 64-bit product.
Definition: bmi2intrin.h:127
#define __DEFAULT_FN_ATTRS
Definition: bmi2intrin.h:18
static __inline__ unsigned int __DEFAULT_FN_ATTRS _pdep_u32(unsigned int __X, unsigned int __Y)
Deposit (scatter) low-order bits from the unsigned 32-bit integer __X into the 32-bit result,...
Definition: bmi2intrin.h:71
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition: bmi2intrin.h:19