clang 20.0.0git
pmmintrin.h
Go to the documentation of this file.
/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10#ifndef __PMMINTRIN_H
11#define __PMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <emmintrin.h>
18
/* Default attribute set for the functions defined in this file. When
 * __EVEX512__ is defined and __AVX10_1_512__ is not, the target string
 * additionally carries "no-evex512". */
#if !defined(__EVEX512__) || defined(__AVX10_1_512__)
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("sse3"),           \
                 __min_vector_width__(128)))
#else
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("sse3,no-evex512"), __min_vector_width__(128)))
#endif
29
/* Same as __DEFAULT_FN_ATTRS, with `constexpr` appended when the header is
 * compiled as C++11 or later. */
#if !defined(__cplusplus) || (__cplusplus < 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#endif
35
36/// Loads data from an unaligned memory location to elements in a 128-bit
37/// vector.
38///
39/// If the address of the data is not 16-byte aligned, the instruction may
40/// read two adjacent aligned blocks of memory to retrieve the requested
41/// data.
42///
43/// \headerfile <x86intrin.h>
44///
45/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.
46///
47/// \param __p
48/// A pointer to a 128-bit integer vector containing integer values.
49/// \returns A 128-bit vector containing the moved values.
50static __inline__ __m128i __DEFAULT_FN_ATTRS
51_mm_lddqu_si128(__m128i_u const *__p)
52{
53 return (__m128i)__builtin_ia32_lddqu((char const *)__p);
54}
55
56/// Adds the even-indexed values and subtracts the odd-indexed values of
57/// two 128-bit vectors of [4 x float].
58///
59/// \headerfile <x86intrin.h>
60///
61/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.
62///
63/// \param __a
64/// A 128-bit vector of [4 x float] containing the left source operand.
65/// \param __b
66/// A 128-bit vector of [4 x float] containing the right source operand.
67/// \returns A 128-bit vector of [4 x float] containing the alternating sums and
68/// differences of both operands.
69static __inline__ __m128 __DEFAULT_FN_ATTRS
70_mm_addsub_ps(__m128 __a, __m128 __b)
71{
72 return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
73}
74
75/// Horizontally adds the adjacent pairs of values contained in two
76/// 128-bit vectors of [4 x float].
77///
78/// \headerfile <x86intrin.h>
79///
80/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.
81///
82/// \param __a
83/// A 128-bit vector of [4 x float] containing one of the source operands.
84/// The horizontal sums of the values are stored in the lower bits of the
85/// destination.
86/// \param __b
87/// A 128-bit vector of [4 x float] containing one of the source operands.
88/// The horizontal sums of the values are stored in the upper bits of the
89/// destination.
90/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
91/// both operands.
92static __inline__ __m128 __DEFAULT_FN_ATTRS
93_mm_hadd_ps(__m128 __a, __m128 __b)
94{
95 return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
96}
97
98/// Horizontally subtracts the adjacent pairs of values contained in two
99/// 128-bit vectors of [4 x float].
100///
101/// \headerfile <x86intrin.h>
102///
103/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.
104///
105/// \param __a
106/// A 128-bit vector of [4 x float] containing one of the source operands.
107/// The horizontal differences between the values are stored in the lower
108/// bits of the destination.
109/// \param __b
110/// A 128-bit vector of [4 x float] containing one of the source operands.
111/// The horizontal differences between the values are stored in the upper
112/// bits of the destination.
113/// \returns A 128-bit vector of [4 x float] containing the horizontal
114/// differences of both operands.
115static __inline__ __m128 __DEFAULT_FN_ATTRS
116_mm_hsub_ps(__m128 __a, __m128 __b)
117{
118 return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
119}
120
121/// Moves and duplicates odd-indexed values from a 128-bit vector
122/// of [4 x float] to float values stored in a 128-bit vector of
123/// [4 x float].
124///
125/// \headerfile <x86intrin.h>
126///
127/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.
128///
129/// \param __a
130/// A 128-bit vector of [4 x float]. \n
131/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of
132/// the destination. \n
133/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
134/// destination.
135/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
136/// values.
137static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
139{
140 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
141}
142
143/// Duplicates even-indexed values from a 128-bit vector of
144/// [4 x float] to float values stored in a 128-bit vector of [4 x float].
145///
146/// \headerfile <x86intrin.h>
147///
148/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.
149///
150/// \param __a
151/// A 128-bit vector of [4 x float] \n
152/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of
153/// the destination. \n
154/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
155/// destination.
156/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
157/// values.
158static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
160{
161 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
162}
163
164/// Adds the even-indexed values and subtracts the odd-indexed values of
165/// two 128-bit vectors of [2 x double].
166///
167/// \headerfile <x86intrin.h>
168///
169/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.
170///
171/// \param __a
172/// A 128-bit vector of [2 x double] containing the left source operand.
173/// \param __b
174/// A 128-bit vector of [2 x double] containing the right source operand.
175/// \returns A 128-bit vector of [2 x double] containing the alternating sums
176/// and differences of both operands.
177static __inline__ __m128d __DEFAULT_FN_ATTRS
178_mm_addsub_pd(__m128d __a, __m128d __b)
179{
180 return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
181}
182
183/// Horizontally adds the pairs of values contained in two 128-bit
184/// vectors of [2 x double].
185///
186/// \headerfile <x86intrin.h>
187///
188/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.
189///
190/// \param __a
191/// A 128-bit vector of [2 x double] containing one of the source operands.
192/// The horizontal sum of the values is stored in the lower bits of the
193/// destination.
194/// \param __b
195/// A 128-bit vector of [2 x double] containing one of the source operands.
196/// The horizontal sum of the values is stored in the upper bits of the
197/// destination.
198/// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
199/// both operands.
200static __inline__ __m128d __DEFAULT_FN_ATTRS
201_mm_hadd_pd(__m128d __a, __m128d __b)
202{
203 return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
204}
205
206/// Horizontally subtracts the pairs of values contained in two 128-bit
207/// vectors of [2 x double].
208///
209/// \headerfile <x86intrin.h>
210///
211/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.
212///
213/// \param __a
214/// A 128-bit vector of [2 x double] containing one of the source operands.
215/// The horizontal difference of the values is stored in the lower bits of
216/// the destination.
217/// \param __b
218/// A 128-bit vector of [2 x double] containing one of the source operands.
219/// The horizontal difference of the values is stored in the upper bits of
220/// the destination.
221/// \returns A 128-bit vector of [2 x double] containing the horizontal
222/// differences of both operands.
223static __inline__ __m128d __DEFAULT_FN_ATTRS
224_mm_hsub_pd(__m128d __a, __m128d __b)
225{
226 return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
227}
228
/// Loads one double-precision value and duplicates it into both elements
///    of a 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_loaddup_pd(double const *dp);
/// \endcode
///
/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
///
/// \param dp
///    A pointer to the double-precision value to be loaded and duplicated.
/// \returns A 128-bit vector of [2 x double] holding the duplicated value in
///    both elements.
#define _mm_loaddup_pd(dp) _mm_load1_pd((dp))
245
246/// Moves and duplicates the double-precision value in the lower bits of
247/// a 128-bit vector of [2 x double] to double-precision values stored in a
248/// 128-bit vector of [2 x double].
249///
250/// \headerfile <x86intrin.h>
251///
252/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
253///
254/// \param __a
255/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits
256/// [127:64] and [63:0] of the destination.
257/// \returns A 128-bit vector of [2 x double] containing the moved and
258/// duplicated values.
259static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
261{
262 return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
263}
264
265/// Establishes a linear address memory range to be monitored and puts
266/// the processor in the monitor event pending state. Data stored in the
267/// monitored address range causes the processor to exit the pending state.
268///
269/// The \c MONITOR instruction can be used in kernel mode, and in other modes
270/// if MSR <c> C001_0015h[MonMwaitUserEn] </c> is set.
271///
272/// \headerfile <x86intrin.h>
273///
274/// This intrinsic corresponds to the \c MONITOR instruction.
275///
276/// \param __p
277/// The memory range to be monitored. The size of the range is determined by
278/// CPUID function 0000_0005h.
279/// \param __extensions
280/// Optional extensions for the monitoring state.
281/// \param __hints
282/// Optional hints for the monitoring state.
283static __inline__ void __DEFAULT_FN_ATTRS
284_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
285{
286 __builtin_ia32_monitor(__p, __extensions, __hints);
287}
288
289/// Used with the \c MONITOR instruction to wait while the processor is in
290/// the monitor event pending state. Data stored in the monitored address
291/// range, or an interrupt, causes the processor to exit the pending state.
292///
293/// The \c MWAIT instruction can be used in kernel mode, and in other modes if
294/// MSR <c> C001_0015h[MonMwaitUserEn] </c> is set.
295///
296/// \headerfile <x86intrin.h>
297///
298/// This intrinsic corresponds to the \c MWAIT instruction.
299///
300/// \param __extensions
301/// Optional extensions for the monitoring state, which can vary by
302/// processor.
303/// \param __hints
304/// Optional hints for the monitoring state, which can vary by processor.
305static __inline__ void __DEFAULT_FN_ATTRS
306_mm_mwait(unsigned __extensions, unsigned __hints)
307{
308 __builtin_ia32_mwait(__extensions, __hints);
309}
310
311#undef __DEFAULT_FN_ATTRS
312#undef __DEFAULT_FN_ATTRS_CONSTEXPR
313
314#endif /* __PMMINTRIN_H */
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ uint32_t volatile uint32_t * __p
Definition: arm_acle.h:88
static __inline__ void int __a
Definition: emmintrin.h:4079
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hadd_pd(__m128d __a, __m128d __b)
Horizontally adds the pairs of values contained in two 128-bit vectors of [2 x double].
Definition: pmmintrin.h:201
#define __DEFAULT_FN_ATTRS
Definition: pmmintrin.h:25
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hadd_ps(__m128 __a, __m128 __b)
Horizontally adds the adjacent pairs of values contained in two 128-bit vectors of [4 x float].
Definition: pmmintrin.h:93
static __inline__ void __DEFAULT_FN_ATTRS _mm_mwait(unsigned __extensions, unsigned __hints)
Used with the MONITOR instruction to wait while the processor is in the monitor event pending state.
Definition: pmmintrin.h:306
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_addsub_pd(__m128d __a, __m128d __b)
Adds the even-indexed values and subtracts the odd-indexed values of two 128-bit vectors of [2 x doub...
Definition: pmmintrin.h:178
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hsub_pd(__m128d __a, __m128d __b)
Horizontally subtracts the pairs of values contained in two 128-bit vectors of [2 x double].
Definition: pmmintrin.h:224
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double]...
Definition: pmmintrin.h:260
#define __DEFAULT_FN_ATTRS_CONSTEXPR
Definition: pmmintrin.h:33
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-b...
Definition: pmmintrin.h:159
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_addsub_ps(__m128 __a, __m128 __b)
Adds the even-indexed values and subtracts the odd-indexed values of two 128-bit vectors of [4 x floa...
Definition: pmmintrin.h:70
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hsub_ps(__m128 __a, __m128 __b)
Horizontally subtracts the adjacent pairs of values contained in two 128-bit vectors of [4 x float].
Definition: pmmintrin.h:116
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_lddqu_si128(__m128i_u const *__p)
Loads data from an unaligned memory location to elements in a 128-bit vector.
Definition: pmmintrin.h:51
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored i...
Definition: pmmintrin.h:138
static __inline__ void __DEFAULT_FN_ATTRS _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
Establishes a linear address memory range to be monitored and puts the processor in the monitor event...
Definition: pmmintrin.h:284