clang 22.0.0git
mmintrin.h
Go to the documentation of this file.
1/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
/* Include guard: everything below is skipped if __MMINTRIN_H is already
 * defined. */
10#ifndef __MMINTRIN_H
11#define __MMINTRIN_H
12
/* Stop compilation on any target that defines neither __i386__ nor
 * __x86_64__. */
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
/* 64-bit vector type used for the parameters and results of the intrinsics
 * below; it holds one long long and is 8-byte aligned. */
17typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));
18
/* Element views of an 8-byte vector: 1 x long long, 2 x int, 4 x short and
 * 8 x char. The signedness of the __v8qi lanes follows plain char. */
19typedef long long __v1di __attribute__((__vector_size__(8)));
20typedef int __v2si __attribute__((__vector_size__(8)));
21typedef short __v4hi __attribute__((__vector_size__(8)));
22typedef char __v8qi __attribute__((__vector_size__(8)));
23
24/* Unsigned types */
25typedef unsigned long long __v1du __attribute__ ((__vector_size__ (8)));
26typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
27typedef unsigned short __v4hu __attribute__((__vector_size__(8)));
28typedef unsigned char __v8qu __attribute__((__vector_size__(8)));
29
30/* We need an explicitly signed variant for char. Note that this shouldn't
31 * appear in the interface though. */
32typedef signed char __v8qs __attribute__((__vector_size__(8)));
33
/* 16-byte counterparts, used below as operand and result types of the
 * 128-bit builtins. */
34/* SSE/SSE2 types */
35typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
36typedef long long __v2di __attribute__ ((__vector_size__ (16)));
37typedef int __v4si __attribute__((__vector_size__(16)));
38typedef short __v8hi __attribute__((__vector_size__(16)));
39typedef char __v16qi __attribute__((__vector_size__(16)));
40
41/* Define the default attributes for the functions in this file. */
/* Both variants carry the same attribute list (always_inline, nodebug,
 * target "sse2", min_vector_width 128). The C++11-and-later variant also
 * appends constexpr, so functions declared with this macro are constexpr
 * there. */
42#if defined(__cplusplus) && (__cplusplus >= 201103L)
43#define __DEFAULT_FN_ATTRS_SSE2 \
44 __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
45 __min_vector_width__(128))) constexpr
46#else
47#define __DEFAULT_FN_ATTRS_SSE2 \
48 __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
49 __min_vector_width__(128)))
50#endif
51
/* __trunc64(x): view x as 2 x i64 and keep only element 0, i.e. return the
 * low 64 bits of a 128-bit vector as an __m64. */
52#define __trunc64(x) \
53 (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
/* __zext128(x): view x as 2 x i32 and append two elements taken from an
 * all-zero vector, i.e. widen a 64-bit vector to an __m128i whose upper
 * 64 bits are zero. */
54#define __zext128(x) \
55 (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
56 1, 2, 3)
57
58/// Clears the MMX state by setting the state of the x87 stack registers
59/// to empty.
60///
61/// \headerfile <x86intrin.h>
62///
63/// This intrinsic corresponds to the <c> EMMS </c> instruction.
64///
65static __inline__ void
66 __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
67 _mm_empty(void) {
  /* Declared with its own attribute list (target "mmx") instead of
   * __DEFAULT_FN_ATTRS_SSE2, so it never picks up that macro's constexpr. */
68 __builtin_ia32_emms();
69}
70
71/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
72/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
73///
74/// \headerfile <x86intrin.h>
75///
76/// This intrinsic corresponds to the <c> MOVD </c> instruction.
77///
78/// \param __i
79/// A 32-bit integer value.
80/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
81/// parameter. The upper 32 bits are set to 0.
82static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi32_si64(int __i) {
  /* Build the 2 x i32 vector {__i, 0}, then reinterpret its 64 bits as
   * __m64. */
83 return __extension__(__m64)(__v2si){__i, 0};
84}
85
86/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
87/// signed integer.
88///
89/// \headerfile <x86intrin.h>
90///
91/// This intrinsic corresponds to the <c> MOVD </c> instruction.
92///
93/// \param __m
94/// A 64-bit integer vector.
95/// \returns A 32-bit signed integer value containing the lower 32 bits of the
96/// parameter.
97static __inline__ int __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_si32(__m64 __m) {
  /* View the vector as 2 x i32 and read element 0. */
98 return ((__v2si)__m)[0];
99}
100
101/// Casts a 64-bit signed integer value into a 64-bit integer vector.
102///
103/// \headerfile <x86intrin.h>
104///
105/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
106///
107/// \param __i
108/// A 64-bit signed integer.
109/// \returns A 64-bit integer vector containing the same bitwise pattern as the
110/// parameter.
111static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_m64(long long __i) {
  /* Wrap the scalar in a 1 x i64 vector and reinterpret it as __m64. */
112 return __extension__(__m64)(__v1di){__i};
113}
114
115/// Casts a 64-bit integer vector into a 64-bit signed integer value.
116///
117/// \headerfile <x86intrin.h>
118///
119/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
120///
121/// \param __m
122/// A 64-bit integer vector.
123/// \returns A 64-bit signed integer containing the same bitwise pattern as the
124/// parameter.
125static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 _mm_cvtm64_si64(__m64 __m) {
  /* View the vector as 1 x i64 and read its only element. */
126 return ((__v1di)__m)[0];
127}
128
129/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
130/// vector parameters of [4 x i16] into 8-bit signed integer values, and
131/// constructs a 64-bit integer vector of [8 x i8] as the result.
132///
133/// Values greater than 127 (0x7F) are saturated to 0x7F. Values less than
134/// -128 (0x80 as a signed 8-bit pattern) are saturated to 0x80.
135///
136/// \headerfile <x86intrin.h>
137///
138/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
139///
140/// \param __m1
141/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
142/// written to the lower 32 bits of the result.
143/// \param __m2
144/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
145/// written to the upper 32 bits of the result.
146/// \returns A 64-bit integer vector of [8 x i8] containing the converted
147/// values.
148static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi16(__m64 __m1,
149 __m64 __m2) {
  /* The shuffle concatenates __m1 (element 0) and __m2 (element 1) into one
   * 128-bit vector, which is viewed as 8 x i16 and passed to the packsswb128
   * builtin with an all-zero second operand. __trunc64 keeps the low 64 bits
   * of the builtin's result. */
150 return __trunc64(__builtin_ia32_packsswb128(
151 (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
152}
153
154/// Converts, with saturation, 32-bit signed integers from both 64-bit integer
155/// vector parameters of [2 x i32] into 16-bit signed integer values, and
156/// constructs a 64-bit integer vector of [4 x i16] as the result.
157///
158/// Values greater than 32767 (0x7FFF) are saturated to 0x7FFF. Values less
159/// than -32768 (0x8000 as a signed 16-bit pattern) are saturated to 0x8000.
160///
161/// \headerfile <x86intrin.h>
162///
163/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
164///
165/// \param __m1
166/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
167/// written to the lower 32 bits of the result.
168/// \param __m2
169/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
170/// written to the upper 32 bits of the result.
171/// \returns A 64-bit integer vector of [4 x i16] containing the converted
172/// values.
173static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi32(__m64 __m1,
174 __m64 __m2) {
  /* The shuffle concatenates __m1 (element 0) and __m2 (element 1) into one
   * 128-bit vector, which is viewed as 4 x i32 and passed to the packssdw128
   * builtin with an all-zero second operand. __trunc64 keeps the low 64 bits
   * of the builtin's result. */
175 return __trunc64(__builtin_ia32_packssdw128(
176 (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){}));
177}
178
179/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
180/// vector parameters of [4 x i16] into 8-bit unsigned integer values, and
181/// constructs a 64-bit integer vector of [8 x i8] as the result.
182///
183/// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are
184/// saturated to 0.
185///
186/// \headerfile <x86intrin.h>
187///
188/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
189///
190/// \param __m1
191/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
192/// written to the lower 32 bits of the result.
193/// \param __m2
194/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
195/// written to the upper 32 bits of the result.
196/// \returns A 64-bit integer vector of [8 x i8] containing the converted
197/// values.
198static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pu16(__m64 __m1,
199 __m64 __m2) {
  /* The shuffle concatenates __m1 (element 0) and __m2 (element 1) into one
   * 128-bit vector, which is viewed as 8 x i16 and passed to the packuswb128
   * builtin with an all-zero second operand. __trunc64 keeps the low 64 bits
   * of the builtin's result. */
200 return __trunc64(__builtin_ia32_packuswb128(
201 (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
202}
203
204/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
205/// and interleaves them into a 64-bit integer vector of [8 x i8].
206///
207/// \headerfile <x86intrin.h>
208///
209/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
210///
211/// \param __m1
212/// A 64-bit integer vector of [8 x i8]. \n
213/// Bits [39:32] are written to bits [7:0] of the result. \n
214/// Bits [47:40] are written to bits [23:16] of the result. \n
215/// Bits [55:48] are written to bits [39:32] of the result. \n
216/// Bits [63:56] are written to bits [55:48] of the result.
217/// \param __m2
218/// A 64-bit integer vector of [8 x i8]. \n
219/// Bits [39:32] are written to bits [15:8] of the result. \n
220/// Bits [47:40] are written to bits [31:24] of the result. \n
221/// Bits [55:48] are written to bits [47:40] of the result. \n
222/// Bits [63:56] are written to bits [63:56] of the result.
223/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
224/// values.
225static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi8(__m64 __m1,
226 __m64 __m2) {
  /* Shuffle indices 0-7 pick bytes of __m1 and 8-15 pick bytes of __m2, so
   * the result alternates bytes 4..7 of __m1 with bytes 4..7 of __m2,
   * starting with __m1. */
227 return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 12, 5,
228 13, 6, 14, 7, 15);
229}
230
231/// Unpacks the upper 32 bits from two 64-bit integer vectors of
232/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
233///
234/// \headerfile <x86intrin.h>
235///
236/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
237///
238/// \param __m1
239/// A 64-bit integer vector of [4 x i16]. \n
240/// Bits [47:32] are written to bits [15:0] of the result. \n
241/// Bits [63:48] are written to bits [47:32] of the result.
242/// \param __m2
243/// A 64-bit integer vector of [4 x i16]. \n
244/// Bits [47:32] are written to bits [31:16] of the result. \n
245/// Bits [63:48] are written to bits [63:48] of the result.
246/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
247/// values.
248static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi16(__m64 __m1,
249 __m64 __m2) {
  /* Shuffle indices 0-3 pick elements of __m1 and 4-7 pick elements of
   * __m2, so the result is {__m1[2], __m2[2], __m1[3], __m2[3]}. */
250 return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 6, 3, 7);
251}
252
253/// Unpacks the upper 32 bits from two 64-bit integer vectors of
254/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
255///
256/// \headerfile <x86intrin.h>
257///
258/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
259///
260/// \param __m1
261/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
262/// the lower 32 bits of the result.
263/// \param __m2
264/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
265/// the upper 32 bits of the result.
266/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
267/// values.
268static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi32(__m64 __m1,
269 __m64 __m2) {
  /* Index 1 is element 1 of __m1 and index 3 is element 1 of __m2, so the
   * result is {__m1[1], __m2[1]}. */
270 return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
271}
272
273/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
274/// and interleaves them into a 64-bit integer vector of [8 x i8].
275///
276/// \headerfile <x86intrin.h>
277///
278/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
279///
280/// \param __m1
281/// A 64-bit integer vector of [8 x i8]. \n
282/// Bits [7:0] are written to bits [7:0] of the result. \n
283/// Bits [15:8] are written to bits [23:16] of the result. \n
284/// Bits [23:16] are written to bits [39:32] of the result. \n
285/// Bits [31:24] are written to bits [55:48] of the result.
286/// \param __m2
287/// A 64-bit integer vector of [8 x i8]. \n
288/// Bits [7:0] are written to bits [15:8] of the result. \n
289/// Bits [15:8] are written to bits [31:24] of the result. \n
290/// Bits [23:16] are written to bits [47:40] of the result. \n
291/// Bits [31:24] are written to bits [63:56] of the result.
292/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
293/// values.
294static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi8(__m64 __m1,
295 __m64 __m2) {
  /* Shuffle indices 0-7 pick bytes of __m1 and 8-15 pick bytes of __m2, so
   * the result alternates bytes 0..3 of __m1 with bytes 0..3 of __m2,
   * starting with __m1. */
296 return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8, 1, 9,
297 2, 10, 3, 11);
298}
299
300/// Unpacks the lower 32 bits from two 64-bit integer vectors of
301/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
302///
303/// \headerfile <x86intrin.h>
304///
305/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
306///
307/// \param __m1
308/// A 64-bit integer vector of [4 x i16]. \n
309/// Bits [15:0] are written to bits [15:0] of the result. \n
310/// Bits [31:16] are written to bits [47:32] of the result.
311/// \param __m2
312/// A 64-bit integer vector of [4 x i16]. \n
313/// Bits [15:0] are written to bits [31:16] of the result. \n
314/// Bits [31:16] are written to bits [63:48] of the result.
315/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
316/// values.
317static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi16(__m64 __m1,
318 __m64 __m2) {
  /* Shuffle indices 0-3 pick elements of __m1 and 4-7 pick elements of
   * __m2, so the result is {__m1[0], __m2[0], __m1[1], __m2[1]}. */
319 return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4, 1, 5);
320}
321
322/// Unpacks the lower 32 bits from two 64-bit integer vectors of
323/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
324///
325/// \headerfile <x86intrin.h>
326///
327/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
328///
329/// \param __m1
330/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
331/// the lower 32 bits of the result.
332/// \param __m2
333/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
334/// the upper 32 bits of the result.
335/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
336/// values.
337static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi32(__m64 __m1,
338 __m64 __m2) {
  /* Index 0 is element 0 of __m1 and index 2 is element 0 of __m2, so the
   * result is {__m1[0], __m2[0]}. */
339 return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
340}
341
342/// Adds each 8-bit integer element of the first 64-bit integer vector
343/// of [8 x i8] to the corresponding 8-bit integer element of the second
344/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
345/// packed into a 64-bit integer vector of [8 x i8].
346///
347/// \headerfile <x86intrin.h>
348///
349/// This intrinsic corresponds to the <c> PADDB </c> instruction.
350///
351/// \param __m1
352/// A 64-bit integer vector of [8 x i8].
353/// \param __m2
354/// A 64-bit integer vector of [8 x i8].
355/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
356/// parameters.
357static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi8(__m64 __m1,
358 __m64 __m2) {
  /* The lane-wise add is done on unsigned char lanes (__v8qu) and the sum is
   * reinterpreted as __m64. NOTE(review): the unsigned view is presumably
   * chosen so that overflow wraps instead of being signed overflow. */
359 return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
360}
361
362/// Adds each 16-bit integer element of the first 64-bit integer vector
363/// of [4 x i16] to the corresponding 16-bit integer element of the second
364/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
365/// packed into a 64-bit integer vector of [4 x i16].
366///
367/// \headerfile <x86intrin.h>
368///
369/// This intrinsic corresponds to the <c> PADDW </c> instruction.
370///
371/// \param __m1
372/// A 64-bit integer vector of [4 x i16].
373/// \param __m2
374/// A 64-bit integer vector of [4 x i16].
375/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
376/// parameters.
377static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi16(__m64 __m1,
378 __m64 __m2) {
  /* The lane-wise add is done on unsigned short lanes (__v4hu) and the sum
   * is reinterpreted as __m64. NOTE(review): the unsigned view is presumably
   * chosen so that overflow wraps instead of being signed overflow. */
379 return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
380}
381
382/// Adds each 32-bit integer element of the first 64-bit integer vector
383/// of [2 x i32] to the corresponding 32-bit integer element of the second
384/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
385/// packed into a 64-bit integer vector of [2 x i32].
386///
387/// \headerfile <x86intrin.h>
388///
389/// This intrinsic corresponds to the <c> PADDD </c> instruction.
390///
391/// \param __m1
392/// A 64-bit integer vector of [2 x i32].
393/// \param __m2
394/// A 64-bit integer vector of [2 x i32].
395/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
396/// parameters.
397static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi32(__m64 __m1,
398 __m64 __m2) {
  /* The lane-wise add is done on unsigned int lanes (__v2su) and the sum is
   * reinterpreted as __m64. NOTE(review): the unsigned view is presumably
   * chosen so that overflow wraps instead of being signed overflow. */
399 return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
400}
401
402/// Adds, with saturation, each 8-bit signed integer element of the first
403/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed
404/// integer element of the second 64-bit integer vector of [8 x i8].
405///
406/// Sums greater than 127 (0x7F) are saturated to 0x7F. Sums less than -128
407/// (0x80 as a signed 8-bit pattern) are saturated to 0x80. The results are
408/// packed into a 64-bit integer vector of [8 x i8].
409///
410/// \headerfile <x86intrin.h>
411///
412/// This intrinsic corresponds to the <c> PADDSB </c> instruction.
413///
414/// \param __m1
415/// A 64-bit integer vector of [8 x i8].
416/// \param __m2
417/// A 64-bit integer vector of [8 x i8].
418/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
419/// of both parameters.
420static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi8(__m64 __m1,
421 __m64 __m2) {
  /* __v8qs has signed char lanes, so the saturating add sees signed 8-bit
   * operands whatever the signedness of plain char is. */
422 return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
423}
424
425/// Adds, with saturation, each 16-bit signed integer element of the first
426/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed
427/// integer element of the second 64-bit integer vector of [4 x i16].
428///
429/// Sums greater than 32767 (0x7FFF) are saturated to 0x7FFF. Sums less than
430/// -32768 (0x8000 as a signed 16-bit pattern) are saturated to 0x8000. The
431/// results are packed into a 64-bit integer vector of [4 x i16].
432///
433/// \headerfile <x86intrin.h>
434///
435/// This intrinsic corresponds to the <c> PADDSW </c> instruction.
436///
437/// \param __m1
438/// A 64-bit integer vector of [4 x i16].
439/// \param __m2
440/// A 64-bit integer vector of [4 x i16].
441/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
442/// of both parameters.
443static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi16(__m64 __m1,
444 __m64 __m2) {
  /* __v4hi has short lanes, so the saturating add sees signed 16-bit
   * operands. */
445 return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
446}
447
448/// Adds, with saturation, each 8-bit unsigned integer element of the first
449/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned
450/// integer element of the second 64-bit integer vector of [8 x i8].
451///
452/// Sums greater than 0xFF are saturated to 0xFF. The results are packed
453/// into a 64-bit integer vector of [8 x i8].
454///
455/// \headerfile <x86intrin.h>
456///
457/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
458///
459/// \param __m1
460/// A 64-bit integer vector of [8 x i8].
461/// \param __m2
462/// A 64-bit integer vector of [8 x i8].
463/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
464/// unsigned sums of both parameters.
465static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu8(__m64 __m1,
466 __m64 __m2) {
  /* __v8qu has unsigned char lanes, so the saturating add sees unsigned
   * 8-bit operands. */
467 return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2);
468}
469
470/// Adds, with saturation, each 16-bit unsigned integer element of the first
471/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned
472/// integer element of the second 64-bit integer vector of [4 x i16].
473///
474/// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed
475/// into a 64-bit integer vector of [4 x i16].
476///
477/// \headerfile <x86intrin.h>
478///
479/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
480///
481/// \param __m1
482/// A 64-bit integer vector of [4 x i16].
483/// \param __m2
484/// A 64-bit integer vector of [4 x i16].
485/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
486/// unsigned sums of both parameters.
487static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu16(__m64 __m1,
488 __m64 __m2) {
  /* __v4hu has unsigned short lanes, so the saturating add sees unsigned
   * 16-bit operands. */
489 return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2);
490}
491
492/// Subtracts each 8-bit integer element of the second 64-bit integer
493/// vector of [8 x i8] from the corresponding 8-bit integer element of the
494/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
495/// are packed into a 64-bit integer vector of [8 x i8].
496///
497/// \headerfile <x86intrin.h>
498///
499/// This intrinsic corresponds to the <c> PSUBB </c> instruction.
500///
501/// \param __m1
502/// A 64-bit integer vector of [8 x i8] containing the minuends.
503/// \param __m2
504/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
505/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
506/// both parameters.
507static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi8(__m64 __m1,
508 __m64 __m2) {
  /* The lane-wise subtract is done on unsigned char lanes (__v8qu) and the
   * difference is reinterpreted as __m64. NOTE(review): the unsigned view is
   * presumably chosen so that underflow wraps instead of being signed
   * overflow. */
509 return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
510}
511
512/// Subtracts each 16-bit integer element of the second 64-bit integer
513/// vector of [4 x i16] from the corresponding 16-bit integer element of the
514/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
515/// results are packed into a 64-bit integer vector of [4 x i16].
516///
517/// \headerfile <x86intrin.h>
518///
519/// This intrinsic corresponds to the <c> PSUBW </c> instruction.
520///
521/// \param __m1
522/// A 64-bit integer vector of [4 x i16] containing the minuends.
523/// \param __m2
524/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
525/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
526/// both parameters.
527static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi16(__m64 __m1,
528 __m64 __m2) {
  /* The lane-wise subtract is done on unsigned short lanes (__v4hu) and the
   * difference is reinterpreted as __m64. NOTE(review): the unsigned view is
   * presumably chosen so that underflow wraps instead of being signed
   * overflow. */
529 return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
530}
531
532/// Subtracts each 32-bit integer element of the second 64-bit integer
533/// vector of [2 x i32] from the corresponding 32-bit integer element of the
534/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
535/// results are packed into a 64-bit integer vector of [2 x i32].
536///
537/// \headerfile <x86intrin.h>
538///
539/// This intrinsic corresponds to the <c> PSUBD </c> instruction.
540///
541/// \param __m1
542/// A 64-bit integer vector of [2 x i32] containing the minuends.
543/// \param __m2
544/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
545/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
546/// both parameters.
547static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi32(__m64 __m1,
548 __m64 __m2) {
  /* The lane-wise subtract is done on unsigned int lanes (__v2su) and the
   * difference is reinterpreted as __m64. NOTE(review): the unsigned view is
   * presumably chosen so that underflow wraps instead of being signed
   * overflow. */
549 return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
550}
551
552/// Subtracts, with saturation, each 8-bit signed integer element of the second
553/// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed
554/// integer element of the first 64-bit integer vector of [8 x i8].
555///
556/// Results greater than 127 (0x7F) are saturated to 0x7F. Results less than
557/// -128 (0x80 as a signed 8-bit pattern) are saturated to 0x80. The results
558/// are packed into a 64-bit integer vector of [8 x i8].
559///
560/// \headerfile <x86intrin.h>
561///
562/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
563///
564/// \param __m1
565/// A 64-bit integer vector of [8 x i8] containing the minuends.
566/// \param __m2
567/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
568/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
569/// differences of both parameters.
570static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi8(__m64 __m1,
571 __m64 __m2) {
  /* __v8qs has signed char lanes, so the saturating subtract sees signed
   * 8-bit operands whatever the signedness of plain char is. */
572 return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
573}
574
575/// Subtracts, with saturation, each 16-bit signed integer element of the
576/// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit
577/// signed integer element of the first 64-bit integer vector of [4 x i16].
578///
579/// Results greater than 32767 (0x7FFF) are saturated to 0x7FFF. Results less
580/// than -32768 (0x8000 as a signed 16-bit pattern) are saturated to 0x8000.
581/// The results are packed into a 64-bit integer vector of [4 x i16].
582///
583/// \headerfile <x86intrin.h>
584///
585/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
586///
587/// \param __m1
588/// A 64-bit integer vector of [4 x i16] containing the minuends.
589/// \param __m2
590/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
591/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
592/// differences of both parameters.
593static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi16(__m64 __m1,
594 __m64 __m2) {
  /* __v4hi has short lanes, so the saturating subtract sees signed 16-bit
   * operands. */
595 return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
596}
597
598/// Subtracts each 8-bit unsigned integer element of the second 64-bit
599/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
600/// element of the first 64-bit integer vector of [8 x i8].
601///
602/// If an element of the first vector is less than the corresponding element
603/// of the second vector, the result is saturated to 0. The results are
604/// packed into a 64-bit integer vector of [8 x i8].
605///
606/// \headerfile <x86intrin.h>
607///
608/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
609///
610/// \param __m1
611/// A 64-bit integer vector of [8 x i8] containing the minuends.
612/// \param __m2
613/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
614/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
615/// differences of both parameters.
616static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu8(__m64 __m1,
617 __m64 __m2) {
  /* __v8qu has unsigned char lanes, so the saturating subtract sees unsigned
   * 8-bit operands. */
618 return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2);
619}
620
621/// Subtracts each 16-bit unsigned integer element of the second 64-bit
622/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
623/// integer element of the first 64-bit integer vector of [4 x i16].
624///
625/// If an element of the first vector is less than the corresponding element
626/// of the second vector, the result is saturated to 0. The results are
627/// packed into a 64-bit integer vector of [4 x i16].
628///
629/// \headerfile <x86intrin.h>
630///
631/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
632///
633/// \param __m1
634/// A 64-bit integer vector of [4 x i16] containing the minuends.
635/// \param __m2
636/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
637/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
638/// differences of both parameters.
639static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu16(__m64 __m1,
640 __m64 __m2) {
  /* __v4hu has unsigned short lanes, so the saturating subtract sees
   * unsigned 16-bit operands. */
641 return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2);
642}
643
644/// Multiplies each 16-bit signed integer element of the first 64-bit
645/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
646/// element of the second 64-bit integer vector of [4 x i16], producing four
647/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
648/// The lower 32 bits of these two sums are packed into a 64-bit integer
649/// vector of [2 x i32].
650///
651/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]
652/// of both parameters are multiplied, and the sum of both results is written
653/// to bits [31:0] of the result.
654///
655/// \headerfile <x86intrin.h>
656///
657/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
658///
659/// \param __m1
660/// A 64-bit integer vector of [4 x i16].
661/// \param __m2
662/// A 64-bit integer vector of [4 x i16].
663/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
664/// products of both parameters.
665static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_madd_pi16(__m64 __m1,
666 __m64 __m2) {
  /* Each operand is widened by __zext128 to 128 bits with a zero upper half
   * and viewed as 8 x i16 for the pmaddwd128 builtin. __trunc64 then keeps
   * the low 64 bits of the builtin's result. */
667 return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__zext128(__m1),
668 (__v8hi)__zext128(__m2)));
669}
670
671/// Multiplies each 16-bit signed integer element of the first 64-bit
672/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
673/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
674/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
675///
676/// \headerfile <x86intrin.h>
677///
678/// This intrinsic corresponds to the <c> PMULHW </c> instruction.
679///
680/// \param __m1
681/// A 64-bit integer vector of [4 x i16].
682/// \param __m2
683/// A 64-bit integer vector of [4 x i16].
684/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
685/// of the products of both parameters.
686static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mulhi_pi16(__m64 __m1,
687 __m64 __m2) {
  /* Each operand is widened by __zext128 to 128 bits with a zero upper half
   * and viewed as 8 x i16 for the pmulhw128 builtin. __trunc64 then keeps
   * the low 64 bits of the builtin's result. */
688 return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__zext128(__m1),
689 (__v8hi)__zext128(__m2)));
690}
691
692/// Multiplies each 16-bit signed integer element of the first 64-bit
693/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
694/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
695/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
696///
697/// \headerfile <x86intrin.h>
698///
699/// This intrinsic corresponds to the <c> PMULLW </c> instruction.
700///
701/// \param __m1
702/// A 64-bit integer vector of [4 x i16].
703/// \param __m2
704/// A 64-bit integer vector of [4 x i16].
705/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
706/// of the products of both parameters.
707static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mullo_pi16(__m64 __m1,
708 __m64 __m2) {
  /* The lane-wise multiply is done on unsigned short lanes (__v4hu), so each
   * lane of the product keeps 16 bits; the result is reinterpreted as
   * __m64. */
709 return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
710}
711
712/// Left-shifts each 16-bit signed integer element of the first
713/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
714/// of bits specified by the second parameter, which is a 64-bit integer. The
715/// lower 16 bits of the results are packed into a 64-bit integer vector of
716/// [4 x i16].
717///
718/// \headerfile <x86intrin.h>
719///
720/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
721///
722/// \param __m
723/// A 64-bit integer vector of [4 x i16].
724/// \param __count
725/// A 64-bit integer vector interpreted as a single 64-bit integer.
726/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
727/// values. If \a __count is greater than or equal to 16, the result is set
728/// to all 0.
729static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
730_mm_sll_pi16(__m64 __m, __m64 __count)
731{
  /* Both the data and the shift count are widened by __zext128 to 128 bits
   * with a zero upper half and viewed as 8 x i16 for the psllw128 builtin.
   * __trunc64 then keeps the low 64 bits of the builtin's result. */
732 return __trunc64(__builtin_ia32_psllw128((__v8hi)__zext128(__m),
733 (__v8hi)__zext128(__count)));
734}
735
736/// Left-shifts each 16-bit signed integer element of a 64-bit integer
737/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
738/// The lower 16 bits of the results are packed into a 64-bit integer vector
739/// of [4 x i16].
740///
741/// \headerfile <x86intrin.h>
742///
743/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
744///
745/// \param __m
746/// A 64-bit integer vector of [4 x i16].
747/// \param __count
748/// A 32-bit integer value.
749/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
750/// values. If \a __count is greater than or equal to 16, the result is set
751/// to all 0.
752static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi16(__m64 __m,
753 int __count) {
  /* The data is widened by __zext128 to 128 bits with a zero upper half and
   * viewed as 8 x i16; the count is passed to the psllwi128 builtin as a
   * plain int. __trunc64 then keeps the low 64 bits of the builtin's
   * result. */
754 return __trunc64(__builtin_ia32_psllwi128((__v8hi)__zext128(__m), __count));
755}
756
757/// Left-shifts each 32-bit signed integer element of the first
758/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
759/// of bits specified by the second parameter, which is a 64-bit integer. The
760/// lower 32 bits of the results are packed into a 64-bit integer vector of
761/// [2 x i32].
762///
763/// \headerfile <x86intrin.h>
764///
765/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
766///
767/// \param __m
768/// A 64-bit integer vector of [2 x i32].
769/// \param __count
770/// A 64-bit integer vector interpreted as a single 64-bit integer.
771/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
772/// values. If \a __count is greater or equal to 32, the result is set to all
773/// 0.
774static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
775_mm_sll_pi32(__m64 __m, __m64 __count)
776{
777 return __trunc64(__builtin_ia32_pslld128((__v4si)__zext128(__m),
778 (__v4si)__zext128(__count)));
779}
780
781/// Left-shifts each 32-bit signed integer element of a 64-bit integer
782/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
783/// The lower 32 bits of the results are packed into a 64-bit integer vector
784/// of [2 x i32].
785///
786/// \headerfile <x86intrin.h>
787///
788/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
789///
790/// \param __m
791/// A 64-bit integer vector of [2 x i32].
792/// \param __count
793/// A 32-bit integer value.
794/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
795/// values. If \a __count is greater or equal to 32, the result is set to all
796/// 0.
797static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi32(__m64 __m,
798 int __count) {
799 return __trunc64(__builtin_ia32_pslldi128((__v4si)__zext128(__m), __count));
800}
801
802/// Left-shifts the first 64-bit integer parameter by the number of bits
803/// specified by the second 64-bit integer parameter. The lower 64 bits of
804/// result are returned.
805///
806/// \headerfile <x86intrin.h>
807///
808/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
809///
810/// \param __m
811/// A 64-bit integer vector interpreted as a single 64-bit integer.
812/// \param __count
813/// A 64-bit integer vector interpreted as a single 64-bit integer.
814/// \returns A 64-bit integer vector containing the left-shifted value. If
815/// \a __count is greater or equal to 64, the result is set to 0.
816static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
817_mm_sll_si64(__m64 __m, __m64 __count)
818{
819 return __trunc64(__builtin_ia32_psllq128((__v2di)__zext128(__m),
820 (__v2di)__zext128(__count)));
821}
822
823/// Left-shifts the first parameter, which is a 64-bit integer, by the
824/// number of bits specified by the second parameter, which is a 32-bit
825/// integer. The lower 64 bits of result are returned.
826///
827/// \headerfile <x86intrin.h>
828///
829/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
830///
831/// \param __m
832/// A 64-bit integer vector interpreted as a single 64-bit integer.
833/// \param __count
834/// A 32-bit integer value.
835/// \returns A 64-bit integer vector containing the left-shifted value. If
836/// \a __count is greater or equal to 64, the result is set to 0.
837static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_si64(__m64 __m,
838 int __count) {
839 return __trunc64(__builtin_ia32_psllqi128((__v2di)__zext128(__m), __count));
840}
841
842/// Right-shifts each 16-bit integer element of the first parameter,
843/// which is a 64-bit integer vector of [4 x i16], by the number of bits
844/// specified by the second parameter, which is a 64-bit integer.
845///
846/// High-order bits are filled with the sign bit of the initial value of each
847/// 16-bit element. The 16-bit results are packed into a 64-bit integer
848/// vector of [4 x i16].
849///
850/// \headerfile <x86intrin.h>
851///
852/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
853///
854/// \param __m
855/// A 64-bit integer vector of [4 x i16].
856/// \param __count
857/// A 64-bit integer vector interpreted as a single 64-bit integer.
858/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
859/// values.
860static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
861_mm_sra_pi16(__m64 __m, __m64 __count)
862{
863 return __trunc64(__builtin_ia32_psraw128((__v8hi)__zext128(__m),
864 (__v8hi)__zext128(__count)));
865}
866
867/// Right-shifts each 16-bit integer element of a 64-bit integer vector
868/// of [4 x i16] by the number of bits specified by a 32-bit integer.
869///
870/// High-order bits are filled with the sign bit of the initial value of each
871/// 16-bit element. The 16-bit results are packed into a 64-bit integer
872/// vector of [4 x i16].
873///
874/// \headerfile <x86intrin.h>
875///
876/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
877///
878/// \param __m
879/// A 64-bit integer vector of [4 x i16].
880/// \param __count
881/// A 32-bit integer value.
882/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
883/// values.
884static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi16(__m64 __m,
885 int __count) {
886 return __trunc64(__builtin_ia32_psrawi128((__v8hi)__zext128(__m), __count));
887}
888
889/// Right-shifts each 32-bit integer element of the first parameter,
890/// which is a 64-bit integer vector of [2 x i32], by the number of bits
891/// specified by the second parameter, which is a 64-bit integer.
892///
893/// High-order bits are filled with the sign bit of the initial value of each
894/// 32-bit element. The 32-bit results are packed into a 64-bit integer
895/// vector of [2 x i32].
896///
897/// \headerfile <x86intrin.h>
898///
899/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
900///
901/// \param __m
902/// A 64-bit integer vector of [2 x i32].
903/// \param __count
904/// A 64-bit integer vector interpreted as a single 64-bit integer.
905/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
906/// values.
907static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
908_mm_sra_pi32(__m64 __m, __m64 __count)
909{
910 return __trunc64(__builtin_ia32_psrad128((__v4si)__zext128(__m),
911 (__v4si)__zext128(__count)));
912}
913
914/// Right-shifts each 32-bit integer element of a 64-bit integer vector
915/// of [2 x i32] by the number of bits specified by a 32-bit integer.
916///
917/// High-order bits are filled with the sign bit of the initial value of each
918/// 32-bit element. The 32-bit results are packed into a 64-bit integer
919/// vector of [2 x i32].
920///
921/// \headerfile <x86intrin.h>
922///
923/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
924///
925/// \param __m
926/// A 64-bit integer vector of [2 x i32].
927/// \param __count
928/// A 32-bit integer value.
929/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
930/// values.
931static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi32(__m64 __m,
932 int __count) {
933 return __trunc64(__builtin_ia32_psradi128((__v4si)__zext128(__m), __count));
934}
935
936/// Right-shifts each 16-bit integer element of the first parameter,
937/// which is a 64-bit integer vector of [4 x i16], by the number of bits
938/// specified by the second parameter, which is a 64-bit integer.
939///
940/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
941/// integer vector of [4 x i16].
942///
943/// \headerfile <x86intrin.h>
944///
945/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
946///
947/// \param __m
948/// A 64-bit integer vector of [4 x i16].
949/// \param __count
950/// A 64-bit integer vector interpreted as a single 64-bit integer.
951/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
952/// values.
953static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
954_mm_srl_pi16(__m64 __m, __m64 __count)
955{
956 return __trunc64(__builtin_ia32_psrlw128((__v8hi)__zext128(__m),
957 (__v8hi)__zext128(__count)));
958}
959
960/// Right-shifts each 16-bit integer element of a 64-bit integer vector
961/// of [4 x i16] by the number of bits specified by a 32-bit integer.
962///
963/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
964/// integer vector of [4 x i16].
965///
966/// \headerfile <x86intrin.h>
967///
968/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
969///
970/// \param __m
971/// A 64-bit integer vector of [4 x i16].
972/// \param __count
973/// A 32-bit integer value.
974/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
975/// values.
976static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi16(__m64 __m,
977 int __count) {
978 return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__zext128(__m), __count));
979}
980
981/// Right-shifts each 32-bit integer element of the first parameter,
982/// which is a 64-bit integer vector of [2 x i32], by the number of bits
983/// specified by the second parameter, which is a 64-bit integer.
984///
985/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
986/// integer vector of [2 x i32].
987///
988/// \headerfile <x86intrin.h>
989///
990/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
991///
992/// \param __m
993/// A 64-bit integer vector of [2 x i32].
994/// \param __count
995/// A 64-bit integer vector interpreted as a single 64-bit integer.
996/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
997/// values.
998static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
999_mm_srl_pi32(__m64 __m, __m64 __count)
1000{
1001 return __trunc64(__builtin_ia32_psrld128((__v4si)__zext128(__m),
1002 (__v4si)__zext128(__count)));
1003}
1004
1005/// Right-shifts each 32-bit integer element of a 64-bit integer vector
1006/// of [2 x i32] by the number of bits specified by a 32-bit integer.
1007///
1008/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1009/// integer vector of [2 x i32].
1010///
1011/// \headerfile <x86intrin.h>
1012///
1013/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1014///
1015/// \param __m
1016/// A 64-bit integer vector of [2 x i32].
1017/// \param __count
1018/// A 32-bit integer value.
1019/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1020/// values.
1021static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi32(__m64 __m,
1022 int __count) {
1023 return __trunc64(__builtin_ia32_psrldi128((__v4si)__zext128(__m), __count));
1024}
1025
1026/// Right-shifts the first 64-bit integer parameter by the number of bits
1027/// specified by the second 64-bit integer parameter.
1028///
1029/// High-order bits are cleared.
1030///
1031/// \headerfile <x86intrin.h>
1032///
1033/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1034///
1035/// \param __m
1036/// A 64-bit integer vector interpreted as a single 64-bit integer.
1037/// \param __count
1038/// A 64-bit integer vector interpreted as a single 64-bit integer.
1039/// \returns A 64-bit integer vector containing the right-shifted value.
1040static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1041_mm_srl_si64(__m64 __m, __m64 __count)
1042{
1043 return __trunc64(__builtin_ia32_psrlq128((__v2di)__zext128(__m),
1044 (__v2di)__zext128(__count)));
1045}
1046
1047/// Right-shifts the first parameter, which is a 64-bit integer, by the
1048/// number of bits specified by the second parameter, which is a 32-bit
1049/// integer.
1050///
1051/// High-order bits are cleared.
1052///
1053/// \headerfile <x86intrin.h>
1054///
1055/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1056///
1057/// \param __m
1058/// A 64-bit integer vector interpreted as a single 64-bit integer.
1059/// \param __count
1060/// A 32-bit integer value.
1061/// \returns A 64-bit integer vector containing the right-shifted value.
1062static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_si64(__m64 __m,
1063 int __count) {
1064 return __trunc64(__builtin_ia32_psrlqi128((__v2di)__zext128(__m), __count));
1065}
1066
1067/// Performs a bitwise AND of two 64-bit integer vectors.
1068///
1069/// \headerfile <x86intrin.h>
1070///
1071/// This intrinsic corresponds to the <c> PAND </c> instruction.
1072///
1073/// \param __m1
1074/// A 64-bit integer vector.
1075/// \param __m2
1076/// A 64-bit integer vector.
1077/// \returns A 64-bit integer vector containing the bitwise AND of both
1078/// parameters.
1079static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_and_si64(__m64 __m1,
1080 __m64 __m2) {
1081 return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
1082}
1083
1084/// Performs a bitwise NOT of the first 64-bit integer vector, and then
1085/// performs a bitwise AND of the intermediate result and the second 64-bit
1086/// integer vector.
1087///
1088/// \headerfile <x86intrin.h>
1089///
1090/// This intrinsic corresponds to the <c> PANDN </c> instruction.
1091///
1092/// \param __m1
1093/// A 64-bit integer vector. The one's complement of this parameter is used
1094/// in the bitwise AND.
1095/// \param __m2
1096/// A 64-bit integer vector.
1097/// \returns A 64-bit integer vector containing the bitwise AND of the second
1098/// parameter and the one's complement of the first parameter.
1099static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_andnot_si64(__m64 __m1,
1100 __m64 __m2) {
1101 return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
1102}
1103
1104/// Performs a bitwise OR of two 64-bit integer vectors.
1105///
1106/// \headerfile <x86intrin.h>
1107///
1108/// This intrinsic corresponds to the <c> POR </c> instruction.
1109///
1110/// \param __m1
1111/// A 64-bit integer vector.
1112/// \param __m2
1113/// A 64-bit integer vector.
1114/// \returns A 64-bit integer vector containing the bitwise OR of both
1115/// parameters.
1116static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_or_si64(__m64 __m1,
1117 __m64 __m2) {
1118 return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
1119}
1120
1121/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
1122///
1123/// \headerfile <x86intrin.h>
1124///
1125/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1126///
1127/// \param __m1
1128/// A 64-bit integer vector.
1129/// \param __m2
1130/// A 64-bit integer vector.
1131/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1132/// parameters.
1133static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_xor_si64(__m64 __m1,
1134 __m64 __m2) {
1135 return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
1136}
1137
1138/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1139/// [8 x i8] to determine if the element of the first vector is equal to the
1140/// corresponding element of the second vector.
1141///
1142/// Each comparison returns 0 for false, 0xFF for true.
1143///
1144/// \headerfile <x86intrin.h>
1145///
1146/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
1147///
1148/// \param __m1
1149/// A 64-bit integer vector of [8 x i8].
1150/// \param __m2
1151/// A 64-bit integer vector of [8 x i8].
1152/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1153/// results.
1154static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi8(__m64 __m1,
1155 __m64 __m2) {
1156 return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
1157}
1158
1159/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1160/// [4 x i16] to determine if the element of the first vector is equal to the
1161/// corresponding element of the second vector.
1162///
1163/// Each comparison returns 0 for false, 0xFFFF for true.
1164///
1165/// \headerfile <x86intrin.h>
1166///
1167/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
1168///
1169/// \param __m1
1170/// A 64-bit integer vector of [4 x i16].
1171/// \param __m2
1172/// A 64-bit integer vector of [4 x i16].
1173/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1174/// results.
1175static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi16(__m64 __m1,
1176 __m64 __m2) {
1177 return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
1178}
1179
1180/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1181/// [2 x i32] to determine if the element of the first vector is equal to the
1182/// corresponding element of the second vector.
1183///
1184/// Each comparison returns 0 for false, 0xFFFFFFFF for true.
1185///
1186/// \headerfile <x86intrin.h>
1187///
1188/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
1189///
1190/// \param __m1
1191/// A 64-bit integer vector of [2 x i32].
1192/// \param __m2
1193/// A 64-bit integer vector of [2 x i32].
1194/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1195/// results.
1196static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi32(__m64 __m1,
1197 __m64 __m2) {
1198 return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
1199}
1200
1201/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1202/// [8 x i8] to determine if the element of the first vector is greater than
1203/// the corresponding element of the second vector.
1204///
1205/// Each comparison returns 0 for false, 0xFF for true.
1206///
1207/// \headerfile <x86intrin.h>
1208///
1209/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
1210///
1211/// \param __m1
1212/// A 64-bit integer vector of [8 x i8].
1213/// \param __m2
1214/// A 64-bit integer vector of [8 x i8].
1215/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1216/// results.
1217static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi8(__m64 __m1,
1218 __m64 __m2) {
1219 /* This function always performs a signed comparison, but __v8qi is a char
1220 which may be signed or unsigned, so use __v8qs. */
1221 return (__m64)((__v8qs)__m1 > (__v8qs)__m2);
1222}
1223
1224/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1225/// [4 x i16] to determine if the element of the first vector is greater than
1226/// the corresponding element of the second vector.
1227///
1228/// Each comparison returns 0 for false, 0xFFFF for true.
1229///
1230/// \headerfile <x86intrin.h>
1231///
1232/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
1233///
1234/// \param __m1
1235/// A 64-bit integer vector of [4 x i16].
1236/// \param __m2
1237/// A 64-bit integer vector of [4 x i16].
1238/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1239/// results.
1240static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi16(__m64 __m1,
1241 __m64 __m2) {
1242 return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
1243}
1244
1245/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1246/// [2 x i32] to determine if the element of the first vector is greater than
1247/// the corresponding element of the second vector.
1248///
1249/// Each comparison returns 0 for false, 0xFFFFFFFF for true.
1250///
1251/// \headerfile <x86intrin.h>
1252///
1253/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
1254///
1255/// \param __m1
1256/// A 64-bit integer vector of [2 x i32].
1257/// \param __m2
1258/// A 64-bit integer vector of [2 x i32].
1259/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1260/// results.
1261static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi32(__m64 __m1,
1262 __m64 __m2) {
1263 return (__m64)((__v2si)__m1 > (__v2si)__m2);
1264}
1265
1266/// Constructs a 64-bit integer vector initialized to zero.
1267///
1268/// \headerfile <x86intrin.h>
1269///
1270/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1271///
1272/// \returns An initialized 64-bit integer vector with all elements set to zero.
1273static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setzero_si64(void) {
1274 return __extension__(__m64){0LL};
1275}
1276
1277/// Constructs a 64-bit integer vector initialized with the specified
1278/// 32-bit integer values.
1279///
1280/// \headerfile <x86intrin.h>
1281///
1282/// This intrinsic is a utility function and does not correspond to a specific
1283/// instruction.
1284///
1285/// \param __i1
1286/// A 32-bit integer value used to initialize the upper 32 bits of the
1287/// result.
1288/// \param __i0
1289/// A 32-bit integer value used to initialize the lower 32 bits of the
1290/// result.
1291/// \returns An initialized 64-bit integer vector.
1292static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi32(int __i1,
1293 int __i0) {
1294 return __extension__(__m64)(__v2si){__i0, __i1};
1295}
1296
1297/// Constructs a 64-bit integer vector initialized with the specified
1298/// 16-bit integer values.
1299///
1300/// \headerfile <x86intrin.h>
1301///
1302/// This intrinsic is a utility function and does not correspond to a specific
1303/// instruction.
1304///
1305/// \param __s3
1306/// A 16-bit integer value used to initialize bits [63:48] of the result.
1307/// \param __s2
1308/// A 16-bit integer value used to initialize bits [47:32] of the result.
1309/// \param __s1
1310/// A 16-bit integer value used to initialize bits [31:16] of the result.
1311/// \param __s0
1312/// A 16-bit integer value used to initialize bits [15:0] of the result.
1313/// \returns An initialized 64-bit integer vector.
1314static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi16(short __s3,
1315 short __s2,
1316 short __s1,
1317 short __s0) {
1318 return __extension__(__m64)(__v4hi){__s0, __s1, __s2, __s3};
1319}
1320
1321/// Constructs a 64-bit integer vector initialized with the specified
1322/// 8-bit integer values.
1323///
1324/// \headerfile <x86intrin.h>
1325///
1326/// This intrinsic is a utility function and does not correspond to a specific
1327/// instruction.
1328///
1329/// \param __b7
1330/// An 8-bit integer value used to initialize bits [63:56] of the result.
1331/// \param __b6
1332/// An 8-bit integer value used to initialize bits [55:48] of the result.
1333/// \param __b5
1334/// An 8-bit integer value used to initialize bits [47:40] of the result.
1335/// \param __b4
1336/// An 8-bit integer value used to initialize bits [39:32] of the result.
1337/// \param __b3
1338/// An 8-bit integer value used to initialize bits [31:24] of the result.
1339/// \param __b2
1340/// An 8-bit integer value used to initialize bits [23:16] of the result.
1341/// \param __b1
1342/// An 8-bit integer value used to initialize bits [15:8] of the result.
1343/// \param __b0
1344/// An 8-bit integer value used to initialize bits [7:0] of the result.
1345/// \returns An initialized 64-bit integer vector.
1346static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1347_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1348 char __b1, char __b0) {
1349 return __extension__(__m64)(__v8qi){__b0, __b1, __b2, __b3,
1350 __b4, __b5, __b6, __b7};
1351}
1352
1353/// Constructs a 64-bit integer vector of [2 x i32], with each of the
1354/// 32-bit integer vector elements set to the specified 32-bit integer
1355/// value.
1356///
1357/// \headerfile <x86intrin.h>
1358///
1359/// This intrinsic is a utility function and does not correspond to a specific
1360/// instruction.
1361///
1362/// \param __i
1363/// A 32-bit integer value used to initialize each vector element of the
1364/// result.
1365/// \returns An initialized 64-bit integer vector of [2 x i32].
1366static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi32(int __i) {
1367 return _mm_set_pi32(__i, __i);
1368}
1369
1370/// Constructs a 64-bit integer vector of [4 x i16], with each of the
1371/// 16-bit integer vector elements set to the specified 16-bit integer
1372/// value.
1373///
1374/// \headerfile <x86intrin.h>
1375///
1376/// This intrinsic is a utility function and does not correspond to a specific
1377/// instruction.
1378///
1379/// \param __w
1380/// A 16-bit integer value used to initialize each vector element of the
1381/// result.
1382/// \returns An initialized 64-bit integer vector of [4 x i16].
1383static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi16(short __w) {
1384 return _mm_set_pi16(__w, __w, __w, __w);
1385}
1386
1387/// Constructs a 64-bit integer vector of [8 x i8], with each of the
1388/// 8-bit integer vector elements set to the specified 8-bit integer value.
1389///
1390/// \headerfile <x86intrin.h>
1391///
1392/// This intrinsic is a utility function and does not correspond to a specific
1393/// instruction.
1394///
1395/// \param __b
1396/// An 8-bit integer value used to initialize each vector element of the
1397/// result.
1398/// \returns An initialized 64-bit integer vector of [8 x i8].
1399static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi8(char __b) {
1400 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
1401}
1402
1403/// Constructs a 64-bit integer vector, initialized in reverse order with
1404/// the specified 32-bit integer values.
1405///
1406/// \headerfile <x86intrin.h>
1407///
1408/// This intrinsic is a utility function and does not correspond to a specific
1409/// instruction.
1410///
1411/// \param __i0
1412/// A 32-bit integer value used to initialize the lower 32 bits of the
1413/// result.
1414/// \param __i1
1415/// A 32-bit integer value used to initialize the upper 32 bits of the
1416/// result.
1417/// \returns An initialized 64-bit integer vector.
1418static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi32(int __i0,
1419 int __i1) {
1420 return _mm_set_pi32(__i1, __i0);
1421}
1422
1423/// Constructs a 64-bit integer vector, initialized in reverse order with
1424/// the specified 16-bit integer values.
1425///
1426/// \headerfile <x86intrin.h>
1427///
1428/// This intrinsic is a utility function and does not correspond to a specific
1429/// instruction.
1430///
1431/// \param __w0
1432/// A 16-bit integer value used to initialize bits [15:0] of the result.
1433/// \param __w1
1434/// A 16-bit integer value used to initialize bits [31:16] of the result.
1435/// \param __w2
1436/// A 16-bit integer value used to initialize bits [47:32] of the result.
1437/// \param __w3
1438/// A 16-bit integer value used to initialize bits [63:48] of the result.
1439/// \returns An initialized 64-bit integer vector.
1440static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi16(short __w0,
1441 short __w1,
1442 short __w2,
1443 short __w3) {
1444 return _mm_set_pi16(__w3, __w2, __w1, __w0);
1445}
1446
1447/// Constructs a 64-bit integer vector, initialized in reverse order with
1448/// the specified 8-bit integer values.
1449///
1450/// \headerfile <x86intrin.h>
1451///
1452/// This intrinsic is a utility function and does not correspond to a specific
1453/// instruction.
1454///
1455/// \param __b0
1456/// An 8-bit integer value used to initialize bits [7:0] of the result.
1457/// \param __b1
1458/// An 8-bit integer value used to initialize bits [15:8] of the result.
1459/// \param __b2
1460/// An 8-bit integer value used to initialize bits [23:16] of the result.
1461/// \param __b3
1462/// An 8-bit integer value used to initialize bits [31:24] of the result.
1463/// \param __b4
1464/// An 8-bit integer value used to initialize bits [39:32] of the result.
1465/// \param __b5
1466/// An 8-bit integer value used to initialize bits [47:40] of the result.
1467/// \param __b6
1468/// An 8-bit integer value used to initialize bits [55:48] of the result.
1469/// \param __b7
1470/// An 8-bit integer value used to initialize bits [63:56] of the result.
1471/// \returns An initialized 64-bit integer vector.
1472static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1473_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1474 char __b6, char __b7) {
1475 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1476}
1477
/* Remove the helper macros used by the definitions above. */
#undef __trunc64
#undef __DEFAULT_FN_ATTRS_SSE2

/* Aliases for compatibility: each _m_* name expands to the _mm_* intrinsic
   listed beside it. */

/* State management and scalar conversions. */
#define _m_empty      _mm_empty
#define _m_from_int   _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int     _mm_cvtsi64_si32
#define _m_to_int64   _mm_cvtm64_si64

/* Pack and unpack. */
#define _m_packsswb   _mm_packs_pi16
#define _m_packssdw   _mm_packs_pi32
#define _m_packuswb   _mm_packs_pu16
#define _m_punpckhbw  _mm_unpackhi_pi8
#define _m_punpckhwd  _mm_unpackhi_pi16
#define _m_punpckhdq  _mm_unpackhi_pi32
#define _m_punpcklbw  _mm_unpacklo_pi8
#define _m_punpcklwd  _mm_unpacklo_pi16
#define _m_punpckldq  _mm_unpacklo_pi32

/* Addition. */
#define _m_paddb      _mm_add_pi8
#define _m_paddw      _mm_add_pi16
#define _m_paddd      _mm_add_pi32
#define _m_paddsb     _mm_adds_pi8
#define _m_paddsw     _mm_adds_pi16
#define _m_paddusb    _mm_adds_pu8
#define _m_paddusw    _mm_adds_pu16

/* Subtraction. */
#define _m_psubb      _mm_sub_pi8
#define _m_psubw      _mm_sub_pi16
#define _m_psubd      _mm_sub_pi32
#define _m_psubsb     _mm_subs_pi8
#define _m_psubsw     _mm_subs_pi16
#define _m_psubusb    _mm_subs_pu8
#define _m_psubusw    _mm_subs_pu16

/* Multiplication. */
#define _m_pmaddwd    _mm_madd_pi16
#define _m_pmulhw     _mm_mulhi_pi16
#define _m_pmullw     _mm_mullo_pi16

/* Shifts. */
#define _m_psllw      _mm_sll_pi16
#define _m_psllwi     _mm_slli_pi16
#define _m_pslld      _mm_sll_pi32
#define _m_pslldi     _mm_slli_pi32
#define _m_psllq      _mm_sll_si64
#define _m_psllqi     _mm_slli_si64
#define _m_psraw      _mm_sra_pi16
#define _m_psrawi     _mm_srai_pi16
#define _m_psrad      _mm_sra_pi32
#define _m_psradi     _mm_srai_pi32
#define _m_psrlw      _mm_srl_pi16
#define _m_psrlwi     _mm_srli_pi16
#define _m_psrld      _mm_srl_pi32
#define _m_psrldi     _mm_srli_pi32
#define _m_psrlq      _mm_srl_si64
#define _m_psrlqi     _mm_srli_si64

/* Bitwise logic. */
#define _m_pand       _mm_and_si64
#define _m_pandn      _mm_andnot_si64
#define _m_por        _mm_or_si64
#define _m_pxor       _mm_xor_si64

/* Comparisons. */
#define _m_pcmpeqb    _mm_cmpeq_pi8
#define _m_pcmpeqw    _mm_cmpeq_pi16
#define _m_pcmpeqd    _mm_cmpeq_pi32
#define _m_pcmpgtb    _mm_cmpgt_pi8
#define _m_pcmpgtw    _mm_cmpgt_pi16
#define _m_pcmpgtd    _mm_cmpgt_pi32
1539
1540#endif /* __MMINTRIN_H */
1541
static __inline__ vector float vector float __b
Definition altivec.h:578
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_andnot_si64(__m64 __m1, __m64 __m2)
Performs a bitwise NOT of the first 64-bit integer vector, and then performs a bitwise AND of the int...
Definition mmintrin.h:1099
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi32(__m64 __m1, __m64 __m2)
Adds each 32-bit integer element of the first 64-bit integer vector of [2 x i32] to the corresponding...
Definition mmintrin.h:397
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_and_si64(__m64 __m1, __m64 __m2)
Performs a bitwise AND of two 64-bit integer vectors.
Definition mmintrin.h:1079
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8...
Definition mmintrin.h:357
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Constructs a 64-bit integer vector initialized with the specified 8-bit integer values.
Definition mmintrin.h:1347
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit integer element of the first 64-bit integer vector of [4 x i16] to the corresponding...
Definition mmintrin.h:377
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu16(__m64 __m1, __m64 __m2)
Adds, with saturation, each 16-bit unsigned integer element of the first 64-bit integer vector of [4 ...
Definition mmintrin.h:487
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi32(__m64 __m1, __m64 __m2)
Converts, with saturation, 32-bit signed integers from both 64-bit integer vector parameters of [2 x ...
Definition mmintrin.h:173
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
Definition mmintrin.h:1196
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition mmintrin.h:317
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi16(short __w)
Constructs a 64-bit integer vector of [4 x i16], with each of the 16-bit integer vector elements set to the specified 16-bit integer value.
Definition mmintrin.h:1383
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi16(__m64 __m1, __m64 __m2)
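Converts, with saturation, 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit signed integer values, and constructs a 64-bit integer vector of [8 x i8] as the result.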
Definition mmintrin.h:148
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit integer element of the second 64-bit integer vector of [8 x i8] from the corresponding 8-bit integer element of the first 64-bit integer vector of [8 x i8].
Definition mmintrin.h:507
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 8-bit integer values.
Definition mmintrin.h:1473
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bits specified by a 32-bit integer.
Definition mmintrin.h:931
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu8(__m64 __m1, __m64 __m2)
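Adds, with saturation, each 8-bit unsigned integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned integer element of the second 64-bit integer vector of [8 x i8].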
Definition mmintrin.h:465
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi8(char __b)
Constructs a 64-bit integer vector of [8 x i8], with each of the 8-bit integer vector elements set to the specified 8-bit integer value.
Definition mmintrin.h:1399
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bits specified by a 32-bit integer.
Definition mmintrin.h:976
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bits specified by a 32-bit integer.
Definition mmintrin.h:884
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit unsigned integer element of the second 64-bit integer vector of [8 x i8] from the corresponding 8-bit unsigned integer element of the first 64-bit integer vector of [8 x i8].
Definition mmintrin.h:616
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi8(__m64 __m1, __m64 __m2)
Adds, with saturation, each 8-bit signed integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed integer element of the second 64-bit integer vector of [8 x i8].
Definition mmintrin.h:420
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi32(int __i)
Constructs a 64-bit integer vector of [2 x i32], with each of the 32-bit integer vector elements set to the specified 32-bit integer value.
Definition mmintrin.h:1366
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_or_si64(__m64 __m1, __m64 __m2)
Performs a bitwise OR of two 64-bit integer vectors.
Definition mmintrin.h:1116
static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 _mm_cvtm64_si64(__m64 __m)
Casts a 64-bit integer vector into a 64-bit signed integer value.
Definition mmintrin.h:125
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setzero_si64(void)
Constructs a 64-bit integer vector initialized to zero.
Definition mmintrin.h:1273
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bits specified by a 32-bit integer.
Definition mmintrin.h:1021
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi16(__m64 __m1, __m64 __m2)
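Adds, with saturation, each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed integer element of the second 64-bit integer vector of [4 x i16].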
Definition mmintrin.h:443
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi32_si64(int __i)
Constructs a 64-bit integer vector, setting the lower 32 bits to the value of the 32-bit integer parameter and setting the upper 32 bits to 0.
Definition mmintrin.h:82
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_si64(__m64 __m, int __count)
Left-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the second parameter, which is a 32-bit integer.
Definition mmintrin.h:837
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the corresponding 16-bit signed integer element of the second 64-bit integer vector of [4 x i16].
Definition mmintrin.h:686
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_pi16(__m64 __m, __m64 __count)
Left-shifts each 16-bit signed integer element of the first parameter, which is a 64-bit integer vector of [4 x i16], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition mmintrin.h:730
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of [4 x i16], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition mmintrin.h:954
static __inline__ int __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_si32(__m64 __m)
Returns the lower 32 bits of a 64-bit integer vector as a 32-bit signed integer.
Definition mmintrin.h:97
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the element of the first vector is greater than the corresponding element of the second vector.
Definition mmintrin.h:1217
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sra_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of [2 x i32], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition mmintrin.h:908
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_si64(__m64 __m, __m64 __count)
Right-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit integer parameter.
Definition mmintrin.h:1041
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));
Definition mmintrin.h:17
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi8(__m64 __m1, __m64 __m2)
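Subtracts, with saturation, each 8-bit signed integer element of the second 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed integer element of the first 64-bit integer vector of [8 x i8].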
Definition mmintrin.h:570
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the corresponding 16-bit signed integer element of the second 64-bit integer vector of [4 x i16].
Definition mmintrin.h:707
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-bit integer vector of [8 x i8].
Definition mmintrin.h:225
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pu16(__m64 __m1, __m64 __m2)
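Converts, with saturation, 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit unsigned integer values, and constructs a 64-bit integer vector of [8 x i8] as the result.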
Definition mmintrin.h:198
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_madd_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the corresponding 16-bit signed integer element of the second 64-bit integer vector of [4 x i16].
Definition mmintrin.h:665
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of [2 x i32], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition mmintrin.h:999
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_xor_si64(__m64 __m1, __m64 __m2)
Performs a bitwise exclusive OR of two 64-bit integer vectors.
Definition mmintrin.h:1133
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_m64(long long __i)
Casts a 64-bit signed integer value into a 64-bit integer vector.
Definition mmintrin.h:111
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the element of the first vector is equal to the corresponding element of the second vector.
Definition mmintrin.h:1154
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_si64(__m64 __m, __m64 __count)
Left-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit integer parameter.
Definition mmintrin.h:817
#define __trunc64(x)
Definition mmintrin.h:52
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
Definition mmintrin.h:268
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi32(__m64 __m1, __m64 __m2)
Subtracts each 32-bit integer element of the second 64-bit integer vector of [2 x i32] from the corresponding 32-bit integer element of the first 64-bit integer vector of [2 x i32].
Definition mmintrin.h:547
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
Definition mmintrin.h:248
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi32(int __i1, int __i0)
Constructs a 64-bit integer vector initialized with the specified 32-bit integer values.
Definition mmintrin.h:1292
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_si64(__m64 __m, int __count)
Right-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the second parameter, which is a 32-bit integer.
Definition mmintrin.h:1062
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-bit integer vector of [8 x i8].
Definition mmintrin.h:294
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sra_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of [4 x i16], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition mmintrin.h:861
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi16(__m64 __m1, __m64 __m2)
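Subtracts, with saturation, each 16-bit signed integer element of the second 64-bit integer vector of [4 x i16] from the corresponding 16-bit signed integer element of the first 64-bit integer vector of [4 x i16].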
Definition mmintrin.h:593
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi32(__m64 __m, int __count)
Left-shifts each 32-bit signed integer element of a 64-bit integer vector of [2 x i32] by the number of bits specified by a 32-bit integer.
Definition mmintrin.h:797
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the element of the first vector is equal to the corresponding element of the second vector.
Definition mmintrin.h:1175
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi16(__m64 __m, int __count)
Left-shifts each 16-bit signed integer element of a 64-bit integer vector of [4 x i16] by the number of bits specified by a 32-bit integer.
Definition mmintrin.h:752
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_pi32(__m64 __m, __m64 __count)
Left-shifts each 32-bit signed integer element of the first parameter, which is a 64-bit integer vector of [2 x i32], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition mmintrin.h:775
#define __zext128(x)
Definition mmintrin.h:54
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi32(int __i0, int __i1)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 32-bit integer values.
Definition mmintrin.h:1418
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit unsigned integer element of the second 64-bit integer vector of [4 x i16] from the corresponding 16-bit unsigned integer element of the first 64-bit integer vector of [4 x i16].
Definition mmintrin.h:639
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 16-bit integer values.
Definition mmintrin.h:1440
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit integer element of the second 64-bit integer vector of [4 x i16] from the corresponding 16-bit integer element of the first 64-bit integer vector of [4 x i16].
Definition mmintrin.h:527
#define __DEFAULT_FN_ATTRS_SSE2
Definition mmintrin.h:47
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the element of the first vector is greater than the corresponding element of the second vector.
Definition mmintrin.h:1261
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
Definition mmintrin.h:337
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
Constructs a 64-bit integer vector initialized with the specified 16-bit integer values.
Definition mmintrin.h:1314
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the element of the first vector is greater than the corresponding element of the second vector.
Definition mmintrin.h:1240