clang 20.0.0git
mmintrin.h
Go to the documentation of this file.
/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10#ifndef __MMINTRIN_H
11#define __MMINTRIN_H
12
/* Reject any target that is neither 32-bit nor 64-bit x86. */
#if !(defined(__i386__) || defined(__x86_64__))
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
16
/* Public 64-bit MMX vector type: one long long lane, 8-byte aligned. */
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));

/* Internal 64-bit views with signed (or plain char) elements. */
typedef long long __v1di __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));

/* Internal 64-bit views with unsigned elements. */
typedef unsigned long long __v1du __attribute__((__vector_size__(8)));
typedef unsigned int __v2su __attribute__((__vector_size__(8)));
typedef unsigned short __v4hu __attribute__((__vector_size__(8)));
typedef unsigned char __v8qu __attribute__((__vector_size__(8)));

/* An explicitly signed char view is required as well; it is not meant to
 * appear in the public interface. */
typedef signed char __v8qs __attribute__((__vector_size__(8)));

/* 128-bit SSE/SSE2 vector types used by the implementations in this file. */
typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef int __v4si __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
40
/* Attribute set shared by the SSE2-backed functions in this file: always
 * inlined, no debug info, built for the "sse2,no-evex512" target, and a
 * minimum vector width of 128 bits. */
#define __DEFAULT_FN_ATTRS_SSE2                                                \
  __attribute__((__always_inline__, __nodebug__))                              \
  __attribute__((__target__("sse2,no-evex512"), __min_vector_width__(128)))
45
/* Yields the low 64 bits of a 128-bit vector value as an __m64. The whole
 * expansion is parenthesized so the result behaves as a single operand. */
#define __trunc64(x)                                                           \
  ((__m64)__builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0))
/* Widens a 64-bit vector value to an __m128i. Elements 0 and 1 come from the
 * argument; the two upper elements use shuffle index -1, i.e. their contents
 * are unspecified. */
#define __anyext128(x)                                                         \
  ((__m128i)__builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0,   \
                                    1, -1, -1))
51
52/// Clears the MMX state by setting the state of the x87 stack registers
53/// to empty.
54///
55/// \headerfile <x86intrin.h>
56///
57/// This intrinsic corresponds to the <c> EMMS </c> instruction.
58///
59static __inline__ void __attribute__((__always_inline__, __nodebug__,
60 __target__("mmx,no-evex512")))
61_mm_empty(void) {
62 __builtin_ia32_emms();
63}
64
65/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
66/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
67///
68/// \headerfile <x86intrin.h>
69///
70/// This intrinsic corresponds to the <c> MOVD </c> instruction.
71///
72/// \param __i
73/// A 32-bit integer value.
74/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
75/// parameter. The upper 32 bits are set to 0.
76static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
78{
79 return __extension__ (__m64)(__v2si){__i, 0};
80}
81
82/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
83/// signed integer.
84///
85/// \headerfile <x86intrin.h>
86///
87/// This intrinsic corresponds to the <c> MOVD </c> instruction.
88///
89/// \param __m
90/// A 64-bit integer vector.
91/// \returns A 32-bit signed integer value containing the lower 32 bits of the
92/// parameter.
93static __inline__ int __DEFAULT_FN_ATTRS_SSE2
95{
96 return ((__v2si)__m)[0];
97}
98
99/// Casts a 64-bit signed integer value into a 64-bit integer vector.
100///
101/// \headerfile <x86intrin.h>
102///
103/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
104///
105/// \param __i
106/// A 64-bit signed integer.
107/// \returns A 64-bit integer vector containing the same bitwise pattern as the
108/// parameter.
109static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
110_mm_cvtsi64_m64(long long __i)
111{
112 return (__m64)__i;
113}
114
115/// Casts a 64-bit integer vector into a 64-bit signed integer value.
116///
117/// \headerfile <x86intrin.h>
118///
119/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
120///
121/// \param __m
122/// A 64-bit integer vector.
123/// \returns A 64-bit signed integer containing the same bitwise pattern as the
124/// parameter.
125static __inline__ long long __DEFAULT_FN_ATTRS_SSE2
127{
128 return (long long)__m;
129}
130
131/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
132/// vector parameters of [4 x i16] into 8-bit signed integer values, and
133/// constructs a 64-bit integer vector of [8 x i8] as the result.
134///
135/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
136/// less than 0x80 are saturated to 0x80.
137///
138/// \headerfile <x86intrin.h>
139///
140/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
141///
142/// \param __m1
143/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
144/// written to the lower 32 bits of the result.
145/// \param __m2
146/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
147/// written to the upper 32 bits of the result.
148/// \returns A 64-bit integer vector of [8 x i8] containing the converted
149/// values.
150static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
151_mm_packs_pi16(__m64 __m1, __m64 __m2)
152{
153 return __trunc64(__builtin_ia32_packsswb128(
154 (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
155}
156
157/// Converts, with saturation, 32-bit signed integers from both 64-bit integer
158/// vector parameters of [2 x i32] into 16-bit signed integer values, and
159/// constructs a 64-bit integer vector of [4 x i16] as the result.
160///
161/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
162/// values less than 0x8000 are saturated to 0x8000.
163///
164/// \headerfile <x86intrin.h>
165///
166/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
167///
168/// \param __m1
169/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
170/// written to the lower 32 bits of the result.
171/// \param __m2
172/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
173/// written to the upper 32 bits of the result.
174/// \returns A 64-bit integer vector of [4 x i16] containing the converted
175/// values.
176static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
177_mm_packs_pi32(__m64 __m1, __m64 __m2)
178{
179 return __trunc64(__builtin_ia32_packssdw128(
180 (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){}));
181}
182
183/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
184/// vector parameters of [4 x i16] into 8-bit unsigned integer values, and
185/// constructs a 64-bit integer vector of [8 x i8] as the result.
186///
187/// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are
188/// saturated to 0.
189///
190/// \headerfile <x86intrin.h>
191///
192/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
193///
194/// \param __m1
195/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
196/// written to the lower 32 bits of the result.
197/// \param __m2
198/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
199/// written to the upper 32 bits of the result.
200/// \returns A 64-bit integer vector of [8 x i8] containing the converted
201/// values.
202static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
203_mm_packs_pu16(__m64 __m1, __m64 __m2)
204{
205 return __trunc64(__builtin_ia32_packuswb128(
206 (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
207}
208
209/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
210/// and interleaves them into a 64-bit integer vector of [8 x i8].
211///
212/// \headerfile <x86intrin.h>
213///
214/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
215///
216/// \param __m1
217/// A 64-bit integer vector of [8 x i8]. \n
218/// Bits [39:32] are written to bits [7:0] of the result. \n
219/// Bits [47:40] are written to bits [23:16] of the result. \n
220/// Bits [55:48] are written to bits [39:32] of the result. \n
221/// Bits [63:56] are written to bits [55:48] of the result.
222/// \param __m2
223/// A 64-bit integer vector of [8 x i8].
224/// Bits [39:32] are written to bits [15:8] of the result. \n
225/// Bits [47:40] are written to bits [31:24] of the result. \n
226/// Bits [55:48] are written to bits [47:40] of the result. \n
227/// Bits [63:56] are written to bits [63:56] of the result.
228/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
229/// values.
230static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
231_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
232{
233 return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
234 4, 12, 5, 13, 6, 14, 7, 15);
235}
236
237/// Unpacks the upper 32 bits from two 64-bit integer vectors of
238/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
239///
240/// \headerfile <x86intrin.h>
241///
242/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
243///
244/// \param __m1
245/// A 64-bit integer vector of [4 x i16].
246/// Bits [47:32] are written to bits [15:0] of the result. \n
247/// Bits [63:48] are written to bits [47:32] of the result.
248/// \param __m2
249/// A 64-bit integer vector of [4 x i16].
250/// Bits [47:32] are written to bits [31:16] of the result. \n
251/// Bits [63:48] are written to bits [63:48] of the result.
252/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
253/// values.
254static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
255_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
256{
257 return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
258 2, 6, 3, 7);
259}
260
261/// Unpacks the upper 32 bits from two 64-bit integer vectors of
262/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
263///
264/// \headerfile <x86intrin.h>
265///
266/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
267///
268/// \param __m1
269/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
270/// the lower 32 bits of the result.
271/// \param __m2
272/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
273/// the upper 32 bits of the result.
274/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
275/// values.
276static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
277_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
278{
279 return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
280}
281
282/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
283/// and interleaves them into a 64-bit integer vector of [8 x i8].
284///
285/// \headerfile <x86intrin.h>
286///
287/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
288///
289/// \param __m1
290/// A 64-bit integer vector of [8 x i8].
291/// Bits [7:0] are written to bits [7:0] of the result. \n
292/// Bits [15:8] are written to bits [23:16] of the result. \n
293/// Bits [23:16] are written to bits [39:32] of the result. \n
294/// Bits [31:24] are written to bits [55:48] of the result.
295/// \param __m2
296/// A 64-bit integer vector of [8 x i8].
297/// Bits [7:0] are written to bits [15:8] of the result. \n
298/// Bits [15:8] are written to bits [31:24] of the result. \n
299/// Bits [23:16] are written to bits [47:40] of the result. \n
300/// Bits [31:24] are written to bits [63:56] of the result.
301/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
302/// values.
303static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
304_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
305{
306 return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
307 0, 8, 1, 9, 2, 10, 3, 11);
308}
309
310/// Unpacks the lower 32 bits from two 64-bit integer vectors of
311/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
312///
313/// \headerfile <x86intrin.h>
314///
315/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
316///
317/// \param __m1
318/// A 64-bit integer vector of [4 x i16].
319/// Bits [15:0] are written to bits [15:0] of the result. \n
320/// Bits [31:16] are written to bits [47:32] of the result.
321/// \param __m2
322/// A 64-bit integer vector of [4 x i16].
323/// Bits [15:0] are written to bits [31:16] of the result. \n
324/// Bits [31:16] are written to bits [63:48] of the result.
325/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
326/// values.
327static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
328_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
329{
330 return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
331 0, 4, 1, 5);
332}
333
334/// Unpacks the lower 32 bits from two 64-bit integer vectors of
335/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
336///
337/// \headerfile <x86intrin.h>
338///
339/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
340///
341/// \param __m1
342/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
343/// the lower 32 bits of the result.
344/// \param __m2
345/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
346/// the upper 32 bits of the result.
347/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
348/// values.
349static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
350_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
351{
352 return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
353}
354
355/// Adds each 8-bit integer element of the first 64-bit integer vector
356/// of [8 x i8] to the corresponding 8-bit integer element of the second
357/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
358/// packed into a 64-bit integer vector of [8 x i8].
359///
360/// \headerfile <x86intrin.h>
361///
362/// This intrinsic corresponds to the <c> PADDB </c> instruction.
363///
364/// \param __m1
365/// A 64-bit integer vector of [8 x i8].
366/// \param __m2
367/// A 64-bit integer vector of [8 x i8].
368/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
369/// parameters.
370static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
371_mm_add_pi8(__m64 __m1, __m64 __m2)
372{
373 return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
374}
375
376/// Adds each 16-bit integer element of the first 64-bit integer vector
377/// of [4 x i16] to the corresponding 16-bit integer element of the second
378/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
379/// packed into a 64-bit integer vector of [4 x i16].
380///
381/// \headerfile <x86intrin.h>
382///
383/// This intrinsic corresponds to the <c> PADDW </c> instruction.
384///
385/// \param __m1
386/// A 64-bit integer vector of [4 x i16].
387/// \param __m2
388/// A 64-bit integer vector of [4 x i16].
389/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
390/// parameters.
391static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
392_mm_add_pi16(__m64 __m1, __m64 __m2)
393{
394 return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
395}
396
397/// Adds each 32-bit integer element of the first 64-bit integer vector
398/// of [2 x i32] to the corresponding 32-bit integer element of the second
399/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
400/// packed into a 64-bit integer vector of [2 x i32].
401///
402/// \headerfile <x86intrin.h>
403///
404/// This intrinsic corresponds to the <c> PADDD </c> instruction.
405///
406/// \param __m1
407/// A 64-bit integer vector of [2 x i32].
408/// \param __m2
409/// A 64-bit integer vector of [2 x i32].
410/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
411/// parameters.
412static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
413_mm_add_pi32(__m64 __m1, __m64 __m2)
414{
415 return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
416}
417
418/// Adds, with saturation, each 8-bit signed integer element of the first
419/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed
420/// integer element of the second 64-bit integer vector of [8 x i8].
421///
422/// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
423/// less than 0x80 are saturated to 0x80. The results are packed into a
424/// 64-bit integer vector of [8 x i8].
425///
426/// \headerfile <x86intrin.h>
427///
428/// This intrinsic corresponds to the <c> PADDSB </c> instruction.
429///
430/// \param __m1
431/// A 64-bit integer vector of [8 x i8].
432/// \param __m2
433/// A 64-bit integer vector of [8 x i8].
434/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
435/// of both parameters.
436static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
437_mm_adds_pi8(__m64 __m1, __m64 __m2)
438{
439 return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
440}
441
442/// Adds, with saturation, each 16-bit signed integer element of the first
443/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed
444/// integer element of the second 64-bit integer vector of [4 x i16].
445///
446/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
447/// less than 0x8000 are saturated to 0x8000. The results are packed into a
448/// 64-bit integer vector of [4 x i16].
449///
450/// \headerfile <x86intrin.h>
451///
452/// This intrinsic corresponds to the <c> PADDSW </c> instruction.
453///
454/// \param __m1
455/// A 64-bit integer vector of [4 x i16].
456/// \param __m2
457/// A 64-bit integer vector of [4 x i16].
458/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
459/// of both parameters.
460static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
461_mm_adds_pi16(__m64 __m1, __m64 __m2)
462{
463 return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
464}
465
466/// Adds, with saturation, each 8-bit unsigned integer element of the first
467/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned
468/// integer element of the second 64-bit integer vector of [8 x i8].
469///
470/// Sums greater than 0xFF are saturated to 0xFF. The results are packed
471/// into a 64-bit integer vector of [8 x i8].
472///
473/// \headerfile <x86intrin.h>
474///
475/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
476///
477/// \param __m1
478/// A 64-bit integer vector of [8 x i8].
479/// \param __m2
480/// A 64-bit integer vector of [8 x i8].
481/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
482/// unsigned sums of both parameters.
483static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
484_mm_adds_pu8(__m64 __m1, __m64 __m2)
485{
486 return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2);
487}
488
489/// Adds, with saturation, each 16-bit unsigned integer element of the first
490/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned
491/// integer element of the second 64-bit integer vector of [4 x i16].
492///
493/// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed
494/// into a 64-bit integer vector of [4 x i16].
495///
496/// \headerfile <x86intrin.h>
497///
498/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
499///
500/// \param __m1
501/// A 64-bit integer vector of [4 x i16].
502/// \param __m2
503/// A 64-bit integer vector of [4 x i16].
504/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
505/// unsigned sums of both parameters.
506static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
507_mm_adds_pu16(__m64 __m1, __m64 __m2)
508{
509 return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2);
510}
511
512/// Subtracts each 8-bit integer element of the second 64-bit integer
513/// vector of [8 x i8] from the corresponding 8-bit integer element of the
514/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
515/// are packed into a 64-bit integer vector of [8 x i8].
516///
517/// \headerfile <x86intrin.h>
518///
519/// This intrinsic corresponds to the <c> PSUBB </c> instruction.
520///
521/// \param __m1
522/// A 64-bit integer vector of [8 x i8] containing the minuends.
523/// \param __m2
524/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
525/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
526/// both parameters.
527static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
528_mm_sub_pi8(__m64 __m1, __m64 __m2)
529{
530 return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
531}
532
533/// Subtracts each 16-bit integer element of the second 64-bit integer
534/// vector of [4 x i16] from the corresponding 16-bit integer element of the
535/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
536/// results are packed into a 64-bit integer vector of [4 x i16].
537///
538/// \headerfile <x86intrin.h>
539///
540/// This intrinsic corresponds to the <c> PSUBW </c> instruction.
541///
542/// \param __m1
543/// A 64-bit integer vector of [4 x i16] containing the minuends.
544/// \param __m2
545/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
546/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
547/// both parameters.
548static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
549_mm_sub_pi16(__m64 __m1, __m64 __m2)
550{
551 return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
552}
553
554/// Subtracts each 32-bit integer element of the second 64-bit integer
555/// vector of [2 x i32] from the corresponding 32-bit integer element of the
556/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
557/// results are packed into a 64-bit integer vector of [2 x i32].
558///
559/// \headerfile <x86intrin.h>
560///
561/// This intrinsic corresponds to the <c> PSUBD </c> instruction.
562///
563/// \param __m1
564/// A 64-bit integer vector of [2 x i32] containing the minuends.
565/// \param __m2
566/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
567/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
568/// both parameters.
569static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
570_mm_sub_pi32(__m64 __m1, __m64 __m2)
571{
572 return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
573}
574
575/// Subtracts, with saturation, each 8-bit signed integer element of the second
576/// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed
577/// integer element of the first 64-bit integer vector of [8 x i8].
578///
579/// Positive results greater than 0x7F are saturated to 0x7F. Negative
580/// results less than 0x80 are saturated to 0x80. The results are packed
581/// into a 64-bit integer vector of [8 x i8].
582///
583/// \headerfile <x86intrin.h>
584///
585/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
586///
587/// \param __m1
588/// A 64-bit integer vector of [8 x i8] containing the minuends.
589/// \param __m2
590/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
591/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
592/// differences of both parameters.
593static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
594_mm_subs_pi8(__m64 __m1, __m64 __m2)
595{
596 return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
597}
598
599/// Subtracts, with saturation, each 16-bit signed integer element of the
600/// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit
601/// signed integer element of the first 64-bit integer vector of [4 x i16].
602///
603/// Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative
604/// results less than 0x8000 are saturated to 0x8000. The results are packed
605/// into a 64-bit integer vector of [4 x i16].
606///
607/// \headerfile <x86intrin.h>
608///
609/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
610///
611/// \param __m1
612/// A 64-bit integer vector of [4 x i16] containing the minuends.
613/// \param __m2
614/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
615/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
616/// differences of both parameters.
617static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
618_mm_subs_pi16(__m64 __m1, __m64 __m2)
619{
620 return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
621}
622
623/// Subtracts each 8-bit unsigned integer element of the second 64-bit
624/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
625/// element of the first 64-bit integer vector of [8 x i8].
626///
627/// If an element of the first vector is less than the corresponding element
628/// of the second vector, the result is saturated to 0. The results are
629/// packed into a 64-bit integer vector of [8 x i8].
630///
631/// \headerfile <x86intrin.h>
632///
633/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
634///
635/// \param __m1
636/// A 64-bit integer vector of [8 x i8] containing the minuends.
637/// \param __m2
638/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
639/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
640/// differences of both parameters.
641static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
642_mm_subs_pu8(__m64 __m1, __m64 __m2)
643{
644 return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2);
645}
646
647/// Subtracts each 16-bit unsigned integer element of the second 64-bit
648/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
649/// integer element of the first 64-bit integer vector of [4 x i16].
650///
651/// If an element of the first vector is less than the corresponding element
652/// of the second vector, the result is saturated to 0. The results are
653/// packed into a 64-bit integer vector of [4 x i16].
654///
655/// \headerfile <x86intrin.h>
656///
657/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
658///
659/// \param __m1
660/// A 64-bit integer vector of [4 x i16] containing the minuends.
661/// \param __m2
662/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
663/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
664/// differences of both parameters.
665static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
666_mm_subs_pu16(__m64 __m1, __m64 __m2)
667{
668 return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2);
669}
670
671/// Multiplies each 16-bit signed integer element of the first 64-bit
672/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
673/// element of the second 64-bit integer vector of [4 x i16] and get four
674/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
675/// The lower 32 bits of these two sums are packed into a 64-bit integer
676/// vector of [2 x i32].
677///
678/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]
679/// of both parameters are multiplied, and the sum of both results is written
680/// to bits [31:0] of the result.
681///
682/// \headerfile <x86intrin.h>
683///
684/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
685///
686/// \param __m1
687/// A 64-bit integer vector of [4 x i16].
688/// \param __m2
689/// A 64-bit integer vector of [4 x i16].
690/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
691/// products of both parameters.
692static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
693_mm_madd_pi16(__m64 __m1, __m64 __m2)
694{
695 return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__anyext128(__m1),
696 (__v8hi)__anyext128(__m2)));
697}
698
699/// Multiplies each 16-bit signed integer element of the first 64-bit
700/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
701/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
702/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
703///
704/// \headerfile <x86intrin.h>
705///
706/// This intrinsic corresponds to the <c> PMULHW </c> instruction.
707///
708/// \param __m1
709/// A 64-bit integer vector of [4 x i16].
710/// \param __m2
711/// A 64-bit integer vector of [4 x i16].
712/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
713/// of the products of both parameters.
714static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
715_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
716{
717 return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1),
718 (__v8hi)__anyext128(__m2)));
719}
720
721/// Multiplies each 16-bit signed integer element of the first 64-bit
722/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
723/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
724/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
725///
726/// \headerfile <x86intrin.h>
727///
728/// This intrinsic corresponds to the <c> PMULLW </c> instruction.
729///
730/// \param __m1
731/// A 64-bit integer vector of [4 x i16].
732/// \param __m2
733/// A 64-bit integer vector of [4 x i16].
734/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
735/// of the products of both parameters.
736static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
737_mm_mullo_pi16(__m64 __m1, __m64 __m2)
738{
739 return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
740}
741
742/// Left-shifts each 16-bit signed integer element of the first
743/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
744/// of bits specified by the second parameter, which is a 64-bit integer. The
745/// lower 16 bits of the results are packed into a 64-bit integer vector of
746/// [4 x i16].
747///
748/// \headerfile <x86intrin.h>
749///
750/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
751///
752/// \param __m
753/// A 64-bit integer vector of [4 x i16].
754/// \param __count
755/// A 64-bit integer vector interpreted as a single 64-bit integer.
756/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
757/// values. If \a __count is greater or equal to 16, the result is set to all
758/// 0.
759static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
760_mm_sll_pi16(__m64 __m, __m64 __count)
761{
762 return __trunc64(__builtin_ia32_psllw128((__v8hi)__anyext128(__m),
763 (__v8hi)__anyext128(__count)));
764}
765
766/// Left-shifts each 16-bit signed integer element of a 64-bit integer
767/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
768/// The lower 16 bits of the results are packed into a 64-bit integer vector
769/// of [4 x i16].
770///
771/// \headerfile <x86intrin.h>
772///
773/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
774///
775/// \param __m
776/// A 64-bit integer vector of [4 x i16].
777/// \param __count
778/// A 32-bit integer value.
779/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
780/// values. If \a __count is greater or equal to 16, the result is set to all
781/// 0.
782static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
783_mm_slli_pi16(__m64 __m, int __count)
784{
785 return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m),
786 __count));
787}
788
789/// Left-shifts each 32-bit signed integer element of the first
790/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
791/// of bits specified by the second parameter, which is a 64-bit integer. The
792/// lower 32 bits of the results are packed into a 64-bit integer vector of
793/// [2 x i32].
794///
795/// \headerfile <x86intrin.h>
796///
797/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
798///
799/// \param __m
800/// A 64-bit integer vector of [2 x i32].
801/// \param __count
802/// A 64-bit integer vector interpreted as a single 64-bit integer.
803/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
804/// values. If \a __count is greater or equal to 32, the result is set to all
805/// 0.
806static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
807_mm_sll_pi32(__m64 __m, __m64 __count)
808{
809 return __trunc64(__builtin_ia32_pslld128((__v4si)__anyext128(__m),
810 (__v4si)__anyext128(__count)));
811}
812
813/// Left-shifts each 32-bit signed integer element of a 64-bit integer
814/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
815/// The lower 32 bits of the results are packed into a 64-bit integer vector
816/// of [2 x i32].
817///
818/// \headerfile <x86intrin.h>
819///
820/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
821///
822/// \param __m
823/// A 64-bit integer vector of [2 x i32].
824/// \param __count
825/// A 32-bit integer value.
826/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
827/// values. If \a __count is greater or equal to 32, the result is set to all
828/// 0.
829static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
830_mm_slli_pi32(__m64 __m, int __count)
831{
832 return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m),
833 __count));
834}
835
836/// Left-shifts the first 64-bit integer parameter by the number of bits
837/// specified by the second 64-bit integer parameter. The lower 64 bits of
838/// result are returned.
839///
840/// \headerfile <x86intrin.h>
841///
842/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
843///
844/// \param __m
845/// A 64-bit integer vector interpreted as a single 64-bit integer.
846/// \param __count
847/// A 64-bit integer vector interpreted as a single 64-bit integer.
848/// \returns A 64-bit integer vector containing the left-shifted value. If
849/// \a __count is greater or equal to 64, the result is set to 0.
850static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
851_mm_sll_si64(__m64 __m, __m64 __count)
852{
853 return __trunc64(__builtin_ia32_psllq128((__v2di)__anyext128(__m),
854 (__v2di)__anyext128(__count)));
855}
856
857/// Left-shifts the first parameter, which is a 64-bit integer, by the
858/// number of bits specified by the second parameter, which is a 32-bit
859/// integer. The lower 64 bits of result are returned.
860///
861/// \headerfile <x86intrin.h>
862///
863/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
864///
865/// \param __m
866/// A 64-bit integer vector interpreted as a single 64-bit integer.
867/// \param __count
868/// A 32-bit integer value.
869/// \returns A 64-bit integer vector containing the left-shifted value. If
870/// \a __count is greater or equal to 64, the result is set to 0.
871static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
872_mm_slli_si64(__m64 __m, int __count)
873{
874 return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m),
875 __count));
876}
877
878/// Right-shifts each 16-bit integer element of the first parameter,
879/// which is a 64-bit integer vector of [4 x i16], by the number of bits
880/// specified by the second parameter, which is a 64-bit integer.
881///
882/// High-order bits are filled with the sign bit of the initial value of each
883/// 16-bit element. The 16-bit results are packed into a 64-bit integer
884/// vector of [4 x i16].
885///
886/// \headerfile <x86intrin.h>
887///
888/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
889///
890/// \param __m
891/// A 64-bit integer vector of [4 x i16].
892/// \param __count
893/// A 64-bit integer vector interpreted as a single 64-bit integer.
894/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
895/// values.
896static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
897_mm_sra_pi16(__m64 __m, __m64 __count)
898{
899 return __trunc64(__builtin_ia32_psraw128((__v8hi)__anyext128(__m),
900 (__v8hi)__anyext128(__count)));
901}
902
903/// Right-shifts each 16-bit integer element of a 64-bit integer vector
904/// of [4 x i16] by the number of bits specified by a 32-bit integer.
905///
906/// High-order bits are filled with the sign bit of the initial value of each
907/// 16-bit element. The 16-bit results are packed into a 64-bit integer
908/// vector of [4 x i16].
909///
910/// \headerfile <x86intrin.h>
911///
912/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
913///
914/// \param __m
915/// A 64-bit integer vector of [4 x i16].
916/// \param __count
917/// A 32-bit integer value.
918/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
919/// values.
920static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
921_mm_srai_pi16(__m64 __m, int __count)
922{
923 return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m),
924 __count));
925}
926
927/// Right-shifts each 32-bit integer element of the first parameter,
928/// which is a 64-bit integer vector of [2 x i32], by the number of bits
929/// specified by the second parameter, which is a 64-bit integer.
930///
931/// High-order bits are filled with the sign bit of the initial value of each
932/// 32-bit element. The 32-bit results are packed into a 64-bit integer
933/// vector of [2 x i32].
934///
935/// \headerfile <x86intrin.h>
936///
937/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
938///
939/// \param __m
940/// A 64-bit integer vector of [2 x i32].
941/// \param __count
942/// A 64-bit integer vector interpreted as a single 64-bit integer.
943/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
944/// values.
945static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
946_mm_sra_pi32(__m64 __m, __m64 __count)
947{
948 return __trunc64(__builtin_ia32_psrad128((__v4si)__anyext128(__m),
949 (__v4si)__anyext128(__count)));
950}
951
952/// Right-shifts each 32-bit integer element of a 64-bit integer vector
953/// of [2 x i32] by the number of bits specified by a 32-bit integer.
954///
955/// High-order bits are filled with the sign bit of the initial value of each
956/// 32-bit element. The 32-bit results are packed into a 64-bit integer
957/// vector of [2 x i32].
958///
959/// \headerfile <x86intrin.h>
960///
961/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
962///
963/// \param __m
964/// A 64-bit integer vector of [2 x i32].
965/// \param __count
966/// A 32-bit integer value.
967/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
968/// values.
969static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
970_mm_srai_pi32(__m64 __m, int __count)
971{
972 return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m),
973 __count));
974}
975
976/// Right-shifts each 16-bit integer element of the first parameter,
977/// which is a 64-bit integer vector of [4 x i16], by the number of bits
978/// specified by the second parameter, which is a 64-bit integer.
979///
980/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
981/// integer vector of [4 x i16].
982///
983/// \headerfile <x86intrin.h>
984///
985/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
986///
987/// \param __m
988/// A 64-bit integer vector of [4 x i16].
989/// \param __count
990/// A 64-bit integer vector interpreted as a single 64-bit integer.
991/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
992/// values.
993static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
994_mm_srl_pi16(__m64 __m, __m64 __count)
995{
996 return __trunc64(__builtin_ia32_psrlw128((__v8hi)__anyext128(__m),
997 (__v8hi)__anyext128(__count)));
998}
999
1000/// Right-shifts each 16-bit integer element of a 64-bit integer vector
1001/// of [4 x i16] by the number of bits specified by a 32-bit integer.
1002///
1003/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
1004/// integer vector of [4 x i16].
1005///
1006/// \headerfile <x86intrin.h>
1007///
1008/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
1009///
1010/// \param __m
1011/// A 64-bit integer vector of [4 x i16].
1012/// \param __count
1013/// A 32-bit integer value.
1014/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
1015/// values.
1016static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1017_mm_srli_pi16(__m64 __m, int __count)
1018{
1019 return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m),
1020 __count));
1021}
1022
1023/// Right-shifts each 32-bit integer element of the first parameter,
1024/// which is a 64-bit integer vector of [2 x i32], by the number of bits
1025/// specified by the second parameter, which is a 64-bit integer.
1026///
1027/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1028/// integer vector of [2 x i32].
1029///
1030/// \headerfile <x86intrin.h>
1031///
1032/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1033///
1034/// \param __m
1035/// A 64-bit integer vector of [2 x i32].
1036/// \param __count
1037/// A 64-bit integer vector interpreted as a single 64-bit integer.
1038/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1039/// values.
1040static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1041_mm_srl_pi32(__m64 __m, __m64 __count)
1042{
1043 return __trunc64(__builtin_ia32_psrld128((__v4si)__anyext128(__m),
1044 (__v4si)__anyext128(__count)));
1045}
1046
1047/// Right-shifts each 32-bit integer element of a 64-bit integer vector
1048/// of [2 x i32] by the number of bits specified by a 32-bit integer.
1049///
1050/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1051/// integer vector of [2 x i32].
1052///
1053/// \headerfile <x86intrin.h>
1054///
1055/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1056///
1057/// \param __m
1058/// A 64-bit integer vector of [2 x i32].
1059/// \param __count
1060/// A 32-bit integer value.
1061/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1062/// values.
1063static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1064_mm_srli_pi32(__m64 __m, int __count)
1065{
1066 return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m),
1067 __count));
1068}
1069
1070/// Right-shifts the first 64-bit integer parameter by the number of bits
1071/// specified by the second 64-bit integer parameter.
1072///
1073/// High-order bits are cleared.
1074///
1075/// \headerfile <x86intrin.h>
1076///
1077/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1078///
1079/// \param __m
1080/// A 64-bit integer vector interpreted as a single 64-bit integer.
1081/// \param __count
1082/// A 64-bit integer vector interpreted as a single 64-bit integer.
1083/// \returns A 64-bit integer vector containing the right-shifted value.
1084static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1085_mm_srl_si64(__m64 __m, __m64 __count)
1086{
1087 return __trunc64(__builtin_ia32_psrlq128((__v2di)__anyext128(__m),
1088 (__v2di)__anyext128(__count)));
1089}
1090
1091/// Right-shifts the first parameter, which is a 64-bit integer, by the
1092/// number of bits specified by the second parameter, which is a 32-bit
1093/// integer.
1094///
1095/// High-order bits are cleared.
1096///
1097/// \headerfile <x86intrin.h>
1098///
1099/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1100///
1101/// \param __m
1102/// A 64-bit integer vector interpreted as a single 64-bit integer.
1103/// \param __count
1104/// A 32-bit integer value.
1105/// \returns A 64-bit integer vector containing the right-shifted value.
1106static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1107_mm_srli_si64(__m64 __m, int __count)
1108{
1109 return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m),
1110 __count));
1111}
1112
1113/// Performs a bitwise AND of two 64-bit integer vectors.
1114///
1115/// \headerfile <x86intrin.h>
1116///
1117/// This intrinsic corresponds to the <c> PAND </c> instruction.
1118///
1119/// \param __m1
1120/// A 64-bit integer vector.
1121/// \param __m2
1122/// A 64-bit integer vector.
1123/// \returns A 64-bit integer vector containing the bitwise AND of both
1124/// parameters.
1125static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1126_mm_and_si64(__m64 __m1, __m64 __m2)
1127{
1128 return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
1129}
1130
1131/// Performs a bitwise NOT of the first 64-bit integer vector, and then
1132/// performs a bitwise AND of the intermediate result and the second 64-bit
1133/// integer vector.
1134///
1135/// \headerfile <x86intrin.h>
1136///
1137/// This intrinsic corresponds to the <c> PANDN </c> instruction.
1138///
1139/// \param __m1
1140/// A 64-bit integer vector. The one's complement of this parameter is used
1141/// in the bitwise AND.
1142/// \param __m2
1143/// A 64-bit integer vector.
1144/// \returns A 64-bit integer vector containing the bitwise AND of the second
1145/// parameter and the one's complement of the first parameter.
1146static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1147_mm_andnot_si64(__m64 __m1, __m64 __m2)
1148{
1149 return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
1150}
1151
1152/// Performs a bitwise OR of two 64-bit integer vectors.
1153///
1154/// \headerfile <x86intrin.h>
1155///
1156/// This intrinsic corresponds to the <c> POR </c> instruction.
1157///
1158/// \param __m1
1159/// A 64-bit integer vector.
1160/// \param __m2
1161/// A 64-bit integer vector.
1162/// \returns A 64-bit integer vector containing the bitwise OR of both
1163/// parameters.
1164static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1165_mm_or_si64(__m64 __m1, __m64 __m2)
1166{
1167 return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
1168}
1169
1170/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
1171///
1172/// \headerfile <x86intrin.h>
1173///
1174/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1175///
1176/// \param __m1
1177/// A 64-bit integer vector.
1178/// \param __m2
1179/// A 64-bit integer vector.
1180/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1181/// parameters.
1182static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1183_mm_xor_si64(__m64 __m1, __m64 __m2)
1184{
1185 return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
1186}
1187
1188/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1189/// [8 x i8] to determine if the element of the first vector is equal to the
1190/// corresponding element of the second vector.
1191///
1192/// Each comparison returns 0 for false, 0xFF for true.
1193///
1194/// \headerfile <x86intrin.h>
1195///
1196/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
1197///
1198/// \param __m1
1199/// A 64-bit integer vector of [8 x i8].
1200/// \param __m2
1201/// A 64-bit integer vector of [8 x i8].
1202/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1203/// results.
1204static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1205_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
1206{
1207 return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
1208}
1209
1210/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1211/// [4 x i16] to determine if the element of the first vector is equal to the
1212/// corresponding element of the second vector.
1213///
1214/// Each comparison returns 0 for false, 0xFFFF for true.
1215///
1216/// \headerfile <x86intrin.h>
1217///
1218/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
1219///
1220/// \param __m1
1221/// A 64-bit integer vector of [4 x i16].
1222/// \param __m2
1223/// A 64-bit integer vector of [4 x i16].
1224/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1225/// results.
1226static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1227_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
1228{
1229 return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
1230}
1231
1232/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1233/// [2 x i32] to determine if the element of the first vector is equal to the
1234/// corresponding element of the second vector.
1235///
1236/// Each comparison returns 0 for false, 0xFFFFFFFF for true.
1237///
1238/// \headerfile <x86intrin.h>
1239///
1240/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
1241///
1242/// \param __m1
1243/// A 64-bit integer vector of [2 x i32].
1244/// \param __m2
1245/// A 64-bit integer vector of [2 x i32].
1246/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1247/// results.
1248static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1249_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
1250{
1251 return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
1252}
1253
1254/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1255/// [8 x i8] to determine if the element of the first vector is greater than
1256/// the corresponding element of the second vector.
1257///
1258/// Each comparison returns 0 for false, 0xFF for true.
1259///
1260/// \headerfile <x86intrin.h>
1261///
1262/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
1263///
1264/// \param __m1
1265/// A 64-bit integer vector of [8 x i8].
1266/// \param __m2
1267/// A 64-bit integer vector of [8 x i8].
1268/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1269/// results.
1270static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1271_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
1272{
1273 /* This function always performs a signed comparison, but __v8qi is a char
1274 which may be signed or unsigned, so use __v8qs. */
1275 return (__m64)((__v8qs)__m1 > (__v8qs)__m2);
1276}
1277
1278/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1279/// [4 x i16] to determine if the element of the first vector is greater than
1280/// the corresponding element of the second vector.
1281///
1282/// Each comparison returns 0 for false, 0xFFFF for true.
1283///
1284/// \headerfile <x86intrin.h>
1285///
1286/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
1287///
1288/// \param __m1
1289/// A 64-bit integer vector of [4 x i16].
1290/// \param __m2
1291/// A 64-bit integer vector of [4 x i16].
1292/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1293/// results.
1294static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1295_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
1296{
1297 return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
1298}
1299
1300/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1301/// [2 x i32] to determine if the element of the first vector is greater than
1302/// the corresponding element of the second vector.
1303///
1304/// Each comparison returns 0 for false, 0xFFFFFFFF for true.
1305///
1306/// \headerfile <x86intrin.h>
1307///
1308/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
1309///
1310/// \param __m1
1311/// A 64-bit integer vector of [2 x i32].
1312/// \param __m2
1313/// A 64-bit integer vector of [2 x i32].
1314/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1315/// results.
1316static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1317_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
1318{
1319 return (__m64)((__v2si)__m1 > (__v2si)__m2);
1320}
1321
1322/// Constructs a 64-bit integer vector initialized to zero.
1323///
1324/// \headerfile <x86intrin.h>
1325///
1326/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1327///
1328/// \returns An initialized 64-bit integer vector with all elements set to zero.
1329static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1331{
1332 return __extension__ (__m64){ 0LL };
1333}
1334
1335/// Constructs a 64-bit integer vector initialized with the specified
1336/// 32-bit integer values.
1337///
1338/// \headerfile <x86intrin.h>
1339///
1340/// This intrinsic is a utility function and does not correspond to a specific
1341/// instruction.
1342///
1343/// \param __i1
1344/// A 32-bit integer value used to initialize the upper 32 bits of the
1345/// result.
1346/// \param __i0
1347/// A 32-bit integer value used to initialize the lower 32 bits of the
1348/// result.
1349/// \returns An initialized 64-bit integer vector.
1350static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1351_mm_set_pi32(int __i1, int __i0)
1352{
1353 return __extension__ (__m64)(__v2si){__i0, __i1};
1354}
1355
1356/// Constructs a 64-bit integer vector initialized with the specified
1357/// 16-bit integer values.
1358///
1359/// \headerfile <x86intrin.h>
1360///
1361/// This intrinsic is a utility function and does not correspond to a specific
1362/// instruction.
1363///
1364/// \param __s3
1365/// A 16-bit integer value used to initialize bits [63:48] of the result.
1366/// \param __s2
1367/// A 16-bit integer value used to initialize bits [47:32] of the result.
1368/// \param __s1
1369/// A 16-bit integer value used to initialize bits [31:16] of the result.
1370/// \param __s0
1371/// A 16-bit integer value used to initialize bits [15:0] of the result.
1372/// \returns An initialized 64-bit integer vector.
1373static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1374_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
1375{
1376 return __extension__ (__m64)(__v4hi){__s0, __s1, __s2, __s3};
1377}
1378
1379/// Constructs a 64-bit integer vector initialized with the specified
1380/// 8-bit integer values.
1381///
1382/// \headerfile <x86intrin.h>
1383///
1384/// This intrinsic is a utility function and does not correspond to a specific
1385/// instruction.
1386///
1387/// \param __b7
1388/// An 8-bit integer value used to initialize bits [63:56] of the result.
1389/// \param __b6
1390/// An 8-bit integer value used to initialize bits [55:48] of the result.
1391/// \param __b5
1392/// An 8-bit integer value used to initialize bits [47:40] of the result.
1393/// \param __b4
1394/// An 8-bit integer value used to initialize bits [39:32] of the result.
1395/// \param __b3
1396/// An 8-bit integer value used to initialize bits [31:24] of the result.
1397/// \param __b2
1398/// An 8-bit integer value used to initialize bits [23:16] of the result.
1399/// \param __b1
1400/// An 8-bit integer value used to initialize bits [15:8] of the result.
1401/// \param __b0
1402/// An 8-bit integer value used to initialize bits [7:0] of the result.
1403/// \returns An initialized 64-bit integer vector.
1404static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1405_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1406 char __b1, char __b0)
1407{
1408 return __extension__ (__m64)(__v8qi){__b0, __b1, __b2, __b3,
1409 __b4, __b5, __b6, __b7};
1410}
1411
1412/// Constructs a 64-bit integer vector of [2 x i32], with each of the
1413/// 32-bit integer vector elements set to the specified 32-bit integer
1414/// value.
1415///
1416/// \headerfile <x86intrin.h>
1417///
1418/// This intrinsic is a utility function and does not correspond to a specific
1419/// instruction.
1420///
1421/// \param __i
1422/// A 32-bit integer value used to initialize each vector element of the
1423/// result.
1424/// \returns An initialized 64-bit integer vector of [2 x i32].
1425static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1427{
1428 return _mm_set_pi32(__i, __i);
1429}
1430
1431/// Constructs a 64-bit integer vector of [4 x i16], with each of the
1432/// 16-bit integer vector elements set to the specified 16-bit integer
1433/// value.
1434///
1435/// \headerfile <x86intrin.h>
1436///
1437/// This intrinsic is a utility function and does not correspond to a specific
1438/// instruction.
1439///
1440/// \param __w
1441/// A 16-bit integer value used to initialize each vector element of the
1442/// result.
1443/// \returns An initialized 64-bit integer vector of [4 x i16].
1444static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1446{
1447 return _mm_set_pi16(__w, __w, __w, __w);
1448}
1449
1450/// Constructs a 64-bit integer vector of [8 x i8], with each of the
1451/// 8-bit integer vector elements set to the specified 8-bit integer value.
1452///
1453/// \headerfile <x86intrin.h>
1454///
1455/// This intrinsic is a utility function and does not correspond to a specific
1456/// instruction.
1457///
1458/// \param __b
1459/// An 8-bit integer value used to initialize each vector element of the
1460/// result.
1461/// \returns An initialized 64-bit integer vector of [8 x i8].
1462static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1464{
1465 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
1466}
1467
1468/// Constructs a 64-bit integer vector, initialized in reverse order with
1469/// the specified 32-bit integer values.
1470///
1471/// \headerfile <x86intrin.h>
1472///
1473/// This intrinsic is a utility function and does not correspond to a specific
1474/// instruction.
1475///
1476/// \param __i0
1477/// A 32-bit integer value used to initialize the lower 32 bits of the
1478/// result.
1479/// \param __i1
1480/// A 32-bit integer value used to initialize the upper 32 bits of the
1481/// result.
1482/// \returns An initialized 64-bit integer vector.
1483static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1484_mm_setr_pi32(int __i0, int __i1)
1485{
1486 return _mm_set_pi32(__i1, __i0);
1487}
1488
1489/// Constructs a 64-bit integer vector, initialized in reverse order with
1490/// the specified 16-bit integer values.
1491///
1492/// \headerfile <x86intrin.h>
1493///
1494/// This intrinsic is a utility function and does not correspond to a specific
1495/// instruction.
1496///
1497/// \param __w0
1498/// A 16-bit integer value used to initialize bits [15:0] of the result.
1499/// \param __w1
1500/// A 16-bit integer value used to initialize bits [31:16] of the result.
1501/// \param __w2
1502/// A 16-bit integer value used to initialize bits [47:32] of the result.
1503/// \param __w3
1504/// A 16-bit integer value used to initialize bits [63:48] of the result.
1505/// \returns An initialized 64-bit integer vector.
1506static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1507_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
1508{
1509 return _mm_set_pi16(__w3, __w2, __w1, __w0);
1510}
1511
1512/// Constructs a 64-bit integer vector, initialized in reverse order with
1513/// the specified 8-bit integer values.
1514///
1515/// \headerfile <x86intrin.h>
1516///
1517/// This intrinsic is a utility function and does not correspond to a specific
1518/// instruction.
1519///
1520/// \param __b0
1521/// An 8-bit integer value used to initialize bits [7:0] of the result.
1522/// \param __b1
1523/// An 8-bit integer value used to initialize bits [15:8] of the result.
1524/// \param __b2
1525/// An 8-bit integer value used to initialize bits [23:16] of the result.
1526/// \param __b3
1527/// An 8-bit integer value used to initialize bits [31:24] of the result.
1528/// \param __b4
1529/// An 8-bit integer value used to initialize bits [39:32] of the result.
1530/// \param __b5
1531/// An 8-bit integer value used to initialize bits [47:40] of the result.
1532/// \param __b6
1533/// An 8-bit integer value used to initialize bits [55:48] of the result.
1534/// \param __b7
1535/// An 8-bit integer value used to initialize bits [63:56] of the result.
1536/// \returns An initialized 64-bit integer vector.
1537static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1538_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1539 char __b6, char __b7)
1540{
1541 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1542}
1543
/* Remove this header's helper macros so they are not visible past this
   point. */
#undef __DEFAULT_FN_ATTRS_SSE2
#undef __trunc64
#undef __anyext128
1547
/* Compatibility aliases: each _m_* name expands to the matching _mm_* name. */

/* empty / from_int / to_int */
#define _m_empty      _mm_empty
#define _m_from_int   _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int     _mm_cvtsi64_si32
#define _m_to_int64   _mm_cvtm64_si64

/* pack* / punpck* */
#define _m_packsswb   _mm_packs_pi16
#define _m_packssdw   _mm_packs_pi32
#define _m_packuswb   _mm_packs_pu16
#define _m_punpckhbw  _mm_unpackhi_pi8
#define _m_punpckhwd  _mm_unpackhi_pi16
#define _m_punpckhdq  _mm_unpackhi_pi32
#define _m_punpcklbw  _mm_unpacklo_pi8
#define _m_punpcklwd  _mm_unpacklo_pi16
#define _m_punpckldq  _mm_unpacklo_pi32

/* padd* */
#define _m_paddb      _mm_add_pi8
#define _m_paddw      _mm_add_pi16
#define _m_paddd      _mm_add_pi32
#define _m_paddsb     _mm_adds_pi8
#define _m_paddsw     _mm_adds_pi16
#define _m_paddusb    _mm_adds_pu8
#define _m_paddusw    _mm_adds_pu16

/* psub* */
#define _m_psubb      _mm_sub_pi8
#define _m_psubw      _mm_sub_pi16
#define _m_psubd      _mm_sub_pi32
#define _m_psubsb     _mm_subs_pi8
#define _m_psubsw     _mm_subs_pi16
#define _m_psubusb    _mm_subs_pu8
#define _m_psubusw    _mm_subs_pu16

/* pmadd* / pmul* */
#define _m_pmaddwd    _mm_madd_pi16
#define _m_pmulhw     _mm_mulhi_pi16
#define _m_pmullw     _mm_mullo_pi16

/* psll* / psra* / psrl* */
#define _m_psllw      _mm_sll_pi16
#define _m_psllwi     _mm_slli_pi16
#define _m_pslld      _mm_sll_pi32
#define _m_pslldi     _mm_slli_pi32
#define _m_psllq      _mm_sll_si64
#define _m_psllqi     _mm_slli_si64
#define _m_psraw      _mm_sra_pi16
#define _m_psrawi     _mm_srai_pi16
#define _m_psrad      _mm_sra_pi32
#define _m_psradi     _mm_srai_pi32
#define _m_psrlw      _mm_srl_pi16
#define _m_psrlwi     _mm_srli_pi16
#define _m_psrld      _mm_srl_pi32
#define _m_psrldi     _mm_srli_pi32
#define _m_psrlq      _mm_srl_si64
#define _m_psrlqi     _mm_srli_si64

/* pand / pandn / por / pxor */
#define _m_pand       _mm_and_si64
#define _m_pandn      _mm_andnot_si64
#define _m_por        _mm_or_si64
#define _m_pxor       _mm_xor_si64

/* pcmpeq* / pcmpgt* */
#define _m_pcmpeqb    _mm_cmpeq_pi8
#define _m_pcmpeqw    _mm_cmpeq_pi16
#define _m_pcmpeqd    _mm_cmpeq_pi32
#define _m_pcmpgtb    _mm_cmpgt_pi8
#define _m_pcmpgtw    _mm_cmpgt_pi16
#define _m_pcmpgtd    _mm_cmpgt_pi32
1606
1607#endif /* __MMINTRIN_H */
1608
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_andnot_si64(__m64 __m1, __m64 __m2)
Performs a bitwise NOT of the first 64-bit integer vector, and then performs a bitwise AND of the int...
Definition: mmintrin.h:1147
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi32(__m64 __m1, __m64 __m2)
Adds each 32-bit integer element of the first 64-bit integer vector of [2 x i32] to the corresponding...
Definition: mmintrin.h:413
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_and_si64(__m64 __m1, __m64 __m2)
Performs a bitwise AND of two 64-bit integer vectors.
Definition: mmintrin.h:1126
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8...
Definition: mmintrin.h:371
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Constructs a 64-bit integer vector initialized with the specified 8-bit integer values.
Definition: mmintrin.h:1405
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit integer element of the first 64-bit integer vector of [4 x i16] to the corresponding...
Definition: mmintrin.h:392
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu16(__m64 __m1, __m64 __m2)
Adds, with saturation, each 16-bit unsigned integer element of the first 64-bit integer vector of [4 ...
Definition: mmintrin.h:507
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi32(__m64 __m1, __m64 __m2)
Converts, with saturation, 32-bit signed integers from both 64-bit integer vector parameters of [2 x ...
Definition: mmintrin.h:177
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
Definition: mmintrin.h:1249
#define __anyext128(x)
Definition: mmintrin.h:48
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition: mmintrin.h:328
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi16(short __w)
Constructs a 64-bit integer vector of [4 x i16], with each of the 16-bit integer vector elements set ...
Definition: mmintrin.h:1445
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi16(__m64 __m1, __m64 __m2)
Converts, with saturation, 16-bit signed integers from both 64-bit integer vector parameters of [4 x ...
Definition: mmintrin.h:151
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit integer element of the second 64-bit integer vector of [8 x i8] from the corresp...
Definition: mmintrin.h:528
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 8-bit integer val...
Definition: mmintrin.h:1538
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
Definition: mmintrin.h:970
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu8(__m64 __m1, __m64 __m2)
Adds, with saturation, each 8-bit unsigned integer element of the first 64-bit integer vector of [8 x...
Definition: mmintrin.h:484
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi8(char __b)
Constructs a 64-bit integer vector of [8 x i8], with each of the 8-bit integer vector elements set to...
Definition: mmintrin.h:1463
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
Definition: mmintrin.h:1017
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
Definition: mmintrin.h:921
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit unsigned integer element of the second 64-bit integer vector of [8 x i8] from th...
Definition: mmintrin.h:642
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi8(__m64 __m1, __m64 __m2)
Adds, with saturation, each 8-bit signed integer element of the first 64-bit integer vector of [8 x i...
Definition: mmintrin.h:437
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi32(int __i)
Constructs a 64-bit integer vector of [2 x i32], with each of the 32-bit integer vector elements set ...
Definition: mmintrin.h:1426
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_or_si64(__m64 __m1, __m64 __m2)
Performs a bitwise OR of two 64-bit integer vectors.
Definition: mmintrin.h:1165
static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 _mm_cvtm64_si64(__m64 __m)
Casts a 64-bit integer vector into a 64-bit signed integer value.
Definition: mmintrin.h:126
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setzero_si64(void)
Constructs a 64-bit integer vector initialized to zero.
Definition: mmintrin.h:1330
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
Definition: mmintrin.h:1064
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi16(__m64 __m1, __m64 __m2)
Adds, with saturation, each 16-bit signed integer element of the first 64-bit integer vector of [4 x ...
Definition: mmintrin.h:461
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi32_si64(int __i)
Constructs a 64-bit integer vector, setting the lower 32 bits to the value of the 32-bit integer para...
Definition: mmintrin.h:77
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_si64(__m64 __m, int __count)
Left-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the se...
Definition: mmintrin.h:872
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:715
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_pi16(__m64 __m, __m64 __count)
Left-shifts each 16-bit signed integer element of the first parameter, which is a 64-bit integer vect...
Definition: mmintrin.h:760
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:994
static __inline__ int __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_si32(__m64 __m)
Returns the lower 32 bits of a 64-bit integer vector as a 32-bit signed integer.
Definition: mmintrin.h:94
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
Definition: mmintrin.h:1271
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sra_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:946
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_si64(__m64 __m, __m64 __count)
Right-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit ...
Definition: mmintrin.h:1085
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)))
Definition: mmintrin.h:17
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi8(__m64 __m1, __m64 __m2)
Subtracts, with saturation, each 8-bit signed integer element of the second 64-bit integer vector of ...
Definition: mmintrin.h:594
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:737
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
Definition: mmintrin.h:231
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pu16(__m64 __m1, __m64 __m2)
Converts, with saturation, 16-bit signed integers from both 64-bit integer vector parameters of [4 x ...
Definition: mmintrin.h:203
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_madd_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:693
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:1041
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_xor_si64(__m64 __m1, __m64 __m2)
Performs a bitwise exclusive OR of two 64-bit integer vectors.
Definition: mmintrin.h:1183
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_m64(long long __i)
Casts a 64-bit signed integer value into a 64-bit integer vector.
Definition: mmintrin.h:110
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
Definition: mmintrin.h:1205
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_si64(__m64 __m, __m64 __count)
Left-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit i...
Definition: mmintrin.h:851
#define __trunc64(x)
Definition: mmintrin.h:46
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
Definition: mmintrin.h:277
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi32(__m64 __m1, __m64 __m2)
Subtracts each 32-bit integer element of the second 64-bit integer vector of [2 x i32] from the corre...
Definition: mmintrin.h:570
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition: mmintrin.h:255
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi32(int __i1, int __i0)
Constructs a 64-bit integer vector initialized with the specified 32-bit integer values.
Definition: mmintrin.h:1351
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_si64(__m64 __m, int __count)
Right-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the s...
Definition: mmintrin.h:1107
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
Definition: mmintrin.h:304
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sra_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:897
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi16(__m64 __m1, __m64 __m2)
Subtracts, with saturation, each 16-bit signed integer element of the second 64-bit integer vector of...
Definition: mmintrin.h:618
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi32(__m64 __m, int __count)
Left-shifts each 32-bit signed integer element of a 64-bit integer vector of [2 x i32] by the number ...
Definition: mmintrin.h:830
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
Definition: mmintrin.h:1227
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi16(__m64 __m, int __count)
Left-shifts each 16-bit signed integer element of a 64-bit integer vector of [4 x i16] by the number ...
Definition: mmintrin.h:783
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_pi32(__m64 __m, __m64 __count)
Left-shifts each 32-bit signed integer element of the first parameter, which is a 64-bit integer vect...
Definition: mmintrin.h:807
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi32(int __i0, int __i1)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 32-bit integer va...
Definition: mmintrin.h:1484
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit unsigned integer element of the second 64-bit integer vector of [4 x i16] from ...
Definition: mmintrin.h:666
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 16-bit integer va...
Definition: mmintrin.h:1507
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit integer element of the second 64-bit integer vector of [4 x i16] from the corre...
Definition: mmintrin.h:549
#define __DEFAULT_FN_ATTRS_SSE2
Definition: mmintrin.h:42
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
Definition: mmintrin.h:1317
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
Definition: mmintrin.h:350
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
Constructs a 64-bit integer vector initialized with the specified 16-bit integer values.
Definition: mmintrin.h:1374
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
Definition: mmintrin.h:1295