clang 20.0.0git
mmintrin.h
Go to the documentation of this file.
/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10#ifndef __MMINTRIN_H
11#define __MMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
/* Public 64-bit MMX vector type: one 64-bit lane, 8-byte aligned. */
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));

/* Internal 64-bit views with signed (or plain char) element types. */
typedef long long __v1di __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));

/* Unsigned types */
typedef unsigned long long __v1du __attribute__((__vector_size__(8)));
typedef unsigned int __v2su __attribute__((__vector_size__(8)));
typedef unsigned short __v4hu __attribute__((__vector_size__(8)));
typedef unsigned char __v8qu __attribute__((__vector_size__(8)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v8qs __attribute__((__vector_size__(8)));

/* SSE/SSE2 types */
typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef int __v4si __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
40
/* Define the default attributes for the functions in this file. */
#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
#define __DEFAULT_FN_ATTRS_SSE2                                                \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("sse2,no-evex512"), __min_vector_width__(128)))
#else
#define __DEFAULT_FN_ATTRS_SSE2                                                \
  __attribute__((__always_inline__, __nodebug__, __target__("sse2"),           \
                 __min_vector_width__(128)))
#endif

/* __trunc64(x): reinterpret x as [2 x i64] and keep only element 0, giving a
 * 64-bit __m64. The whole expansion is parenthesized so that the cast cannot
 * rebind when the macro is followed by a postfix operator. */
#define __trunc64(x)                                                           \
  ((__m64)__builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0))

/* __anyext128(x): widen a 64-bit vector to 128 bits. Elements 0 and 1 of the
 * [4 x i32] result come from x; elements 2 and 3 use shuffle index -1, so
 * their contents are unspecified and must not be relied upon. */
#define __anyext128(x)                                                         \
  ((__m128i)__builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0,   \
                                    1, -1, -1))
57
58/// Clears the MMX state by setting the state of the x87 stack registers
59/// to empty.
60///
61/// \headerfile <x86intrin.h>
62///
63/// This intrinsic corresponds to the <c> EMMS </c> instruction.
64///
65static __inline__ void __attribute__((__always_inline__, __nodebug__,
66 __target__("mmx,no-evex512")))
67_mm_empty(void) {
68 __builtin_ia32_emms();
69}
70
71/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
72/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
73///
74/// \headerfile <x86intrin.h>
75///
76/// This intrinsic corresponds to the <c> MOVD </c> instruction.
77///
78/// \param __i
79/// A 32-bit integer value.
80/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
81/// parameter. The upper 32 bits are set to 0.
82static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
84{
85 return __extension__ (__m64)(__v2si){__i, 0};
86}
87
88/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
89/// signed integer.
90///
91/// \headerfile <x86intrin.h>
92///
93/// This intrinsic corresponds to the <c> MOVD </c> instruction.
94///
95/// \param __m
96/// A 64-bit integer vector.
97/// \returns A 32-bit signed integer value containing the lower 32 bits of the
98/// parameter.
99static __inline__ int __DEFAULT_FN_ATTRS_SSE2
101{
102 return ((__v2si)__m)[0];
103}
104
105/// Casts a 64-bit signed integer value into a 64-bit integer vector.
106///
107/// \headerfile <x86intrin.h>
108///
109/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
110///
111/// \param __i
112/// A 64-bit signed integer.
113/// \returns A 64-bit integer vector containing the same bitwise pattern as the
114/// parameter.
115static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
116_mm_cvtsi64_m64(long long __i)
117{
118 return (__m64)__i;
119}
120
121/// Casts a 64-bit integer vector into a 64-bit signed integer value.
122///
123/// \headerfile <x86intrin.h>
124///
125/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
126///
127/// \param __m
128/// A 64-bit integer vector.
129/// \returns A 64-bit signed integer containing the same bitwise pattern as the
130/// parameter.
131static __inline__ long long __DEFAULT_FN_ATTRS_SSE2
133{
134 return (long long)__m;
135}
136
137/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
138/// vector parameters of [4 x i16] into 8-bit signed integer values, and
139/// constructs a 64-bit integer vector of [8 x i8] as the result.
140///
141/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
142/// less than 0x80 are saturated to 0x80.
143///
144/// \headerfile <x86intrin.h>
145///
146/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
147///
148/// \param __m1
149/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
150/// written to the lower 32 bits of the result.
151/// \param __m2
152/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
153/// written to the upper 32 bits of the result.
154/// \returns A 64-bit integer vector of [8 x i8] containing the converted
155/// values.
156static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
157_mm_packs_pi16(__m64 __m1, __m64 __m2)
158{
159 return __trunc64(__builtin_ia32_packsswb128(
160 (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
161}
162
163/// Converts, with saturation, 32-bit signed integers from both 64-bit integer
164/// vector parameters of [2 x i32] into 16-bit signed integer values, and
165/// constructs a 64-bit integer vector of [4 x i16] as the result.
166///
167/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
168/// values less than 0x8000 are saturated to 0x8000.
169///
170/// \headerfile <x86intrin.h>
171///
172/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
173///
174/// \param __m1
175/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
176/// written to the lower 32 bits of the result.
177/// \param __m2
178/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
179/// written to the upper 32 bits of the result.
180/// \returns A 64-bit integer vector of [4 x i16] containing the converted
181/// values.
182static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
183_mm_packs_pi32(__m64 __m1, __m64 __m2)
184{
185 return __trunc64(__builtin_ia32_packssdw128(
186 (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){}));
187}
188
189/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
190/// vector parameters of [4 x i16] into 8-bit unsigned integer values, and
191/// constructs a 64-bit integer vector of [8 x i8] as the result.
192///
193/// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are
194/// saturated to 0.
195///
196/// \headerfile <x86intrin.h>
197///
198/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
199///
200/// \param __m1
201/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
202/// written to the lower 32 bits of the result.
203/// \param __m2
204/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
205/// written to the upper 32 bits of the result.
206/// \returns A 64-bit integer vector of [8 x i8] containing the converted
207/// values.
208static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
209_mm_packs_pu16(__m64 __m1, __m64 __m2)
210{
211 return __trunc64(__builtin_ia32_packuswb128(
212 (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
213}
214
215/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
216/// and interleaves them into a 64-bit integer vector of [8 x i8].
217///
218/// \headerfile <x86intrin.h>
219///
220/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
221///
222/// \param __m1
223/// A 64-bit integer vector of [8 x i8]. \n
224/// Bits [39:32] are written to bits [7:0] of the result. \n
225/// Bits [47:40] are written to bits [23:16] of the result. \n
226/// Bits [55:48] are written to bits [39:32] of the result. \n
227/// Bits [63:56] are written to bits [55:48] of the result.
228/// \param __m2
229/// A 64-bit integer vector of [8 x i8].
230/// Bits [39:32] are written to bits [15:8] of the result. \n
231/// Bits [47:40] are written to bits [31:24] of the result. \n
232/// Bits [55:48] are written to bits [47:40] of the result. \n
233/// Bits [63:56] are written to bits [63:56] of the result.
234/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
235/// values.
236static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
237_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
238{
239 return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
240 4, 12, 5, 13, 6, 14, 7, 15);
241}
242
243/// Unpacks the upper 32 bits from two 64-bit integer vectors of
244/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
245///
246/// \headerfile <x86intrin.h>
247///
248/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
249///
250/// \param __m1
251/// A 64-bit integer vector of [4 x i16].
252/// Bits [47:32] are written to bits [15:0] of the result. \n
253/// Bits [63:48] are written to bits [47:32] of the result.
254/// \param __m2
255/// A 64-bit integer vector of [4 x i16].
256/// Bits [47:32] are written to bits [31:16] of the result. \n
257/// Bits [63:48] are written to bits [63:48] of the result.
258/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
259/// values.
260static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
261_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
262{
263 return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
264 2, 6, 3, 7);
265}
266
267/// Unpacks the upper 32 bits from two 64-bit integer vectors of
268/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
269///
270/// \headerfile <x86intrin.h>
271///
272/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
273///
274/// \param __m1
275/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
276/// the lower 32 bits of the result.
277/// \param __m2
278/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
279/// the upper 32 bits of the result.
280/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
281/// values.
282static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
283_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
284{
285 return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
286}
287
288/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
289/// and interleaves them into a 64-bit integer vector of [8 x i8].
290///
291/// \headerfile <x86intrin.h>
292///
293/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
294///
295/// \param __m1
296/// A 64-bit integer vector of [8 x i8].
297/// Bits [7:0] are written to bits [7:0] of the result. \n
298/// Bits [15:8] are written to bits [23:16] of the result. \n
299/// Bits [23:16] are written to bits [39:32] of the result. \n
300/// Bits [31:24] are written to bits [55:48] of the result.
301/// \param __m2
302/// A 64-bit integer vector of [8 x i8].
303/// Bits [7:0] are written to bits [15:8] of the result. \n
304/// Bits [15:8] are written to bits [31:24] of the result. \n
305/// Bits [23:16] are written to bits [47:40] of the result. \n
306/// Bits [31:24] are written to bits [63:56] of the result.
307/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
308/// values.
309static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
310_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
311{
312 return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
313 0, 8, 1, 9, 2, 10, 3, 11);
314}
315
316/// Unpacks the lower 32 bits from two 64-bit integer vectors of
317/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
318///
319/// \headerfile <x86intrin.h>
320///
321/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
322///
323/// \param __m1
324/// A 64-bit integer vector of [4 x i16].
325/// Bits [15:0] are written to bits [15:0] of the result. \n
326/// Bits [31:16] are written to bits [47:32] of the result.
327/// \param __m2
328/// A 64-bit integer vector of [4 x i16].
329/// Bits [15:0] are written to bits [31:16] of the result. \n
330/// Bits [31:16] are written to bits [63:48] of the result.
331/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
332/// values.
333static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
334_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
335{
336 return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
337 0, 4, 1, 5);
338}
339
340/// Unpacks the lower 32 bits from two 64-bit integer vectors of
341/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
342///
343/// \headerfile <x86intrin.h>
344///
345/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
346///
347/// \param __m1
348/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
349/// the lower 32 bits of the result.
350/// \param __m2
351/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
352/// the upper 32 bits of the result.
353/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
354/// values.
355static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
356_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
357{
358 return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
359}
360
361/// Adds each 8-bit integer element of the first 64-bit integer vector
362/// of [8 x i8] to the corresponding 8-bit integer element of the second
363/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
364/// packed into a 64-bit integer vector of [8 x i8].
365///
366/// \headerfile <x86intrin.h>
367///
368/// This intrinsic corresponds to the <c> PADDB </c> instruction.
369///
370/// \param __m1
371/// A 64-bit integer vector of [8 x i8].
372/// \param __m2
373/// A 64-bit integer vector of [8 x i8].
374/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
375/// parameters.
376static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
377_mm_add_pi8(__m64 __m1, __m64 __m2)
378{
379 return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
380}
381
382/// Adds each 16-bit integer element of the first 64-bit integer vector
383/// of [4 x i16] to the corresponding 16-bit integer element of the second
384/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
385/// packed into a 64-bit integer vector of [4 x i16].
386///
387/// \headerfile <x86intrin.h>
388///
389/// This intrinsic corresponds to the <c> PADDW </c> instruction.
390///
391/// \param __m1
392/// A 64-bit integer vector of [4 x i16].
393/// \param __m2
394/// A 64-bit integer vector of [4 x i16].
395/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
396/// parameters.
397static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
398_mm_add_pi16(__m64 __m1, __m64 __m2)
399{
400 return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
401}
402
403/// Adds each 32-bit integer element of the first 64-bit integer vector
404/// of [2 x i32] to the corresponding 32-bit integer element of the second
405/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
406/// packed into a 64-bit integer vector of [2 x i32].
407///
408/// \headerfile <x86intrin.h>
409///
410/// This intrinsic corresponds to the <c> PADDD </c> instruction.
411///
412/// \param __m1
413/// A 64-bit integer vector of [2 x i32].
414/// \param __m2
415/// A 64-bit integer vector of [2 x i32].
416/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
417/// parameters.
418static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
419_mm_add_pi32(__m64 __m1, __m64 __m2)
420{
421 return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
422}
423
424/// Adds, with saturation, each 8-bit signed integer element of the first
425/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed
426/// integer element of the second 64-bit integer vector of [8 x i8].
427///
428/// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
429/// less than 0x80 are saturated to 0x80. The results are packed into a
430/// 64-bit integer vector of [8 x i8].
431///
432/// \headerfile <x86intrin.h>
433///
434/// This intrinsic corresponds to the <c> PADDSB </c> instruction.
435///
436/// \param __m1
437/// A 64-bit integer vector of [8 x i8].
438/// \param __m2
439/// A 64-bit integer vector of [8 x i8].
440/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
441/// of both parameters.
442static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
443_mm_adds_pi8(__m64 __m1, __m64 __m2)
444{
445 return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
446}
447
448/// Adds, with saturation, each 16-bit signed integer element of the first
449/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed
450/// integer element of the second 64-bit integer vector of [4 x i16].
451///
452/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
453/// less than 0x8000 are saturated to 0x8000. The results are packed into a
454/// 64-bit integer vector of [4 x i16].
455///
456/// \headerfile <x86intrin.h>
457///
458/// This intrinsic corresponds to the <c> PADDSW </c> instruction.
459///
460/// \param __m1
461/// A 64-bit integer vector of [4 x i16].
462/// \param __m2
463/// A 64-bit integer vector of [4 x i16].
464/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
465/// of both parameters.
466static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
467_mm_adds_pi16(__m64 __m1, __m64 __m2)
468{
469 return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
470}
471
472/// Adds, with saturation, each 8-bit unsigned integer element of the first
473/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned
474/// integer element of the second 64-bit integer vector of [8 x i8].
475///
476/// Sums greater than 0xFF are saturated to 0xFF. The results are packed
477/// into a 64-bit integer vector of [8 x i8].
478///
479/// \headerfile <x86intrin.h>
480///
481/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
482///
483/// \param __m1
484/// A 64-bit integer vector of [8 x i8].
485/// \param __m2
486/// A 64-bit integer vector of [8 x i8].
487/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
488/// unsigned sums of both parameters.
489static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
490_mm_adds_pu8(__m64 __m1, __m64 __m2)
491{
492 return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2);
493}
494
495/// Adds, with saturation, each 16-bit unsigned integer element of the first
496/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned
497/// integer element of the second 64-bit integer vector of [4 x i16].
498///
499/// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed
500/// into a 64-bit integer vector of [4 x i16].
501///
502/// \headerfile <x86intrin.h>
503///
504/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
505///
506/// \param __m1
507/// A 64-bit integer vector of [4 x i16].
508/// \param __m2
509/// A 64-bit integer vector of [4 x i16].
510/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
511/// unsigned sums of both parameters.
512static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
513_mm_adds_pu16(__m64 __m1, __m64 __m2)
514{
515 return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2);
516}
517
518/// Subtracts each 8-bit integer element of the second 64-bit integer
519/// vector of [8 x i8] from the corresponding 8-bit integer element of the
520/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
521/// are packed into a 64-bit integer vector of [8 x i8].
522///
523/// \headerfile <x86intrin.h>
524///
525/// This intrinsic corresponds to the <c> PSUBB </c> instruction.
526///
527/// \param __m1
528/// A 64-bit integer vector of [8 x i8] containing the minuends.
529/// \param __m2
530/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
531/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
532/// both parameters.
533static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
534_mm_sub_pi8(__m64 __m1, __m64 __m2)
535{
536 return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
537}
538
539/// Subtracts each 16-bit integer element of the second 64-bit integer
540/// vector of [4 x i16] from the corresponding 16-bit integer element of the
541/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
542/// results are packed into a 64-bit integer vector of [4 x i16].
543///
544/// \headerfile <x86intrin.h>
545///
546/// This intrinsic corresponds to the <c> PSUBW </c> instruction.
547///
548/// \param __m1
549/// A 64-bit integer vector of [4 x i16] containing the minuends.
550/// \param __m2
551/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
552/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
553/// both parameters.
554static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
555_mm_sub_pi16(__m64 __m1, __m64 __m2)
556{
557 return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
558}
559
560/// Subtracts each 32-bit integer element of the second 64-bit integer
561/// vector of [2 x i32] from the corresponding 32-bit integer element of the
562/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
563/// results are packed into a 64-bit integer vector of [2 x i32].
564///
565/// \headerfile <x86intrin.h>
566///
567/// This intrinsic corresponds to the <c> PSUBD </c> instruction.
568///
569/// \param __m1
570/// A 64-bit integer vector of [2 x i32] containing the minuends.
571/// \param __m2
572/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
573/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
574/// both parameters.
575static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
576_mm_sub_pi32(__m64 __m1, __m64 __m2)
577{
578 return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
579}
580
581/// Subtracts, with saturation, each 8-bit signed integer element of the second
582/// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed
583/// integer element of the first 64-bit integer vector of [8 x i8].
584///
585/// Positive results greater than 0x7F are saturated to 0x7F. Negative
586/// results less than 0x80 are saturated to 0x80. The results are packed
587/// into a 64-bit integer vector of [8 x i8].
588///
589/// \headerfile <x86intrin.h>
590///
591/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
592///
593/// \param __m1
594/// A 64-bit integer vector of [8 x i8] containing the minuends.
595/// \param __m2
596/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
597/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
598/// differences of both parameters.
599static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
600_mm_subs_pi8(__m64 __m1, __m64 __m2)
601{
602 return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
603}
604
605/// Subtracts, with saturation, each 16-bit signed integer element of the
606/// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit
607/// signed integer element of the first 64-bit integer vector of [4 x i16].
608///
609/// Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative
610/// results less than 0x8000 are saturated to 0x8000. The results are packed
611/// into a 64-bit integer vector of [4 x i16].
612///
613/// \headerfile <x86intrin.h>
614///
615/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
616///
617/// \param __m1
618/// A 64-bit integer vector of [4 x i16] containing the minuends.
619/// \param __m2
620/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
621/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
622/// differences of both parameters.
623static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
624_mm_subs_pi16(__m64 __m1, __m64 __m2)
625{
626 return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
627}
628
629/// Subtracts each 8-bit unsigned integer element of the second 64-bit
630/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
631/// element of the first 64-bit integer vector of [8 x i8].
632///
633/// If an element of the first vector is less than the corresponding element
634/// of the second vector, the result is saturated to 0. The results are
635/// packed into a 64-bit integer vector of [8 x i8].
636///
637/// \headerfile <x86intrin.h>
638///
639/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
640///
641/// \param __m1
642/// A 64-bit integer vector of [8 x i8] containing the minuends.
643/// \param __m2
644/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
645/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
646/// differences of both parameters.
647static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
648_mm_subs_pu8(__m64 __m1, __m64 __m2)
649{
650 return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2);
651}
652
653/// Subtracts each 16-bit unsigned integer element of the second 64-bit
654/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
655/// integer element of the first 64-bit integer vector of [4 x i16].
656///
657/// If an element of the first vector is less than the corresponding element
658/// of the second vector, the result is saturated to 0. The results are
659/// packed into a 64-bit integer vector of [4 x i16].
660///
661/// \headerfile <x86intrin.h>
662///
663/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
664///
665/// \param __m1
666/// A 64-bit integer vector of [4 x i16] containing the minuends.
667/// \param __m2
668/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
669/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
670/// differences of both parameters.
671static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
672_mm_subs_pu16(__m64 __m1, __m64 __m2)
673{
674 return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2);
675}
676
677/// Multiplies each 16-bit signed integer element of the first 64-bit
678/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
679/// element of the second 64-bit integer vector of [4 x i16] and get four
680/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
681/// The lower 32 bits of these two sums are packed into a 64-bit integer
682/// vector of [2 x i32].
683///
684/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]
685/// of both parameters are multiplied, and the sum of both results is written
686/// to bits [31:0] of the result.
687///
688/// \headerfile <x86intrin.h>
689///
690/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
691///
692/// \param __m1
693/// A 64-bit integer vector of [4 x i16].
694/// \param __m2
695/// A 64-bit integer vector of [4 x i16].
696/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
697/// products of both parameters.
698static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
699_mm_madd_pi16(__m64 __m1, __m64 __m2)
700{
701 return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__anyext128(__m1),
702 (__v8hi)__anyext128(__m2)));
703}
704
705/// Multiplies each 16-bit signed integer element of the first 64-bit
706/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
707/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
708/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
709///
710/// \headerfile <x86intrin.h>
711///
712/// This intrinsic corresponds to the <c> PMULHW </c> instruction.
713///
714/// \param __m1
715/// A 64-bit integer vector of [4 x i16].
716/// \param __m2
717/// A 64-bit integer vector of [4 x i16].
718/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
719/// of the products of both parameters.
720static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
721_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
722{
723 return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1),
724 (__v8hi)__anyext128(__m2)));
725}
726
727/// Multiplies each 16-bit signed integer element of the first 64-bit
728/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
729/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
730/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
731///
732/// \headerfile <x86intrin.h>
733///
734/// This intrinsic corresponds to the <c> PMULLW </c> instruction.
735///
736/// \param __m1
737/// A 64-bit integer vector of [4 x i16].
738/// \param __m2
739/// A 64-bit integer vector of [4 x i16].
740/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
741/// of the products of both parameters.
742static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
743_mm_mullo_pi16(__m64 __m1, __m64 __m2)
744{
745 return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
746}
747
748/// Left-shifts each 16-bit signed integer element of the first
749/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
750/// of bits specified by the second parameter, which is a 64-bit integer. The
751/// lower 16 bits of the results are packed into a 64-bit integer vector of
752/// [4 x i16].
753///
754/// \headerfile <x86intrin.h>
755///
756/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
757///
758/// \param __m
759/// A 64-bit integer vector of [4 x i16].
760/// \param __count
761/// A 64-bit integer vector interpreted as a single 64-bit integer.
762/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
763/// values. If \a __count is greater or equal to 16, the result is set to all
764/// 0.
765static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
766_mm_sll_pi16(__m64 __m, __m64 __count)
767{
768 return __trunc64(__builtin_ia32_psllw128((__v8hi)__anyext128(__m),
769 (__v8hi)__anyext128(__count)));
770}
771
772/// Left-shifts each 16-bit signed integer element of a 64-bit integer
773/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
774/// The lower 16 bits of the results are packed into a 64-bit integer vector
775/// of [4 x i16].
776///
777/// \headerfile <x86intrin.h>
778///
779/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
780///
781/// \param __m
782/// A 64-bit integer vector of [4 x i16].
783/// \param __count
784/// A 32-bit integer value.
785/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
786/// values. If \a __count is greater or equal to 16, the result is set to all
787/// 0.
788static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
789_mm_slli_pi16(__m64 __m, int __count)
790{
791 return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m),
792 __count));
793}
794
795/// Left-shifts each 32-bit signed integer element of the first
796/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
797/// of bits specified by the second parameter, which is a 64-bit integer. The
798/// lower 32 bits of the results are packed into a 64-bit integer vector of
799/// [2 x i32].
800///
801/// \headerfile <x86intrin.h>
802///
803/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
804///
805/// \param __m
806/// A 64-bit integer vector of [2 x i32].
807/// \param __count
808/// A 64-bit integer vector interpreted as a single 64-bit integer.
809/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
810/// values. If \a __count is greater than or equal to 32, the result is set to
811/// all 0.
812static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
813_mm_sll_pi32(__m64 __m, __m64 __count)
814{
815 return __trunc64(__builtin_ia32_pslld128((__v4si)__anyext128(__m),
816 (__v4si)__anyext128(__count)));
817}
818
819/// Left-shifts each 32-bit signed integer element of a 64-bit integer
820/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
821/// The lower 32 bits of the results are packed into a 64-bit integer vector
822/// of [2 x i32].
823///
824/// \headerfile <x86intrin.h>
825///
826/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
827///
828/// \param __m
829/// A 64-bit integer vector of [2 x i32].
830/// \param __count
831/// A 32-bit integer value.
832/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
833/// values. If \a __count is greater than or equal to 32, the result is set to
834/// all 0.
835static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
836_mm_slli_pi32(__m64 __m, int __count)
837{
838 return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m),
839 __count));
840}
841
842/// Left-shifts the first 64-bit integer parameter by the number of bits
843/// specified by the second 64-bit integer parameter. The lower 64 bits of
844/// the result are returned.
845///
846/// \headerfile <x86intrin.h>
847///
848/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
849///
850/// \param __m
851/// A 64-bit integer vector interpreted as a single 64-bit integer.
852/// \param __count
853/// A 64-bit integer vector interpreted as a single 64-bit integer.
854/// \returns A 64-bit integer vector containing the left-shifted value. If
855/// \a __count is greater than or equal to 64, the result is set to 0.
856static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
857_mm_sll_si64(__m64 __m, __m64 __count)
858{
859 return __trunc64(__builtin_ia32_psllq128((__v2di)__anyext128(__m),
860 (__v2di)__anyext128(__count)));
861}
862
863/// Left-shifts the first parameter, which is a 64-bit integer, by the
864/// number of bits specified by the second parameter, which is a 32-bit
865/// integer. The lower 64 bits of the result are returned.
866///
867/// \headerfile <x86intrin.h>
868///
869/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
870///
871/// \param __m
872/// A 64-bit integer vector interpreted as a single 64-bit integer.
873/// \param __count
874/// A 32-bit integer value.
875/// \returns A 64-bit integer vector containing the left-shifted value. If
876/// \a __count is greater than or equal to 64, the result is set to 0.
877static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
878_mm_slli_si64(__m64 __m, int __count)
879{
880 return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m),
881 __count));
882}
883
884/// Right-shifts each 16-bit integer element of the first parameter,
885/// which is a 64-bit integer vector of [4 x i16], by the number of bits
886/// specified by the second parameter, which is a 64-bit integer.
887///
888/// High-order bits are filled with the sign bit of the initial value of each
889/// 16-bit element. The 16-bit results are packed into a 64-bit integer
890/// vector of [4 x i16].
891///
892/// \headerfile <x86intrin.h>
893///
894/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
895///
896/// \param __m
897/// A 64-bit integer vector of [4 x i16].
898/// \param __count
899/// A 64-bit integer vector interpreted as a single 64-bit integer.
900/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
901/// values.
902static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
903_mm_sra_pi16(__m64 __m, __m64 __count)
904{
905 return __trunc64(__builtin_ia32_psraw128((__v8hi)__anyext128(__m),
906 (__v8hi)__anyext128(__count)));
907}
908
909/// Right-shifts each 16-bit integer element of a 64-bit integer vector
910/// of [4 x i16] by the number of bits specified by a 32-bit integer.
911///
912/// High-order bits are filled with the sign bit of the initial value of each
913/// 16-bit element. The 16-bit results are packed into a 64-bit integer
914/// vector of [4 x i16].
915///
916/// \headerfile <x86intrin.h>
917///
918/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
919///
920/// \param __m
921/// A 64-bit integer vector of [4 x i16].
922/// \param __count
923/// A 32-bit integer value.
924/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
925/// values.
926static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
927_mm_srai_pi16(__m64 __m, int __count)
928{
929 return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m),
930 __count));
931}
932
933/// Right-shifts each 32-bit integer element of the first parameter,
934/// which is a 64-bit integer vector of [2 x i32], by the number of bits
935/// specified by the second parameter, which is a 64-bit integer.
936///
937/// High-order bits are filled with the sign bit of the initial value of each
938/// 32-bit element. The 32-bit results are packed into a 64-bit integer
939/// vector of [2 x i32].
940///
941/// \headerfile <x86intrin.h>
942///
943/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
944///
945/// \param __m
946/// A 64-bit integer vector of [2 x i32].
947/// \param __count
948/// A 64-bit integer vector interpreted as a single 64-bit integer.
949/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
950/// values.
951static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
952_mm_sra_pi32(__m64 __m, __m64 __count)
953{
954 return __trunc64(__builtin_ia32_psrad128((__v4si)__anyext128(__m),
955 (__v4si)__anyext128(__count)));
956}
957
958/// Right-shifts each 32-bit integer element of a 64-bit integer vector
959/// of [2 x i32] by the number of bits specified by a 32-bit integer.
960///
961/// High-order bits are filled with the sign bit of the initial value of each
962/// 32-bit element. The 32-bit results are packed into a 64-bit integer
963/// vector of [2 x i32].
964///
965/// \headerfile <x86intrin.h>
966///
967/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
968///
969/// \param __m
970/// A 64-bit integer vector of [2 x i32].
971/// \param __count
972/// A 32-bit integer value.
973/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
974/// values.
975static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
976_mm_srai_pi32(__m64 __m, int __count)
977{
978 return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m),
979 __count));
980}
981
982/// Right-shifts each 16-bit integer element of the first parameter,
983/// which is a 64-bit integer vector of [4 x i16], by the number of bits
984/// specified by the second parameter, which is a 64-bit integer.
985///
986/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
987/// integer vector of [4 x i16].
988///
989/// \headerfile <x86intrin.h>
990///
991/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
992///
993/// \param __m
994/// A 64-bit integer vector of [4 x i16].
995/// \param __count
996/// A 64-bit integer vector interpreted as a single 64-bit integer.
997/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
998/// values.
999static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1000_mm_srl_pi16(__m64 __m, __m64 __count)
1001{
1002 return __trunc64(__builtin_ia32_psrlw128((__v8hi)__anyext128(__m),
1003 (__v8hi)__anyext128(__count)));
1004}
1005
1006/// Right-shifts each 16-bit integer element of a 64-bit integer vector
1007/// of [4 x i16] by the number of bits specified by a 32-bit integer.
1008///
1009/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
1010/// integer vector of [4 x i16].
1011///
1012/// \headerfile <x86intrin.h>
1013///
1014/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
1015///
1016/// \param __m
1017/// A 64-bit integer vector of [4 x i16].
1018/// \param __count
1019/// A 32-bit integer value.
1020/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
1021/// values.
1022static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1023_mm_srli_pi16(__m64 __m, int __count)
1024{
1025 return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m),
1026 __count));
1027}
1028
1029/// Right-shifts each 32-bit integer element of the first parameter,
1030/// which is a 64-bit integer vector of [2 x i32], by the number of bits
1031/// specified by the second parameter, which is a 64-bit integer.
1032///
1033/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1034/// integer vector of [2 x i32].
1035///
1036/// \headerfile <x86intrin.h>
1037///
1038/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1039///
1040/// \param __m
1041/// A 64-bit integer vector of [2 x i32].
1042/// \param __count
1043/// A 64-bit integer vector interpreted as a single 64-bit integer.
1044/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1045/// values.
1046static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1047_mm_srl_pi32(__m64 __m, __m64 __count)
1048{
1049 return __trunc64(__builtin_ia32_psrld128((__v4si)__anyext128(__m),
1050 (__v4si)__anyext128(__count)));
1051}
1052
1053/// Right-shifts each 32-bit integer element of a 64-bit integer vector
1054/// of [2 x i32] by the number of bits specified by a 32-bit integer.
1055///
1056/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1057/// integer vector of [2 x i32].
1058///
1059/// \headerfile <x86intrin.h>
1060///
1061/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1062///
1063/// \param __m
1064/// A 64-bit integer vector of [2 x i32].
1065/// \param __count
1066/// A 32-bit integer value.
1067/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1068/// values.
1069static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1070_mm_srli_pi32(__m64 __m, int __count)
1071{
1072 return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m),
1073 __count));
1074}
1075
1076/// Right-shifts the first 64-bit integer parameter by the number of bits
1077/// specified by the second 64-bit integer parameter.
1078///
1079/// High-order bits are cleared.
1080///
1081/// \headerfile <x86intrin.h>
1082///
1083/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1084///
1085/// \param __m
1086/// A 64-bit integer vector interpreted as a single 64-bit integer.
1087/// \param __count
1088/// A 64-bit integer vector interpreted as a single 64-bit integer.
1089/// \returns A 64-bit integer vector containing the right-shifted value.
1090static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1091_mm_srl_si64(__m64 __m, __m64 __count)
1092{
1093 return __trunc64(__builtin_ia32_psrlq128((__v2di)__anyext128(__m),
1094 (__v2di)__anyext128(__count)));
1095}
1096
1097/// Right-shifts the first parameter, which is a 64-bit integer, by the
1098/// number of bits specified by the second parameter, which is a 32-bit
1099/// integer.
1100///
1101/// High-order bits are cleared.
1102///
1103/// \headerfile <x86intrin.h>
1104///
1105/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1106///
1107/// \param __m
1108/// A 64-bit integer vector interpreted as a single 64-bit integer.
1109/// \param __count
1110/// A 32-bit integer value.
1111/// \returns A 64-bit integer vector containing the right-shifted value.
1112static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1113_mm_srli_si64(__m64 __m, int __count)
1114{
1115 return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m),
1116 __count));
1117}
1118
1119/// Performs a bitwise AND of two 64-bit integer vectors.
1120///
1121/// \headerfile <x86intrin.h>
1122///
1123/// This intrinsic corresponds to the <c> PAND </c> instruction.
1124///
1125/// \param __m1
1126/// A 64-bit integer vector.
1127/// \param __m2
1128/// A 64-bit integer vector.
1129/// \returns A 64-bit integer vector containing the bitwise AND of both
1130/// parameters.
1131static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1132_mm_and_si64(__m64 __m1, __m64 __m2)
1133{
1134 return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
1135}
1136
1137/// Performs a bitwise NOT of the first 64-bit integer vector, and then
1138/// performs a bitwise AND of the intermediate result and the second 64-bit
1139/// integer vector.
1140///
1141/// \headerfile <x86intrin.h>
1142///
1143/// This intrinsic corresponds to the <c> PANDN </c> instruction.
1144///
1145/// \param __m1
1146/// A 64-bit integer vector. The one's complement of this parameter is used
1147/// in the bitwise AND.
1148/// \param __m2
1149/// A 64-bit integer vector.
1150/// \returns A 64-bit integer vector containing the bitwise AND of the second
1151/// parameter and the one's complement of the first parameter.
1152static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1153_mm_andnot_si64(__m64 __m1, __m64 __m2)
1154{
1155 return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
1156}
1157
1158/// Performs a bitwise OR of two 64-bit integer vectors.
1159///
1160/// \headerfile <x86intrin.h>
1161///
1162/// This intrinsic corresponds to the <c> POR </c> instruction.
1163///
1164/// \param __m1
1165/// A 64-bit integer vector.
1166/// \param __m2
1167/// A 64-bit integer vector.
1168/// \returns A 64-bit integer vector containing the bitwise OR of both
1169/// parameters.
1170static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1171_mm_or_si64(__m64 __m1, __m64 __m2)
1172{
1173 return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
1174}
1175
1176/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
1177///
1178/// \headerfile <x86intrin.h>
1179///
1180/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1181///
1182/// \param __m1
1183/// A 64-bit integer vector.
1184/// \param __m2
1185/// A 64-bit integer vector.
1186/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1187/// parameters.
1188static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1189_mm_xor_si64(__m64 __m1, __m64 __m2)
1190{
1191 return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
1192}
1193
1194/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1195/// [8 x i8] to determine if the element of the first vector is equal to the
1196/// corresponding element of the second vector.
1197///
1198/// Each comparison returns 0 for false, 0xFF for true.
1199///
1200/// \headerfile <x86intrin.h>
1201///
1202/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
1203///
1204/// \param __m1
1205/// A 64-bit integer vector of [8 x i8].
1206/// \param __m2
1207/// A 64-bit integer vector of [8 x i8].
1208/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1209/// results.
1210static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1211_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
1212{
1213 return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
1214}
1215
1216/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1217/// [4 x i16] to determine if the element of the first vector is equal to the
1218/// corresponding element of the second vector.
1219///
1220/// Each comparison returns 0 for false, 0xFFFF for true.
1221///
1222/// \headerfile <x86intrin.h>
1223///
1224/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
1225///
1226/// \param __m1
1227/// A 64-bit integer vector of [4 x i16].
1228/// \param __m2
1229/// A 64-bit integer vector of [4 x i16].
1230/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1231/// results.
1232static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1233_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
1234{
1235 return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
1236}
1237
1238/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1239/// [2 x i32] to determine if the element of the first vector is equal to the
1240/// corresponding element of the second vector.
1241///
1242/// Each comparison returns 0 for false, 0xFFFFFFFF for true.
1243///
1244/// \headerfile <x86intrin.h>
1245///
1246/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
1247///
1248/// \param __m1
1249/// A 64-bit integer vector of [2 x i32].
1250/// \param __m2
1251/// A 64-bit integer vector of [2 x i32].
1252/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1253/// results.
1254static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1255_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
1256{
1257 return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
1258}
1259
1260/// Compares the signed 8-bit integer elements of two 64-bit integer vectors
1261/// of [8 x i8] to determine if the element of the first vector is greater
1262/// than the corresponding element of the second vector.
1263///
1264/// Each comparison returns 0 for false, 0xFF for true.
1265///
1266/// \headerfile <x86intrin.h>
1267///
1268/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
1269///
1270/// \param __m1
1271/// A 64-bit integer vector of [8 x i8].
1272/// \param __m2
1273/// A 64-bit integer vector of [8 x i8].
1274/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1275/// results.
1276static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1277_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
1278{
1279 /* This function always performs a signed comparison, but __v8qi is a char
1280 which may be signed or unsigned, so use __v8qs. */
1281 return (__m64)((__v8qs)__m1 > (__v8qs)__m2);
1282}
1283
1284/// Compares the signed 16-bit integer elements of two 64-bit integer vectors
1285/// of [4 x i16] to determine if the element of the first vector is greater
1286/// than the corresponding element of the second vector.
1287///
1288/// Each comparison returns 0 for false, 0xFFFF for true.
1289///
1290/// \headerfile <x86intrin.h>
1291///
1292/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
1293///
1294/// \param __m1
1295/// A 64-bit integer vector of [4 x i16].
1296/// \param __m2
1297/// A 64-bit integer vector of [4 x i16].
1298/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1299/// results.
1300static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1301_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
1302{
1303 return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
1304}
1305
1306/// Compares the signed 32-bit integer elements of two 64-bit integer vectors
1307/// of [2 x i32] to determine if the element of the first vector is greater
1308/// than the corresponding element of the second vector.
1309///
1310/// Each comparison returns 0 for false, 0xFFFFFFFF for true.
1311///
1312/// \headerfile <x86intrin.h>
1313///
1314/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
1315///
1316/// \param __m1
1317/// A 64-bit integer vector of [2 x i32].
1318/// \param __m2
1319/// A 64-bit integer vector of [2 x i32].
1320/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1321/// results.
1322static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1323_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
1324{
1325 return (__m64)((__v2si)__m1 > (__v2si)__m2);
1326}
1327
1328/// Constructs a 64-bit integer vector initialized to zero.
1329///
1330/// \headerfile <x86intrin.h>
1331///
1332/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1333///
1334/// \returns An initialized 64-bit integer vector with all elements set to zero.
1335static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1337{
1338 return __extension__ (__m64){ 0LL };
1339}
1340
1341/// Constructs a 64-bit integer vector initialized with the specified
1342/// 32-bit integer values.
1343///
1344/// \headerfile <x86intrin.h>
1345///
1346/// This intrinsic is a utility function and does not correspond to a specific
1347/// instruction.
1348///
1349/// \param __i1
1350/// A 32-bit integer value used to initialize the upper 32 bits of the
1351/// result.
1352/// \param __i0
1353/// A 32-bit integer value used to initialize the lower 32 bits of the
1354/// result.
1355/// \returns An initialized 64-bit integer vector.
1356static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1357_mm_set_pi32(int __i1, int __i0)
1358{
1359 return __extension__ (__m64)(__v2si){__i0, __i1};
1360}
1361
1362/// Constructs a 64-bit integer vector initialized with the specified
1363/// 16-bit integer values.
1364///
1365/// \headerfile <x86intrin.h>
1366///
1367/// This intrinsic is a utility function and does not correspond to a specific
1368/// instruction.
1369///
1370/// \param __s3
1371/// A 16-bit integer value used to initialize bits [63:48] of the result.
1372/// \param __s2
1373/// A 16-bit integer value used to initialize bits [47:32] of the result.
1374/// \param __s1
1375/// A 16-bit integer value used to initialize bits [31:16] of the result.
1376/// \param __s0
1377/// A 16-bit integer value used to initialize bits [15:0] of the result.
1378/// \returns An initialized 64-bit integer vector.
1379static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1380_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
1381{
1382 return __extension__ (__m64)(__v4hi){__s0, __s1, __s2, __s3};
1383}
1384
1385/// Constructs a 64-bit integer vector initialized with the specified
1386/// 8-bit integer values.
1387///
1388/// \headerfile <x86intrin.h>
1389///
1390/// This intrinsic is a utility function and does not correspond to a specific
1391/// instruction.
1392///
1393/// \param __b7
1394/// An 8-bit integer value used to initialize bits [63:56] of the result.
1395/// \param __b6
1396/// An 8-bit integer value used to initialize bits [55:48] of the result.
1397/// \param __b5
1398/// An 8-bit integer value used to initialize bits [47:40] of the result.
1399/// \param __b4
1400/// An 8-bit integer value used to initialize bits [39:32] of the result.
1401/// \param __b3
1402/// An 8-bit integer value used to initialize bits [31:24] of the result.
1403/// \param __b2
1404/// An 8-bit integer value used to initialize bits [23:16] of the result.
1405/// \param __b1
1406/// An 8-bit integer value used to initialize bits [15:8] of the result.
1407/// \param __b0
1408/// An 8-bit integer value used to initialize bits [7:0] of the result.
1409/// \returns An initialized 64-bit integer vector.
1410static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1411_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1412 char __b1, char __b0)
1413{
1414 return __extension__ (__m64)(__v8qi){__b0, __b1, __b2, __b3,
1415 __b4, __b5, __b6, __b7};
1416}
1417
1418/// Constructs a 64-bit integer vector of [2 x i32], with each of the
1419/// 32-bit integer vector elements set to the specified 32-bit integer
1420/// value.
1421///
1422/// \headerfile <x86intrin.h>
1423///
1424/// This intrinsic is a utility function and does not correspond to a specific
1425/// instruction.
1426///
1427/// \param __i
1428/// A 32-bit integer value used to initialize each vector element of the
1429/// result.
1430/// \returns An initialized 64-bit integer vector of [2 x i32].
1431static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1433{
1434 return _mm_set_pi32(__i, __i);
1435}
1436
1437/// Constructs a 64-bit integer vector of [4 x i16], with each of the
1438/// 16-bit integer vector elements set to the specified 16-bit integer
1439/// value.
1440///
1441/// \headerfile <x86intrin.h>
1442///
1443/// This intrinsic is a utility function and does not correspond to a specific
1444/// instruction.
1445///
1446/// \param __w
1447/// A 16-bit integer value used to initialize each vector element of the
1448/// result.
1449/// \returns An initialized 64-bit integer vector of [4 x i16].
1450static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1452{
1453 return _mm_set_pi16(__w, __w, __w, __w);
1454}
1455
1456/// Constructs a 64-bit integer vector of [8 x i8], with each of the
1457/// 8-bit integer vector elements set to the specified 8-bit integer value.
1458///
1459/// \headerfile <x86intrin.h>
1460///
1461/// This intrinsic is a utility function and does not correspond to a specific
1462/// instruction.
1463///
1464/// \param __b
1465/// An 8-bit integer value used to initialize each vector element of the
1466/// result.
1467/// \returns An initialized 64-bit integer vector of [8 x i8].
1468static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1470{
1471 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
1472}
1473
1474/// Constructs a 64-bit integer vector, initialized in reverse order with
1475/// the specified 32-bit integer values.
1476///
1477/// \headerfile <x86intrin.h>
1478///
1479/// This intrinsic is a utility function and does not correspond to a specific
1480/// instruction.
1481///
1482/// \param __i0
1483/// A 32-bit integer value used to initialize the lower 32 bits of the
1484/// result.
1485/// \param __i1
1486/// A 32-bit integer value used to initialize the upper 32 bits of the
1487/// result.
1488/// \returns An initialized 64-bit integer vector.
1489static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1490_mm_setr_pi32(int __i0, int __i1)
1491{
1492 return _mm_set_pi32(__i1, __i0);
1493}
1494
1495/// Constructs a 64-bit integer vector, initialized in reverse order with
1496/// the specified 16-bit integer values.
1497///
1498/// \headerfile <x86intrin.h>
1499///
1500/// This intrinsic is a utility function and does not correspond to a specific
1501/// instruction.
1502///
1503/// \param __w0
1504/// A 16-bit integer value used to initialize bits [15:0] of the result.
1505/// \param __w1
1506/// A 16-bit integer value used to initialize bits [31:16] of the result.
1507/// \param __w2
1508/// A 16-bit integer value used to initialize bits [47:32] of the result.
1509/// \param __w3
1510/// A 16-bit integer value used to initialize bits [63:48] of the result.
1511/// \returns An initialized 64-bit integer vector.
1512static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1513_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
1514{
1515 return _mm_set_pi16(__w3, __w2, __w1, __w0);
1516}
1517
1518/// Constructs a 64-bit integer vector, initialized in reverse order with
1519/// the specified 8-bit integer values.
1520///
1521/// \headerfile <x86intrin.h>
1522///
1523/// This intrinsic is a utility function and does not correspond to a specific
1524/// instruction.
1525///
1526/// \param __b0
1527/// An 8-bit integer value used to initialize bits [7:0] of the result.
1528/// \param __b1
1529/// An 8-bit integer value used to initialize bits [15:8] of the result.
1530/// \param __b2
1531/// An 8-bit integer value used to initialize bits [23:16] of the result.
1532/// \param __b3
1533/// An 8-bit integer value used to initialize bits [31:24] of the result.
1534/// \param __b4
1535/// An 8-bit integer value used to initialize bits [39:32] of the result.
1536/// \param __b5
1537/// An 8-bit integer value used to initialize bits [47:40] of the result.
1538/// \param __b6
1539/// An 8-bit integer value used to initialize bits [55:48] of the result.
1540/// \param __b7
1541/// An 8-bit integer value used to initialize bits [63:56] of the result.
1542/// \returns An initialized 64-bit integer vector.
1543static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1544_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1545 char __b6, char __b7)
1546{
1547 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1548}
1549
/* Remove the helper macros used by the intrinsics above so they are not
 * visible to code that follows this header. */
1550#undef __anyext128
1551#undef __trunc64
1552#undef __DEFAULT_FN_ATTRS_SSE2
1553
1554/* Aliases for compatibility. Each _m_* name is an object-like macro that
 * expands to the _mm_* intrinsic named on its right. */
/* State management and scalar <-> __m64 conversion. */
1555#define _m_empty _mm_empty
1556#define _m_from_int _mm_cvtsi32_si64
1557#define _m_from_int64 _mm_cvtsi64_m64
1558#define _m_to_int _mm_cvtsi64_si32
1559#define _m_to_int64 _mm_cvtm64_si64
/* Pack and unpack. */
1560#define _m_packsswb _mm_packs_pi16
1561#define _m_packssdw _mm_packs_pi32
1562#define _m_packuswb _mm_packs_pu16
1563#define _m_punpckhbw _mm_unpackhi_pi8
1564#define _m_punpckhwd _mm_unpackhi_pi16
1565#define _m_punpckhdq _mm_unpackhi_pi32
1566#define _m_punpcklbw _mm_unpacklo_pi8
1567#define _m_punpcklwd _mm_unpacklo_pi16
1568#define _m_punpckldq _mm_unpacklo_pi32
/* Addition and subtraction. */
1569#define _m_paddb _mm_add_pi8
1570#define _m_paddw _mm_add_pi16
1571#define _m_paddd _mm_add_pi32
1572#define _m_paddsb _mm_adds_pi8
1573#define _m_paddsw _mm_adds_pi16
1574#define _m_paddusb _mm_adds_pu8
1575#define _m_paddusw _mm_adds_pu16
1576#define _m_psubb _mm_sub_pi8
1577#define _m_psubw _mm_sub_pi16
1578#define _m_psubd _mm_sub_pi32
1579#define _m_psubsb _mm_subs_pi8
1580#define _m_psubsw _mm_subs_pi16
1581#define _m_psubusb _mm_subs_pu8
1582#define _m_psubusw _mm_subs_pu16
/* Multiplication. */
1583#define _m_pmaddwd _mm_madd_pi16
1584#define _m_pmulhw _mm_mulhi_pi16
1585#define _m_pmullw _mm_mullo_pi16
/* Shifts. The aliases ending in 'i' map to the intrinsics that take an int
 * count; the others map to the intrinsics that take an __m64 count. */
1586#define _m_psllw _mm_sll_pi16
1587#define _m_psllwi _mm_slli_pi16
1588#define _m_pslld _mm_sll_pi32
1589#define _m_pslldi _mm_slli_pi32
1590#define _m_psllq _mm_sll_si64
1591#define _m_psllqi _mm_slli_si64
1592#define _m_psraw _mm_sra_pi16
1593#define _m_psrawi _mm_srai_pi16
1594#define _m_psrad _mm_sra_pi32
1595#define _m_psradi _mm_srai_pi32
1596#define _m_psrlw _mm_srl_pi16
1597#define _m_psrlwi _mm_srli_pi16
1598#define _m_psrld _mm_srl_pi32
1599#define _m_psrldi _mm_srli_pi32
1600#define _m_psrlq _mm_srl_si64
1601#define _m_psrlqi _mm_srli_si64
/* Bitwise logic. */
1602#define _m_pand _mm_and_si64
1603#define _m_pandn _mm_andnot_si64
1604#define _m_por _mm_or_si64
1605#define _m_pxor _mm_xor_si64
/* Comparisons. */
1606#define _m_pcmpeqb _mm_cmpeq_pi8
1607#define _m_pcmpeqw _mm_cmpeq_pi16
1608#define _m_pcmpeqd _mm_cmpeq_pi32
1609#define _m_pcmpgtb _mm_cmpgt_pi8
1610#define _m_pcmpgtw _mm_cmpgt_pi16
1611#define _m_pcmpgtd _mm_cmpgt_pi32
1612
1613#endif /* __MMINTRIN_H */
1614
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_andnot_si64(__m64 __m1, __m64 __m2)
Performs a bitwise NOT of the first 64-bit integer vector, and then performs a bitwise AND of the int...
Definition: mmintrin.h:1153
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi32(__m64 __m1, __m64 __m2)
Adds each 32-bit integer element of the first 64-bit integer vector of [2 x i32] to the corresponding...
Definition: mmintrin.h:419
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_and_si64(__m64 __m1, __m64 __m2)
Performs a bitwise AND of two 64-bit integer vectors.
Definition: mmintrin.h:1132
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8...
Definition: mmintrin.h:377
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Constructs a 64-bit integer vector initialized with the specified 8-bit integer values.
Definition: mmintrin.h:1411
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit integer element of the first 64-bit integer vector of [4 x i16] to the corresponding...
Definition: mmintrin.h:398
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu16(__m64 __m1, __m64 __m2)
Adds, with saturation, each 16-bit unsigned integer element of the first 64-bit integer vector of [4 ...
Definition: mmintrin.h:513
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi32(__m64 __m1, __m64 __m2)
Converts, with saturation, 32-bit signed integers from both 64-bit integer vector parameters of [2 x ...
Definition: mmintrin.h:183
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
Definition: mmintrin.h:1255
#define __anyext128(x)
Definition: mmintrin.h:54
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
Definition: mmintrin.h:334
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi16(short __w)
Constructs a 64-bit integer vector of [4 x i16], with each of the 16-bit integer vector elements set to the specified 16-bit integer value.
Definition: mmintrin.h:1451
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi16(__m64 __m1, __m64 __m2)
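Converts, with saturation, 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit signed integer values, and constructs a 64-bit integer vector of [8 x i8] as the result.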
Definition: mmintrin.h:157
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit integer element of the second 64-bit integer vector of [8 x i8] from the corresponding 8-bit integer element of the first 64-bit integer vector of [8 x i8].
Definition: mmintrin.h:534
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 8-bit integer values.
Definition: mmintrin.h:1544
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bits specified by a 32-bit integer.
Definition: mmintrin.h:976
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu8(__m64 __m1, __m64 __m2)
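Adds, with saturation, each 8-bit unsigned integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned integer element of the second 64-bit integer vector of [8 x i8].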
Definition: mmintrin.h:490
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi8(char __b)
Constructs a 64-bit integer vector of [8 x i8], with each of the 8-bit integer vector elements set to the specified 8-bit integer value.
Definition: mmintrin.h:1469
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bits specified by a 32-bit integer.
Definition: mmintrin.h:1023
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bits specified by a 32-bit integer.
Definition: mmintrin.h:927
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit unsigned integer element of the second 64-bit integer vector of [8 x i8] from the corresponding 8-bit unsigned integer element of the first 64-bit integer vector of [8 x i8].
Definition: mmintrin.h:648
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi8(__m64 __m1, __m64 __m2)
Adds, with saturation, each 8-bit signed integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed integer element of the second 64-bit integer vector of [8 x i8].
Definition: mmintrin.h:443
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi32(int __i)
Constructs a 64-bit integer vector of [2 x i32], with each of the 32-bit integer vector elements set to the specified 32-bit integer value.
Definition: mmintrin.h:1432
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_or_si64(__m64 __m1, __m64 __m2)
Performs a bitwise OR of two 64-bit integer vectors.
Definition: mmintrin.h:1171
static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 _mm_cvtm64_si64(__m64 __m)
Casts a 64-bit integer vector into a 64-bit signed integer value.
Definition: mmintrin.h:132
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setzero_si64(void)
Constructs a 64-bit integer vector initialized to zero.
Definition: mmintrin.h:1336
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bits specified by a 32-bit integer.
Definition: mmintrin.h:1070
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi16(__m64 __m1, __m64 __m2)
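Adds, with saturation, each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed integer element of the second 64-bit integer vector of [4 x i16].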
Definition: mmintrin.h:467
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi32_si64(int __i)
Constructs a 64-bit integer vector, setting the lower 32 bits to the value of the 32-bit integer parameter and setting the upper 32 bits to 0.
Definition: mmintrin.h:83
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_si64(__m64 __m, int __count)
Left-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the second parameter, which is a 32-bit integer.
Definition: mmintrin.h:878
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the corresponding 16-bit signed integer element of the second 64-bit integer vector of [4 x i16].
Definition: mmintrin.h:721
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_pi16(__m64 __m, __m64 __count)
Left-shifts each 16-bit signed integer element of the first parameter, which is a 64-bit integer vector of [4 x i16], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition: mmintrin.h:766
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of [4 x i16], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition: mmintrin.h:1000
static __inline__ int __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_si32(__m64 __m)
Returns the lower 32 bits of a 64-bit integer vector as a 32-bit signed integer.
Definition: mmintrin.h:100
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the element of the first vector is greater than the corresponding element of the second vector.
Definition: mmintrin.h:1277
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sra_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of [2 x i32], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition: mmintrin.h:952
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_si64(__m64 __m, __m64 __count)
Right-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit integer parameter.
Definition: mmintrin.h:1091
long long __m64 __attribute__((__vector_size__(8), __aligned__(8)))
Definition: mmintrin.h:17
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi8(__m64 __m1, __m64 __m2)
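Subtracts, with saturation, each 8-bit signed integer element of the second 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed integer element of the first 64-bit integer vector of [8 x i8].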
Definition: mmintrin.h:600
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the corresponding 16-bit signed integer element of the second 64-bit integer vector of [4 x i16].
Definition: mmintrin.h:743
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-bit integer vector of [8 x i8].
Definition: mmintrin.h:237
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pu16(__m64 __m1, __m64 __m2)
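Converts, with saturation, 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit unsigned integer values, and constructs a 64-bit integer vector of [8 x i8] as the result.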
Definition: mmintrin.h:209
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_madd_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the corresponding 16-bit signed integer element of the second 64-bit integer vector of [4 x i16].
Definition: mmintrin.h:699
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of [2 x i32], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition: mmintrin.h:1047
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_xor_si64(__m64 __m1, __m64 __m2)
Performs a bitwise exclusive OR of two 64-bit integer vectors.
Definition: mmintrin.h:1189
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_m64(long long __i)
Casts a 64-bit signed integer value into a 64-bit integer vector.
Definition: mmintrin.h:116
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the element of the first vector is equal to the corresponding element of the second vector.
Definition: mmintrin.h:1211
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_si64(__m64 __m, __m64 __count)
Left-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit integer parameter.
Definition: mmintrin.h:857
#define __trunc64(x)
Definition: mmintrin.h:52
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
Definition: mmintrin.h:283
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi32(__m64 __m1, __m64 __m2)
Subtracts each 32-bit integer element of the second 64-bit integer vector of [2 x i32] from the corresponding 32-bit integer element of the first 64-bit integer vector of [2 x i32].
Definition: mmintrin.h:576
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
Definition: mmintrin.h:261
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi32(int __i1, int __i0)
Constructs a 64-bit integer vector initialized with the specified 32-bit integer values.
Definition: mmintrin.h:1357
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_si64(__m64 __m, int __count)
Right-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the second parameter, which is a 32-bit integer.
Definition: mmintrin.h:1113
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-bit integer vector of [8 x i8].
Definition: mmintrin.h:310
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sra_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of [4 x i16], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition: mmintrin.h:903
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi16(__m64 __m1, __m64 __m2)
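Subtracts, with saturation, each 16-bit signed integer element of the second 64-bit integer vector of [4 x i16] from the corresponding 16-bit signed integer element of the first 64-bit integer vector of [4 x i16].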
Definition: mmintrin.h:624
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi32(__m64 __m, int __count)
Left-shifts each 32-bit signed integer element of a 64-bit integer vector of [2 x i32] by the number of bits specified by a 32-bit integer.
Definition: mmintrin.h:836
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the element of the first vector is equal to the corresponding element of the second vector.
Definition: mmintrin.h:1233
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi16(__m64 __m, int __count)
Left-shifts each 16-bit signed integer element of a 64-bit integer vector of [4 x i16] by the number of bits specified by a 32-bit integer.
Definition: mmintrin.h:789
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_pi32(__m64 __m, __m64 __count)
Left-shifts each 32-bit signed integer element of the first parameter, which is a 64-bit integer vector of [2 x i32], by the number of bits specified by the second parameter, which is a 64-bit integer.
Definition: mmintrin.h:813
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi32(int __i0, int __i1)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 32-bit integer values.
Definition: mmintrin.h:1490
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit unsigned integer element of the second 64-bit integer vector of [4 x i16] from the corresponding 16-bit unsigned integer element of the first 64-bit integer vector of [4 x i16].
Definition: mmintrin.h:672
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 16-bit integer values.
Definition: mmintrin.h:1513
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit integer element of the second 64-bit integer vector of [4 x i16] from the corresponding 16-bit integer element of the first 64-bit integer vector of [4 x i16].
Definition: mmintrin.h:555
#define __DEFAULT_FN_ATTRS_SSE2
Definition: mmintrin.h:47
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the element of the first vector is greater than the corresponding element of the second vector.
Definition: mmintrin.h:1323
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
Definition: mmintrin.h:356
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
Constructs a 64-bit integer vector initialized with the specified 16-bit integer values.
Definition: mmintrin.h:1380
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the element of the first vector is greater than the corresponding element of the second vector.
Definition: mmintrin.h:1301