clang 19.0.0git
mmintrin.h
Go to the documentation of this file.
1/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __MMINTRIN_H
11#define __MMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
/* 64-bit MMX vector type: an 8-byte vector of long long, 8-byte aligned. */
17typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));
18
/* Internal 8-byte lane views used when casting arguments for the builtins. */
/* One 64-bit lane. */
19typedef long long __v1di __attribute__((__vector_size__(8)));
/* Two 32-bit lanes, i.e. [2 x i32]. */
20typedef int __v2si __attribute__((__vector_size__(8)));
/* Four 16-bit lanes, i.e. [4 x i16]. */
21typedef short __v4hi __attribute__((__vector_size__(8)));
/* Eight 8-bit lanes, i.e. [8 x i8]. */
22typedef char __v8qi __attribute__((__vector_size__(8)));
23
24/* Define the default attributes for the functions in this file. */
/* Functions declared with this macro are always inlined, emit no debug
 * info, are compiled with the "mmx,no-evex512" target features, and declare
 * a minimum vector width of 64 bits. */
25#define __DEFAULT_FN_ATTRS \
26 __attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \
27 __min_vector_width__(64)))
28
29/// Clears the MMX state by setting the state of the x87 stack registers
30/// to empty.
31///
32/// \headerfile <x86intrin.h>
33///
34/// This intrinsic corresponds to the <c> EMMS </c> instruction.
35///
36static __inline__ void __attribute__((__always_inline__, __nodebug__,
37 __target__("mmx,no-evex512")))
38_mm_empty(void) {
39 __builtin_ia32_emms();
40}
41
42/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
43/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
44///
45/// \headerfile <x86intrin.h>
46///
47/// This intrinsic corresponds to the <c> MOVD </c> instruction.
48///
49/// \param __i
50/// A 32-bit integer value.
51/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
52/// parameter. The upper 32 bits are set to 0.
53static __inline__ __m64 __DEFAULT_FN_ATTRS
55{
56 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
57}
58
59/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
60/// signed integer.
61///
62/// \headerfile <x86intrin.h>
63///
64/// This intrinsic corresponds to the <c> MOVD </c> instruction.
65///
66/// \param __m
67/// A 64-bit integer vector.
68/// \returns A 32-bit signed integer value containing the lower 32 bits of the
69/// parameter.
70static __inline__ int __DEFAULT_FN_ATTRS
72{
73 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
74}
75
76/// Casts a 64-bit signed integer value into a 64-bit integer vector.
77///
78/// \headerfile <x86intrin.h>
79///
80/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
81///
82/// \param __i
83/// A 64-bit signed integer.
84/// \returns A 64-bit integer vector containing the same bitwise pattern as the
85/// parameter.
86static __inline__ __m64 __DEFAULT_FN_ATTRS
87_mm_cvtsi64_m64(long long __i)
88{
89 return (__m64)__i;
90}
91
92/// Casts a 64-bit integer vector into a 64-bit signed integer value.
93///
94/// \headerfile <x86intrin.h>
95///
96/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
97///
98/// \param __m
99/// A 64-bit integer vector.
100/// \returns A 64-bit signed integer containing the same bitwise pattern as the
101/// parameter.
102static __inline__ long long __DEFAULT_FN_ATTRS
104{
105 return (long long)__m;
106}
107
108/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
109/// vector parameters of [4 x i16] into 8-bit signed integer values, and
110/// constructs a 64-bit integer vector of [8 x i8] as the result.
111///
112/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
113/// less than 0x80 are saturated to 0x80.
114///
115/// \headerfile <x86intrin.h>
116///
117/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
118///
119/// \param __m1
120/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
121/// written to the lower 32 bits of the result.
122/// \param __m2
123/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
124/// written to the upper 32 bits of the result.
125/// \returns A 64-bit integer vector of [8 x i8] containing the converted
126/// values.
127static __inline__ __m64 __DEFAULT_FN_ATTRS
128_mm_packs_pi16(__m64 __m1, __m64 __m2)
129{
130 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
131}
132
133/// Converts, with saturation, 32-bit signed integers from both 64-bit integer
134/// vector parameters of [2 x i32] into 16-bit signed integer values, and
135/// constructs a 64-bit integer vector of [4 x i16] as the result.
136///
137/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
138/// values less than 0x8000 are saturated to 0x8000.
139///
140/// \headerfile <x86intrin.h>
141///
142/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
143///
144/// \param __m1
145/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
146/// written to the lower 32 bits of the result.
147/// \param __m2
148/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
149/// written to the upper 32 bits of the result.
150/// \returns A 64-bit integer vector of [4 x i16] containing the converted
151/// values.
152static __inline__ __m64 __DEFAULT_FN_ATTRS
153_mm_packs_pi32(__m64 __m1, __m64 __m2)
154{
155 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
156}
157
158/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
159/// vector parameters of [4 x i16] into 8-bit unsigned integer values, and
160/// constructs a 64-bit integer vector of [8 x i8] as the result.
161///
162/// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are
163/// saturated to 0.
164///
165/// \headerfile <x86intrin.h>
166///
167/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
168///
169/// \param __m1
170/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
171/// written to the lower 32 bits of the result.
172/// \param __m2
173/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
174/// written to the upper 32 bits of the result.
175/// \returns A 64-bit integer vector of [8 x i8] containing the converted
176/// values.
177static __inline__ __m64 __DEFAULT_FN_ATTRS
178_mm_packs_pu16(__m64 __m1, __m64 __m2)
179{
180 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
181}
182
183/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
184/// and interleaves them into a 64-bit integer vector of [8 x i8].
185///
186/// \headerfile <x86intrin.h>
187///
188/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
189///
190/// \param __m1
191/// A 64-bit integer vector of [8 x i8]. \n
192/// Bits [39:32] are written to bits [7:0] of the result. \n
193/// Bits [47:40] are written to bits [23:16] of the result. \n
194/// Bits [55:48] are written to bits [39:32] of the result. \n
195/// Bits [63:56] are written to bits [55:48] of the result.
196/// \param __m2
197/// A 64-bit integer vector of [8 x i8]. \n
198/// Bits [39:32] are written to bits [15:8] of the result. \n
199/// Bits [47:40] are written to bits [31:24] of the result. \n
200/// Bits [55:48] are written to bits [47:40] of the result. \n
201/// Bits [63:56] are written to bits [63:56] of the result.
202/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
203/// values.
204static __inline__ __m64 __DEFAULT_FN_ATTRS
205_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
206{
207 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
208}
209
210/// Unpacks the upper 32 bits from two 64-bit integer vectors of
211/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
212///
213/// \headerfile <x86intrin.h>
214///
215/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
216///
217/// \param __m1
218/// A 64-bit integer vector of [4 x i16]. \n
219/// Bits [47:32] are written to bits [15:0] of the result. \n
220/// Bits [63:48] are written to bits [47:32] of the result.
221/// \param __m2
222/// A 64-bit integer vector of [4 x i16]. \n
223/// Bits [47:32] are written to bits [31:16] of the result. \n
224/// Bits [63:48] are written to bits [63:48] of the result.
225/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
226/// values.
227static __inline__ __m64 __DEFAULT_FN_ATTRS
228_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
229{
230 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
231}
232
233/// Unpacks the upper 32 bits from two 64-bit integer vectors of
234/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
235///
236/// \headerfile <x86intrin.h>
237///
238/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
239///
240/// \param __m1
241/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
242/// the lower 32 bits of the result.
243/// \param __m2
244/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
245/// the upper 32 bits of the result.
246/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
247/// values.
248static __inline__ __m64 __DEFAULT_FN_ATTRS
249_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
250{
251 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
252}
253
254/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
255/// and interleaves them into a 64-bit integer vector of [8 x i8].
256///
257/// \headerfile <x86intrin.h>
258///
259/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
260///
261/// \param __m1
262/// A 64-bit integer vector of [8 x i8]. \n
263/// Bits [7:0] are written to bits [7:0] of the result. \n
264/// Bits [15:8] are written to bits [23:16] of the result. \n
265/// Bits [23:16] are written to bits [39:32] of the result. \n
266/// Bits [31:24] are written to bits [55:48] of the result.
267/// \param __m2
268/// A 64-bit integer vector of [8 x i8]. \n
269/// Bits [7:0] are written to bits [15:8] of the result. \n
270/// Bits [15:8] are written to bits [31:24] of the result. \n
271/// Bits [23:16] are written to bits [47:40] of the result. \n
272/// Bits [31:24] are written to bits [63:56] of the result.
273/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
274/// values.
275static __inline__ __m64 __DEFAULT_FN_ATTRS
276_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
277{
278 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
279}
280
281/// Unpacks the lower 32 bits from two 64-bit integer vectors of
282/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
283///
284/// \headerfile <x86intrin.h>
285///
286/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
287///
288/// \param __m1
289/// A 64-bit integer vector of [4 x i16]. \n
290/// Bits [15:0] are written to bits [15:0] of the result. \n
291/// Bits [31:16] are written to bits [47:32] of the result.
292/// \param __m2
293/// A 64-bit integer vector of [4 x i16]. \n
294/// Bits [15:0] are written to bits [31:16] of the result. \n
295/// Bits [31:16] are written to bits [63:48] of the result.
296/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
297/// values.
298static __inline__ __m64 __DEFAULT_FN_ATTRS
299_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
300{
301 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
302}
303
304/// Unpacks the lower 32 bits from two 64-bit integer vectors of
305/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
306///
307/// \headerfile <x86intrin.h>
308///
309/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
310///
311/// \param __m1
312/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
313/// the lower 32 bits of the result.
314/// \param __m2
315/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
316/// the upper 32 bits of the result.
317/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
318/// values.
319static __inline__ __m64 __DEFAULT_FN_ATTRS
320_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
321{
322 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
323}
324
325/// Adds each 8-bit integer element of the first 64-bit integer vector
326/// of [8 x i8] to the corresponding 8-bit integer element of the second
327/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
328/// packed into a 64-bit integer vector of [8 x i8].
329///
330/// \headerfile <x86intrin.h>
331///
332/// This intrinsic corresponds to the <c> PADDB </c> instruction.
333///
334/// \param __m1
335/// A 64-bit integer vector of [8 x i8].
336/// \param __m2
337/// A 64-bit integer vector of [8 x i8].
338/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
339/// parameters.
340static __inline__ __m64 __DEFAULT_FN_ATTRS
341_mm_add_pi8(__m64 __m1, __m64 __m2)
342{
343 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
344}
345
346/// Adds each 16-bit integer element of the first 64-bit integer vector
347/// of [4 x i16] to the corresponding 16-bit integer element of the second
348/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
349/// packed into a 64-bit integer vector of [4 x i16].
350///
351/// \headerfile <x86intrin.h>
352///
353/// This intrinsic corresponds to the <c> PADDW </c> instruction.
354///
355/// \param __m1
356/// A 64-bit integer vector of [4 x i16].
357/// \param __m2
358/// A 64-bit integer vector of [4 x i16].
359/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
360/// parameters.
361static __inline__ __m64 __DEFAULT_FN_ATTRS
362_mm_add_pi16(__m64 __m1, __m64 __m2)
363{
364 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
365}
366
367/// Adds each 32-bit integer element of the first 64-bit integer vector
368/// of [2 x i32] to the corresponding 32-bit integer element of the second
369/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
370/// packed into a 64-bit integer vector of [2 x i32].
371///
372/// \headerfile <x86intrin.h>
373///
374/// This intrinsic corresponds to the <c> PADDD </c> instruction.
375///
376/// \param __m1
377/// A 64-bit integer vector of [2 x i32].
378/// \param __m2
379/// A 64-bit integer vector of [2 x i32].
380/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
381/// parameters.
382static __inline__ __m64 __DEFAULT_FN_ATTRS
383_mm_add_pi32(__m64 __m1, __m64 __m2)
384{
385 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
386}
387
388/// Adds, with saturation, each 8-bit signed integer element of the first
389/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed
390/// integer element of the second 64-bit integer vector of [8 x i8].
391///
392/// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
393/// less than 0x80 are saturated to 0x80. The results are packed into a
394/// 64-bit integer vector of [8 x i8].
395///
396/// \headerfile <x86intrin.h>
397///
398/// This intrinsic corresponds to the <c> PADDSB </c> instruction.
399///
400/// \param __m1
401/// A 64-bit integer vector of [8 x i8].
402/// \param __m2
403/// A 64-bit integer vector of [8 x i8].
404/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
405/// of both parameters.
406static __inline__ __m64 __DEFAULT_FN_ATTRS
407_mm_adds_pi8(__m64 __m1, __m64 __m2)
408{
409 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
410}
411
412/// Adds, with saturation, each 16-bit signed integer element of the first
413/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed
414/// integer element of the second 64-bit integer vector of [4 x i16].
415///
416/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
417/// less than 0x8000 are saturated to 0x8000. The results are packed into a
418/// 64-bit integer vector of [4 x i16].
419///
420/// \headerfile <x86intrin.h>
421///
422/// This intrinsic corresponds to the <c> PADDSW </c> instruction.
423///
424/// \param __m1
425/// A 64-bit integer vector of [4 x i16].
426/// \param __m2
427/// A 64-bit integer vector of [4 x i16].
428/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
429/// of both parameters.
430static __inline__ __m64 __DEFAULT_FN_ATTRS
431_mm_adds_pi16(__m64 __m1, __m64 __m2)
432{
433 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
434}
435
436/// Adds, with saturation, each 8-bit unsigned integer element of the first
437/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned
438/// integer element of the second 64-bit integer vector of [8 x i8].
439///
440/// Sums greater than 0xFF are saturated to 0xFF. The results are packed
441/// into a 64-bit integer vector of [8 x i8].
442///
443/// \headerfile <x86intrin.h>
444///
445/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
446///
447/// \param __m1
448/// A 64-bit integer vector of [8 x i8].
449/// \param __m2
450/// A 64-bit integer vector of [8 x i8].
451/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
452/// unsigned sums of both parameters.
453static __inline__ __m64 __DEFAULT_FN_ATTRS
454_mm_adds_pu8(__m64 __m1, __m64 __m2)
455{
456 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
457}
458
459/// Adds, with saturation, each 16-bit unsigned integer element of the first
460/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned
461/// integer element of the second 64-bit integer vector of [4 x i16].
462///
463/// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed
464/// into a 64-bit integer vector of [4 x i16].
465///
466/// \headerfile <x86intrin.h>
467///
468/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
469///
470/// \param __m1
471/// A 64-bit integer vector of [4 x i16].
472/// \param __m2
473/// A 64-bit integer vector of [4 x i16].
474/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
475/// unsigned sums of both parameters.
476static __inline__ __m64 __DEFAULT_FN_ATTRS
477_mm_adds_pu16(__m64 __m1, __m64 __m2)
478{
479 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
480}
481
482/// Subtracts each 8-bit integer element of the second 64-bit integer
483/// vector of [8 x i8] from the corresponding 8-bit integer element of the
484/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
485/// are packed into a 64-bit integer vector of [8 x i8].
486///
487/// \headerfile <x86intrin.h>
488///
489/// This intrinsic corresponds to the <c> PSUBB </c> instruction.
490///
491/// \param __m1
492/// A 64-bit integer vector of [8 x i8] containing the minuends.
493/// \param __m2
494/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
495/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
496/// both parameters.
497static __inline__ __m64 __DEFAULT_FN_ATTRS
498_mm_sub_pi8(__m64 __m1, __m64 __m2)
499{
500 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
501}
502
503/// Subtracts each 16-bit integer element of the second 64-bit integer
504/// vector of [4 x i16] from the corresponding 16-bit integer element of the
505/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
506/// results are packed into a 64-bit integer vector of [4 x i16].
507///
508/// \headerfile <x86intrin.h>
509///
510/// This intrinsic corresponds to the <c> PSUBW </c> instruction.
511///
512/// \param __m1
513/// A 64-bit integer vector of [4 x i16] containing the minuends.
514/// \param __m2
515/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
516/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
517/// both parameters.
518static __inline__ __m64 __DEFAULT_FN_ATTRS
519_mm_sub_pi16(__m64 __m1, __m64 __m2)
520{
521 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
522}
523
524/// Subtracts each 32-bit integer element of the second 64-bit integer
525/// vector of [2 x i32] from the corresponding 32-bit integer element of the
526/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
527/// results are packed into a 64-bit integer vector of [2 x i32].
528///
529/// \headerfile <x86intrin.h>
530///
531/// This intrinsic corresponds to the <c> PSUBD </c> instruction.
532///
533/// \param __m1
534/// A 64-bit integer vector of [2 x i32] containing the minuends.
535/// \param __m2
536/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
537/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
538/// both parameters.
539static __inline__ __m64 __DEFAULT_FN_ATTRS
540_mm_sub_pi32(__m64 __m1, __m64 __m2)
541{
542 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
543}
544
545/// Subtracts, with saturation, each 8-bit signed integer element of the second
546/// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed
547/// integer element of the first 64-bit integer vector of [8 x i8].
548///
549/// Positive results greater than 0x7F are saturated to 0x7F. Negative
550/// results less than 0x80 are saturated to 0x80. The results are packed
551/// into a 64-bit integer vector of [8 x i8].
552///
553/// \headerfile <x86intrin.h>
554///
555/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
556///
557/// \param __m1
558/// A 64-bit integer vector of [8 x i8] containing the minuends.
559/// \param __m2
560/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
561/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
562/// differences of both parameters.
563static __inline__ __m64 __DEFAULT_FN_ATTRS
564_mm_subs_pi8(__m64 __m1, __m64 __m2)
565{
566 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
567}
568
569/// Subtracts, with saturation, each 16-bit signed integer element of the
570/// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit
571/// signed integer element of the first 64-bit integer vector of [4 x i16].
572///
573/// Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative
574/// results less than 0x8000 are saturated to 0x8000. The results are packed
575/// into a 64-bit integer vector of [4 x i16].
576///
577/// \headerfile <x86intrin.h>
578///
579/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
580///
581/// \param __m1
582/// A 64-bit integer vector of [4 x i16] containing the minuends.
583/// \param __m2
584/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
585/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
586/// differences of both parameters.
587static __inline__ __m64 __DEFAULT_FN_ATTRS
588_mm_subs_pi16(__m64 __m1, __m64 __m2)
589{
590 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
591}
592
593/// Subtracts each 8-bit unsigned integer element of the second 64-bit
594/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
595/// element of the first 64-bit integer vector of [8 x i8].
596///
597/// If an element of the first vector is less than the corresponding element
598/// of the second vector, the result is saturated to 0. The results are
599/// packed into a 64-bit integer vector of [8 x i8].
600///
601/// \headerfile <x86intrin.h>
602///
603/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
604///
605/// \param __m1
606/// A 64-bit integer vector of [8 x i8] containing the minuends.
607/// \param __m2
608/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
609/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
610/// differences of both parameters.
611static __inline__ __m64 __DEFAULT_FN_ATTRS
612_mm_subs_pu8(__m64 __m1, __m64 __m2)
613{
614 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
615}
616
617/// Subtracts each 16-bit unsigned integer element of the second 64-bit
618/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
619/// integer element of the first 64-bit integer vector of [4 x i16].
620///
621/// If an element of the first vector is less than the corresponding element
622/// of the second vector, the result is saturated to 0. The results are
623/// packed into a 64-bit integer vector of [4 x i16].
624///
625/// \headerfile <x86intrin.h>
626///
627/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
628///
629/// \param __m1
630/// A 64-bit integer vector of [4 x i16] containing the minuends.
631/// \param __m2
632/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
633/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
634/// differences of both parameters.
635static __inline__ __m64 __DEFAULT_FN_ATTRS
636_mm_subs_pu16(__m64 __m1, __m64 __m2)
637{
638 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
639}
640
641/// Multiplies each 16-bit signed integer element of the first 64-bit
642/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
643/// element of the second 64-bit integer vector of [4 x i16] to get four
644/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
645/// The lower 32 bits of these two sums are packed into a 64-bit integer
646/// vector of [2 x i32].
647///
648/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]
649/// of both parameters are multiplied, and the sum of both results is written
650/// to bits [31:0] of the result.
651///
652/// \headerfile <x86intrin.h>
653///
654/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
655///
656/// \param __m1
657/// A 64-bit integer vector of [4 x i16].
658/// \param __m2
659/// A 64-bit integer vector of [4 x i16].
660/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
661/// products of both parameters.
662static __inline__ __m64 __DEFAULT_FN_ATTRS
663_mm_madd_pi16(__m64 __m1, __m64 __m2)
664{
665 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
666}
667
668/// Multiplies each 16-bit signed integer element of the first 64-bit
669/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
670/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
671/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
672///
673/// \headerfile <x86intrin.h>
674///
675/// This intrinsic corresponds to the <c> PMULHW </c> instruction.
676///
677/// \param __m1
678/// A 64-bit integer vector of [4 x i16].
679/// \param __m2
680/// A 64-bit integer vector of [4 x i16].
681/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
682/// of the products of both parameters.
683static __inline__ __m64 __DEFAULT_FN_ATTRS
684_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
685{
686 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
687}
688
689/// Multiplies each 16-bit signed integer element of the first 64-bit
690/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
691/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
692/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
693///
694/// \headerfile <x86intrin.h>
695///
696/// This intrinsic corresponds to the <c> PMULLW </c> instruction.
697///
698/// \param __m1
699/// A 64-bit integer vector of [4 x i16].
700/// \param __m2
701/// A 64-bit integer vector of [4 x i16].
702/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
703/// of the products of both parameters.
704static __inline__ __m64 __DEFAULT_FN_ATTRS
705_mm_mullo_pi16(__m64 __m1, __m64 __m2)
706{
707 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
708}
709
710/// Left-shifts each 16-bit signed integer element of the first
711/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
712/// of bits specified by the second parameter, which is a 64-bit integer. The
713/// lower 16 bits of the results are packed into a 64-bit integer vector of
714/// [4 x i16].
715///
716/// \headerfile <x86intrin.h>
717///
718/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
719///
720/// \param __m
721/// A 64-bit integer vector of [4 x i16].
722/// \param __count
723/// A 64-bit integer vector interpreted as a single 64-bit integer.
724/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
725/// values. If \a __count is greater than or equal to 16, the result is set to all
726/// 0.
727static __inline__ __m64 __DEFAULT_FN_ATTRS
728_mm_sll_pi16(__m64 __m, __m64 __count)
729{
730 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
731}
732
733/// Left-shifts each 16-bit signed integer element of a 64-bit integer
734/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
735/// The lower 16 bits of the results are packed into a 64-bit integer vector
736/// of [4 x i16].
737///
738/// \headerfile <x86intrin.h>
739///
740/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
741///
742/// \param __m
743/// A 64-bit integer vector of [4 x i16].
744/// \param __count
745/// A 32-bit integer value.
746/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
747/// values. If \a __count is greater than or equal to 16, the result is set to all
748/// 0.
749static __inline__ __m64 __DEFAULT_FN_ATTRS
750_mm_slli_pi16(__m64 __m, int __count)
751{
752 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
753}
754
755/// Left-shifts each 32-bit signed integer element of the first
756/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
757/// of bits specified by the second parameter, which is a 64-bit integer. The
758/// lower 32 bits of the results are packed into a 64-bit integer vector of
759/// [2 x i32].
760///
761/// \headerfile <x86intrin.h>
762///
763/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
764///
765/// \param __m
766/// A 64-bit integer vector of [2 x i32].
767/// \param __count
768/// A 64-bit integer vector interpreted as a single 64-bit integer.
769/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
770/// values. If \a __count is greater than or equal to 32, the result is set to all
771/// 0.
772static __inline__ __m64 __DEFAULT_FN_ATTRS
773_mm_sll_pi32(__m64 __m, __m64 __count)
774{
775 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
776}
777
778/// Left-shifts each 32-bit signed integer element of a 64-bit integer
779/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
780/// The lower 32 bits of the results are packed into a 64-bit integer vector
781/// of [2 x i32].
782///
783/// \headerfile <x86intrin.h>
784///
785/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
786///
787/// \param __m
788/// A 64-bit integer vector of [2 x i32].
789/// \param __count
790/// A 32-bit integer value.
791/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
792/// values. If \a __count is greater than or equal to 32, the result is set to all
793/// 0.
794static __inline__ __m64 __DEFAULT_FN_ATTRS
795_mm_slli_pi32(__m64 __m, int __count)
796{
797 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
798}
799
800/// Left-shifts the first 64-bit integer parameter by the number of bits
801/// specified by the second 64-bit integer parameter. The lower 64 bits of
802/// the result are returned.
803///
804/// \headerfile <x86intrin.h>
805///
806/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
807///
808/// \param __m
809/// A 64-bit integer vector interpreted as a single 64-bit integer.
810/// \param __count
811/// A 64-bit integer vector interpreted as a single 64-bit integer.
812/// \returns A 64-bit integer vector containing the left-shifted value. If
813/// \a __count is greater or equal to 64, the result is set to 0.
814static __inline__ __m64 __DEFAULT_FN_ATTRS
815_mm_sll_si64(__m64 __m, __m64 __count)
816{
817 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
818}
819
820/// Left-shifts the first parameter, which is a 64-bit integer, by the
821/// number of bits specified by the second parameter, which is a 32-bit
822/// integer. The lower 64 bits of result are returned.
823///
824/// \headerfile <x86intrin.h>
825///
826/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
827///
828/// \param __m
829/// A 64-bit integer vector interpreted as a single 64-bit integer.
830/// \param __count
831/// A 32-bit integer value.
832/// \returns A 64-bit integer vector containing the left-shifted value. If
833/// \a __count is greater or equal to 64, the result is set to 0.
834static __inline__ __m64 __DEFAULT_FN_ATTRS
835_mm_slli_si64(__m64 __m, int __count)
836{
837 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
838}
839
840/// Right-shifts each 16-bit integer element of the first parameter,
841/// which is a 64-bit integer vector of [4 x i16], by the number of bits
842/// specified by the second parameter, which is a 64-bit integer.
843///
844/// High-order bits are filled with the sign bit of the initial value of each
845/// 16-bit element. The 16-bit results are packed into a 64-bit integer
846/// vector of [4 x i16].
847///
848/// \headerfile <x86intrin.h>
849///
850/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
851///
852/// \param __m
853/// A 64-bit integer vector of [4 x i16].
854/// \param __count
855/// A 64-bit integer vector interpreted as a single 64-bit integer.
856/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
857/// values.
858static __inline__ __m64 __DEFAULT_FN_ATTRS
859_mm_sra_pi16(__m64 __m, __m64 __count)
860{
861 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
862}
863
864/// Right-shifts each 16-bit integer element of a 64-bit integer vector
865/// of [4 x i16] by the number of bits specified by a 32-bit integer.
866///
867/// High-order bits are filled with the sign bit of the initial value of each
868/// 16-bit element. The 16-bit results are packed into a 64-bit integer
869/// vector of [4 x i16].
870///
871/// \headerfile <x86intrin.h>
872///
873/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
874///
875/// \param __m
876/// A 64-bit integer vector of [4 x i16].
877/// \param __count
878/// A 32-bit integer value.
879/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
880/// values.
881static __inline__ __m64 __DEFAULT_FN_ATTRS
882_mm_srai_pi16(__m64 __m, int __count)
883{
884 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
885}
886
887/// Right-shifts each 32-bit integer element of the first parameter,
888/// which is a 64-bit integer vector of [2 x i32], by the number of bits
889/// specified by the second parameter, which is a 64-bit integer.
890///
891/// High-order bits are filled with the sign bit of the initial value of each
892/// 32-bit element. The 32-bit results are packed into a 64-bit integer
893/// vector of [2 x i32].
894///
895/// \headerfile <x86intrin.h>
896///
897/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
898///
899/// \param __m
900/// A 64-bit integer vector of [2 x i32].
901/// \param __count
902/// A 64-bit integer vector interpreted as a single 64-bit integer.
903/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
904/// values.
905static __inline__ __m64 __DEFAULT_FN_ATTRS
906_mm_sra_pi32(__m64 __m, __m64 __count)
907{
908 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
909}
910
911/// Right-shifts each 32-bit integer element of a 64-bit integer vector
912/// of [2 x i32] by the number of bits specified by a 32-bit integer.
913///
914/// High-order bits are filled with the sign bit of the initial value of each
915/// 32-bit element. The 32-bit results are packed into a 64-bit integer
916/// vector of [2 x i32].
917///
918/// \headerfile <x86intrin.h>
919///
920/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
921///
922/// \param __m
923/// A 64-bit integer vector of [2 x i32].
924/// \param __count
925/// A 32-bit integer value.
926/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
927/// values.
928static __inline__ __m64 __DEFAULT_FN_ATTRS
929_mm_srai_pi32(__m64 __m, int __count)
930{
931 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
932}
933
934/// Right-shifts each 16-bit integer element of the first parameter,
935/// which is a 64-bit integer vector of [4 x i16], by the number of bits
936/// specified by the second parameter, which is a 64-bit integer.
937///
938/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
939/// integer vector of [4 x i16].
940///
941/// \headerfile <x86intrin.h>
942///
943/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
944///
945/// \param __m
946/// A 64-bit integer vector of [4 x i16].
947/// \param __count
948/// A 64-bit integer vector interpreted as a single 64-bit integer.
949/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
950/// values.
951static __inline__ __m64 __DEFAULT_FN_ATTRS
952_mm_srl_pi16(__m64 __m, __m64 __count)
953{
954 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
955}
956
957/// Right-shifts each 16-bit integer element of a 64-bit integer vector
958/// of [4 x i16] by the number of bits specified by a 32-bit integer.
959///
960/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
961/// integer vector of [4 x i16].
962///
963/// \headerfile <x86intrin.h>
964///
965/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
966///
967/// \param __m
968/// A 64-bit integer vector of [4 x i16].
969/// \param __count
970/// A 32-bit integer value.
971/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
972/// values.
973static __inline__ __m64 __DEFAULT_FN_ATTRS
974_mm_srli_pi16(__m64 __m, int __count)
975{
976 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
977}
978
979/// Right-shifts each 32-bit integer element of the first parameter,
980/// which is a 64-bit integer vector of [2 x i32], by the number of bits
981/// specified by the second parameter, which is a 64-bit integer.
982///
983/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
984/// integer vector of [2 x i32].
985///
986/// \headerfile <x86intrin.h>
987///
988/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
989///
990/// \param __m
991/// A 64-bit integer vector of [2 x i32].
992/// \param __count
993/// A 64-bit integer vector interpreted as a single 64-bit integer.
994/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
995/// values.
996static __inline__ __m64 __DEFAULT_FN_ATTRS
997_mm_srl_pi32(__m64 __m, __m64 __count)
998{
999 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
1000}
1001
1002/// Right-shifts each 32-bit integer element of a 64-bit integer vector
1003/// of [2 x i32] by the number of bits specified by a 32-bit integer.
1004///
1005/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1006/// integer vector of [2 x i32].
1007///
1008/// \headerfile <x86intrin.h>
1009///
1010/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1011///
1012/// \param __m
1013/// A 64-bit integer vector of [2 x i32].
1014/// \param __count
1015/// A 32-bit integer value.
1016/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1017/// values.
1018static __inline__ __m64 __DEFAULT_FN_ATTRS
1019_mm_srli_pi32(__m64 __m, int __count)
1020{
1021 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
1022}
1023
1024/// Right-shifts the first 64-bit integer parameter by the number of bits
1025/// specified by the second 64-bit integer parameter.
1026///
1027/// High-order bits are cleared.
1028///
1029/// \headerfile <x86intrin.h>
1030///
1031/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1032///
1033/// \param __m
1034/// A 64-bit integer vector interpreted as a single 64-bit integer.
1035/// \param __count
1036/// A 64-bit integer vector interpreted as a single 64-bit integer.
1037/// \returns A 64-bit integer vector containing the right-shifted value.
1038static __inline__ __m64 __DEFAULT_FN_ATTRS
1039_mm_srl_si64(__m64 __m, __m64 __count)
1040{
1041 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
1042}
1043
1044/// Right-shifts the first parameter, which is a 64-bit integer, by the
1045/// number of bits specified by the second parameter, which is a 32-bit
1046/// integer.
1047///
1048/// High-order bits are cleared.
1049///
1050/// \headerfile <x86intrin.h>
1051///
1052/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1053///
1054/// \param __m
1055/// A 64-bit integer vector interpreted as a single 64-bit integer.
1056/// \param __count
1057/// A 32-bit integer value.
1058/// \returns A 64-bit integer vector containing the right-shifted value.
1059static __inline__ __m64 __DEFAULT_FN_ATTRS
1060_mm_srli_si64(__m64 __m, int __count)
1061{
1062 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
1063}
1064
1065/// Performs a bitwise AND of two 64-bit integer vectors.
1066///
1067/// \headerfile <x86intrin.h>
1068///
1069/// This intrinsic corresponds to the <c> PAND </c> instruction.
1070///
1071/// \param __m1
1072/// A 64-bit integer vector.
1073/// \param __m2
1074/// A 64-bit integer vector.
1075/// \returns A 64-bit integer vector containing the bitwise AND of both
1076/// parameters.
1077static __inline__ __m64 __DEFAULT_FN_ATTRS
1078_mm_and_si64(__m64 __m1, __m64 __m2)
1079{
1080 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
1081}
1082
1083/// Performs a bitwise NOT of the first 64-bit integer vector, and then
1084/// performs a bitwise AND of the intermediate result and the second 64-bit
1085/// integer vector.
1086///
1087/// \headerfile <x86intrin.h>
1088///
1089/// This intrinsic corresponds to the <c> PANDN </c> instruction.
1090///
1091/// \param __m1
1092/// A 64-bit integer vector. The one's complement of this parameter is used
1093/// in the bitwise AND.
1094/// \param __m2
1095/// A 64-bit integer vector.
1096/// \returns A 64-bit integer vector containing the bitwise AND of the second
1097/// parameter and the one's complement of the first parameter.
1098static __inline__ __m64 __DEFAULT_FN_ATTRS
1099_mm_andnot_si64(__m64 __m1, __m64 __m2)
1100{
1101 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
1102}
1103
1104/// Performs a bitwise OR of two 64-bit integer vectors.
1105///
1106/// \headerfile <x86intrin.h>
1107///
1108/// This intrinsic corresponds to the <c> POR </c> instruction.
1109///
1110/// \param __m1
1111/// A 64-bit integer vector.
1112/// \param __m2
1113/// A 64-bit integer vector.
1114/// \returns A 64-bit integer vector containing the bitwise OR of both
1115/// parameters.
1116static __inline__ __m64 __DEFAULT_FN_ATTRS
1117_mm_or_si64(__m64 __m1, __m64 __m2)
1118{
1119 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
1120}
1121
1122/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
1123///
1124/// \headerfile <x86intrin.h>
1125///
1126/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1127///
1128/// \param __m1
1129/// A 64-bit integer vector.
1130/// \param __m2
1131/// A 64-bit integer vector.
1132/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1133/// parameters.
1134static __inline__ __m64 __DEFAULT_FN_ATTRS
1135_mm_xor_si64(__m64 __m1, __m64 __m2)
1136{
1137 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
1138}
1139
1140/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1141/// [8 x i8] to determine if the element of the first vector is equal to the
1142/// corresponding element of the second vector.
1143///
1144/// The comparison yields 0 for false, 0xFF for true.
1145///
1146/// \headerfile <x86intrin.h>
1147///
1148/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
1149///
1150/// \param __m1
1151/// A 64-bit integer vector of [8 x i8].
1152/// \param __m2
1153/// A 64-bit integer vector of [8 x i8].
1154/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1155/// results.
1156static __inline__ __m64 __DEFAULT_FN_ATTRS
1157_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
1158{
1159 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
1160}
1161
1162/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1163/// [4 x i16] to determine if the element of the first vector is equal to the
1164/// corresponding element of the second vector.
1165///
1166/// The comparison yields 0 for false, 0xFFFF for true.
1167///
1168/// \headerfile <x86intrin.h>
1169///
1170/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
1171///
1172/// \param __m1
1173/// A 64-bit integer vector of [4 x i16].
1174/// \param __m2
1175/// A 64-bit integer vector of [4 x i16].
1176/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1177/// results.
1178static __inline__ __m64 __DEFAULT_FN_ATTRS
1179_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
1180{
1181 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
1182}
1183
1184/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1185/// [2 x i32] to determine if the element of the first vector is equal to the
1186/// corresponding element of the second vector.
1187///
1188/// The comparison yields 0 for false, 0xFFFFFFFF for true.
1189///
1190/// \headerfile <x86intrin.h>
1191///
1192/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
1193///
1194/// \param __m1
1195/// A 64-bit integer vector of [2 x i32].
1196/// \param __m2
1197/// A 64-bit integer vector of [2 x i32].
1198/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1199/// results.
1200static __inline__ __m64 __DEFAULT_FN_ATTRS
1201_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
1202{
1203 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
1204}
1205
1206/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1207/// [8 x i8] to determine if the element of the first vector is greater than
1208/// the corresponding element of the second vector.
1209///
1210/// The comparison yields 0 for false, 0xFF for true.
1211///
1212/// \headerfile <x86intrin.h>
1213///
1214/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
1215///
1216/// \param __m1
1217/// A 64-bit integer vector of [8 x i8].
1218/// \param __m2
1219/// A 64-bit integer vector of [8 x i8].
1220/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1221/// results.
1222static __inline__ __m64 __DEFAULT_FN_ATTRS
1223_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
1224{
1225 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
1226}
1227
1228/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1229/// [4 x i16] to determine if the element of the first vector is greater than
1230/// the corresponding element of the second vector.
1231///
1232/// The comparison yields 0 for false, 0xFFFF for true.
1233///
1234/// \headerfile <x86intrin.h>
1235///
1236/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
1237///
1238/// \param __m1
1239/// A 64-bit integer vector of [4 x i16].
1240/// \param __m2
1241/// A 64-bit integer vector of [4 x i16].
1242/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1243/// results.
1244static __inline__ __m64 __DEFAULT_FN_ATTRS
1245_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
1246{
1247 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
1248}
1249
1250/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1251/// [2 x i32] to determine if the element of the first vector is greater than
1252/// the corresponding element of the second vector.
1253///
1254/// The comparison yields 0 for false, 0xFFFFFFFF for true.
1255///
1256/// \headerfile <x86intrin.h>
1257///
1258/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
1259///
1260/// \param __m1
1261/// A 64-bit integer vector of [2 x i32].
1262/// \param __m2
1263/// A 64-bit integer vector of [2 x i32].
1264/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1265/// results.
1266static __inline__ __m64 __DEFAULT_FN_ATTRS
1267_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
1268{
1269 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
1270}
1271
1272/// Constructs a 64-bit integer vector initialized to zero.
1273///
1274/// \headerfile <x86intrin.h>
1275///
1276/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1277///
1278/// \returns An initialized 64-bit integer vector with all elements set to zero.
1279static __inline__ __m64 __DEFAULT_FN_ATTRS
1281{
1282 return __extension__ (__m64){ 0LL };
1283}
1284
1285/// Constructs a 64-bit integer vector initialized with the specified
1286/// 32-bit integer values.
1287///
1288/// \headerfile <x86intrin.h>
1289///
1290/// This intrinsic is a utility function and does not correspond to a specific
1291/// instruction.
1292///
1293/// \param __i1
1294/// A 32-bit integer value used to initialize the upper 32 bits of the
1295/// result.
1296/// \param __i0
1297/// A 32-bit integer value used to initialize the lower 32 bits of the
1298/// result.
1299/// \returns An initialized 64-bit integer vector.
1300static __inline__ __m64 __DEFAULT_FN_ATTRS
1301_mm_set_pi32(int __i1, int __i0)
1302{
1303 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
1304}
1305
1306/// Constructs a 64-bit integer vector initialized with the specified
1307/// 16-bit integer values.
1308///
1309/// \headerfile <x86intrin.h>
1310///
1311/// This intrinsic is a utility function and does not correspond to a specific
1312/// instruction.
1313///
1314/// \param __s3
1315/// A 16-bit integer value used to initialize bits [63:48] of the result.
1316/// \param __s2
1317/// A 16-bit integer value used to initialize bits [47:32] of the result.
1318/// \param __s1
1319/// A 16-bit integer value used to initialize bits [31:16] of the result.
1320/// \param __s0
1321/// A 16-bit integer value used to initialize bits [15:0] of the result.
1322/// \returns An initialized 64-bit integer vector.
1323static __inline__ __m64 __DEFAULT_FN_ATTRS
1324_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
1325{
1326 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
1327}
1328
1329/// Constructs a 64-bit integer vector initialized with the specified
1330/// 8-bit integer values.
1331///
1332/// \headerfile <x86intrin.h>
1333///
1334/// This intrinsic is a utility function and does not correspond to a specific
1335/// instruction.
1336///
1337/// \param __b7
1338/// An 8-bit integer value used to initialize bits [63:56] of the result.
1339/// \param __b6
1340/// An 8-bit integer value used to initialize bits [55:48] of the result.
1341/// \param __b5
1342/// An 8-bit integer value used to initialize bits [47:40] of the result.
1343/// \param __b4
1344/// An 8-bit integer value used to initialize bits [39:32] of the result.
1345/// \param __b3
1346/// An 8-bit integer value used to initialize bits [31:24] of the result.
1347/// \param __b2
1348/// An 8-bit integer value used to initialize bits [23:16] of the result.
1349/// \param __b1
1350/// An 8-bit integer value used to initialize bits [15:8] of the result.
1351/// \param __b0
1352/// An 8-bit integer value used to initialize bits [7:0] of the result.
1353/// \returns An initialized 64-bit integer vector.
1354static __inline__ __m64 __DEFAULT_FN_ATTRS
1355_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1356 char __b1, char __b0)
1357{
1358 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
1359 __b4, __b5, __b6, __b7);
1360}
1361
1362/// Constructs a 64-bit integer vector of [2 x i32], with each of the
1363/// 32-bit integer vector elements set to the specified 32-bit integer
1364/// value.
1365///
1366/// \headerfile <x86intrin.h>
1367///
1368/// This intrinsic is a utility function and does not correspond to a specific
1369/// instruction.
1370///
1371/// \param __i
1372/// A 32-bit integer value used to initialize each vector element of the
1373/// result.
1374/// \returns An initialized 64-bit integer vector of [2 x i32].
1375static __inline__ __m64 __DEFAULT_FN_ATTRS
1377{
1378 return _mm_set_pi32(__i, __i);
1379}
1380
1381/// Constructs a 64-bit integer vector of [4 x i16], with each of the
1382/// 16-bit integer vector elements set to the specified 16-bit integer
1383/// value.
1384///
1385/// \headerfile <x86intrin.h>
1386///
1387/// This intrinsic is a utility function and does not correspond to a specific
1388/// instruction.
1389///
1390/// \param __w
1391/// A 16-bit integer value used to initialize each vector element of the
1392/// result.
1393/// \returns An initialized 64-bit integer vector of [4 x i16].
1394static __inline__ __m64 __DEFAULT_FN_ATTRS
1396{
1397 return _mm_set_pi16(__w, __w, __w, __w);
1398}
1399
1400/// Constructs a 64-bit integer vector of [8 x i8], with each of the
1401/// 8-bit integer vector elements set to the specified 8-bit integer value.
1402///
1403/// \headerfile <x86intrin.h>
1404///
1405/// This intrinsic is a utility function and does not correspond to a specific
1406/// instruction.
1407///
1408/// \param __b
1409/// An 8-bit integer value used to initialize each vector element of the
1410/// result.
1411/// \returns An initialized 64-bit integer vector of [8 x i8].
1412static __inline__ __m64 __DEFAULT_FN_ATTRS
1414{
1415 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
1416}
1417
1418/// Constructs a 64-bit integer vector, initialized in reverse order with
1419/// the specified 32-bit integer values.
1420///
1421/// \headerfile <x86intrin.h>
1422///
1423/// This intrinsic is a utility function and does not correspond to a specific
1424/// instruction.
1425///
1426/// \param __i0
1427/// A 32-bit integer value used to initialize the lower 32 bits of the
1428/// result.
1429/// \param __i1
1430/// A 32-bit integer value used to initialize the upper 32 bits of the
1431/// result.
1432/// \returns An initialized 64-bit integer vector.
1433static __inline__ __m64 __DEFAULT_FN_ATTRS
1434_mm_setr_pi32(int __i0, int __i1)
1435{
1436 return _mm_set_pi32(__i1, __i0);
1437}
1438
1439/// Constructs a 64-bit integer vector, initialized in reverse order with
1440/// the specified 16-bit integer values.
1441///
1442/// \headerfile <x86intrin.h>
1443///
1444/// This intrinsic is a utility function and does not correspond to a specific
1445/// instruction.
1446///
1447/// \param __w0
1448/// A 16-bit integer value used to initialize bits [15:0] of the result.
1449/// \param __w1
1450/// A 16-bit integer value used to initialize bits [31:16] of the result.
1451/// \param __w2
1452/// A 16-bit integer value used to initialize bits [47:32] of the result.
1453/// \param __w3
1454/// A 16-bit integer value used to initialize bits [63:48] of the result.
1455/// \returns An initialized 64-bit integer vector.
1456static __inline__ __m64 __DEFAULT_FN_ATTRS
1457_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
1458{
1459 return _mm_set_pi16(__w3, __w2, __w1, __w0);
1460}
1461
1462/// Constructs a 64-bit integer vector, initialized in reverse order with
1463/// the specified 8-bit integer values.
1464///
1465/// \headerfile <x86intrin.h>
1466///
1467/// This intrinsic is a utility function and does not correspond to a specific
1468/// instruction.
1469///
1470/// \param __b0
1471/// An 8-bit integer value used to initialize bits [7:0] of the result.
1472/// \param __b1
1473/// An 8-bit integer value used to initialize bits [15:8] of the result.
1474/// \param __b2
1475/// An 8-bit integer value used to initialize bits [23:16] of the result.
1476/// \param __b3
1477/// An 8-bit integer value used to initialize bits [31:24] of the result.
1478/// \param __b4
1479/// An 8-bit integer value used to initialize bits [39:32] of the result.
1480/// \param __b5
1481/// An 8-bit integer value used to initialize bits [47:40] of the result.
1482/// \param __b6
1483/// An 8-bit integer value used to initialize bits [55:48] of the result.
1484/// \param __b7
1485/// An 8-bit integer value used to initialize bits [63:56] of the result.
1486/// \returns An initialized 64-bit integer vector.
1487static __inline__ __m64 __DEFAULT_FN_ATTRS
1488_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1489 char __b6, char __b7)
1490{
1491 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1492}
1493
/* Remove the function-attribute helper macro defined near the top of this
   header; no definition after this point uses it. */
1494#undef __DEFAULT_FN_ATTRS
1495
1496/* Aliases for compatibility: each _m_* name below is an object-like macro
   that expands to the _mm_* intrinsic name on its right. */
/* State management and scalar <-> vector conversion. */
1497#define _m_empty _mm_empty
1498#define _m_from_int _mm_cvtsi32_si64
1499#define _m_from_int64 _mm_cvtsi64_m64
1500#define _m_to_int _mm_cvtsi64_si32
1501#define _m_to_int64 _mm_cvtm64_si64
/* Pack and unpack. */
1502#define _m_packsswb _mm_packs_pi16
1503#define _m_packssdw _mm_packs_pi32
1504#define _m_packuswb _mm_packs_pu16
1505#define _m_punpckhbw _mm_unpackhi_pi8
1506#define _m_punpckhwd _mm_unpackhi_pi16
1507#define _m_punpckhdq _mm_unpackhi_pi32
1508#define _m_punpcklbw _mm_unpacklo_pi8
1509#define _m_punpcklwd _mm_unpacklo_pi16
1510#define _m_punpckldq _mm_unpacklo_pi32
/* Addition. */
1511#define _m_paddb _mm_add_pi8
1512#define _m_paddw _mm_add_pi16
1513#define _m_paddd _mm_add_pi32
1514#define _m_paddsb _mm_adds_pi8
1515#define _m_paddsw _mm_adds_pi16
1516#define _m_paddusb _mm_adds_pu8
1517#define _m_paddusw _mm_adds_pu16
/* Subtraction. */
1518#define _m_psubb _mm_sub_pi8
1519#define _m_psubw _mm_sub_pi16
1520#define _m_psubd _mm_sub_pi32
1521#define _m_psubsb _mm_subs_pi8
1522#define _m_psubsw _mm_subs_pi16
1523#define _m_psubusb _mm_subs_pu8
1524#define _m_psubusw _mm_subs_pu16
/* Multiplication. */
1525#define _m_pmaddwd _mm_madd_pi16
1526#define _m_pmulhw _mm_mulhi_pi16
1527#define _m_pmullw _mm_mullo_pi16
/* Shifts; the names ending in 'i' map to the variants taking an int count. */
1528#define _m_psllw _mm_sll_pi16
1529#define _m_psllwi _mm_slli_pi16
1530#define _m_pslld _mm_sll_pi32
1531#define _m_pslldi _mm_slli_pi32
1532#define _m_psllq _mm_sll_si64
1533#define _m_psllqi _mm_slli_si64
1534#define _m_psraw _mm_sra_pi16
1535#define _m_psrawi _mm_srai_pi16
1536#define _m_psrad _mm_sra_pi32
1537#define _m_psradi _mm_srai_pi32
1538#define _m_psrlw _mm_srl_pi16
1539#define _m_psrlwi _mm_srli_pi16
1540#define _m_psrld _mm_srl_pi32
1541#define _m_psrldi _mm_srli_pi32
1542#define _m_psrlq _mm_srl_si64
1543#define _m_psrlqi _mm_srli_si64
/* Bitwise logic. */
1544#define _m_pand _mm_and_si64
1545#define _m_pandn _mm_andnot_si64
1546#define _m_por _mm_or_si64
1547#define _m_pxor _mm_xor_si64
/* Comparisons. */
1548#define _m_pcmpeqb _mm_cmpeq_pi8
1549#define _m_pcmpeqw _mm_cmpeq_pi16
1550#define _m_pcmpeqd _mm_cmpeq_pi32
1551#define _m_pcmpgtb _mm_cmpgt_pi8
1552#define _m_pcmpgtw _mm_cmpgt_pi16
1553#define _m_pcmpgtd _mm_cmpgt_pi32
1554
1555#endif /* __MMINTRIN_H */
1556
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition: mmintrin.h:228
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi16(short __w)
Constructs a 64-bit integer vector of [4 x i16], with each of the 16-bit integer vector elements set ...
Definition: mmintrin.h:1395
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi32(__m64 __m1, __m64 __m2)
Adds each 32-bit integer element of the first 64-bit integer vector of [2 x i32] to the corresponding...
Definition: mmintrin.h:383
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:952
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
Definition: mmintrin.h:929
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 16-bit integer va...
Definition: mmintrin.h:1457
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
Definition: mmintrin.h:205
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 8-bit integer val...
Definition: mmintrin.h:1488
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_madd_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:663
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_si64(__m64 __m, __m64 __count)
Right-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit ...
Definition: mmintrin.h:1039
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit unsigned integer element of the second 64-bit integer vector of [4 x i16] from ...
Definition: mmintrin.h:636
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi8(__m64 __m1, __m64 __m2)
Adds, with saturation, each 8-bit signed integer element of the first 64-bit integer vector of [8 x i...
Definition: mmintrin.h:407
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
Definition: mmintrin.h:276
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi16(__m64 __m1, __m64 __m2)
Subtracts, with saturation, each 16-bit signed integer element of the second 64-bit integer vector of...
Definition: mmintrin.h:588
long long __m64 __attribute__((__vector_size__(8), __aligned__(8)))
Definition: mmintrin.h:17
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
Definition: mmintrin.h:1245
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_si64(__m64 __m, __m64 __count)
Left-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit i...
Definition: mmintrin.h:815
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit integer element of the second 64-bit integer vector of [8 x i8] from the corresp...
Definition: mmintrin.h:498
static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtm64_si64(__m64 __m)
Casts a 64-bit integer vector into a 64-bit signed integer value.
Definition: mmintrin.h:103
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi8(__m64 __m1, __m64 __m2)
Subtracts, with saturation, each 8-bit signed integer element of the second 64-bit integer vector of ...
Definition: mmintrin.h:564
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi16(__m64 __m1, __m64 __m2)
Adds, with saturation, each 16-bit signed integer element of the first 64-bit integer vector of [4 x ...
Definition: mmintrin.h:431
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
Definition: mmintrin.h:1267
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
Definition: mmintrin.h:1223
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pu16(__m64 __m1, __m64 __m2)
Converts, with saturation, 16-bit signed integers from both 64-bit integer vector parameters of [4 x ...
Definition: mmintrin.h:178
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi32_si64(int __i)
Constructs a 64-bit integer vector, setting the lower 32 bits to the value of the 32-bit integer parameter and setting the upper 32 bits to 0.
Definition: mmintrin.h:54
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi32(int __i)
Constructs a 64-bit integer vector of [2 x i32], with each of the 32-bit integer vector elements set ...
Definition: mmintrin.h:1376
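#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), __min_vector_width__(64)))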
Definition: mmintrin.h:25
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi16(__m64 __m, int __count)
Left-shifts each 16-bit signed integer element of a 64-bit integer vector of [4 x i16] by the number ...
Definition: mmintrin.h:750
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi8(char __b)
Constructs a 64-bit integer vector of [8 x i8], with each of the 8-bit integer vector elements set to...
Definition: mmintrin.h:1413
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi32(__m64 __m1, __m64 __m2)
Converts, with saturation, 32-bit signed integers from both 64-bit integer vector parameters of [2 x ...
Definition: mmintrin.h:153
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit integer element of the first 64-bit integer vector of [4 x i16] to the corresponding...
Definition: mmintrin.h:362
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit unsigned integer element of the second 64-bit integer vector of [8 x i8] from th...
Definition: mmintrin.h:612
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu8(__m64 __m1, __m64 __m2)
Adds, with saturation, each 8-bit unsigned integer element of the first 64-bit integer vector of [8 x...
Definition: mmintrin.h:454
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_or_si64(__m64 __m1, __m64 __m2)
Performs a bitwise OR of two 64-bit integer vectors.
Definition: mmintrin.h:1117
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_si64(__m64 __m, int __count)
Left-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the se...
Definition: mmintrin.h:835
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:906
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:997
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition: mmintrin.h:299
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
Definition: mmintrin.h:1179
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
Definition: mmintrin.h:974
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_si64(__m64 __m, int __count)
Right-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the s...
Definition: mmintrin.h:1060
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
Constructs a 64-bit integer vector initialized with the specified 16-bit integer values.
Definition: mmintrin.h:1324
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
Definition: mmintrin.h:1201
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi32(__m64 __m, __m64 __count)
Left-shifts each 32-bit signed integer element of the first parameter, which is a 64-bit integer vect...
Definition: mmintrin.h:773
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi32(int __i0, int __i1)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 32-bit integer va...
Definition: mmintrin.h:1434
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8...
Definition: mmintrin.h:341
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
Definition: mmintrin.h:882
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi32(int __i1, int __i0)
Constructs a 64-bit integer vector initialized with the specified 32-bit integer values.
Definition: mmintrin.h:1301
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu16(__m64 __m1, __m64 __m2)
Adds, with saturation, each 16-bit unsigned integer element of the first 64-bit integer vector of [4 ...
Definition: mmintrin.h:477
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_xor_si64(__m64 __m1, __m64 __m2)
Performs a bitwise exclusive OR of two 64-bit integer vectors.
Definition: mmintrin.h:1135
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi64_m64(long long __i)
Casts a 64-bit signed integer value into a 64-bit integer vector.
Definition: mmintrin.h:87
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
Definition: mmintrin.h:249
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mullo_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:705
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi32(__m64 __m, int __count)
Left-shifts each 32-bit signed integer element of a 64-bit integer vector of [2 x i32] by the number ...
Definition: mmintrin.h:795
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
Definition: mmintrin.h:320
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void)
Constructs a 64-bit integer vector initialized to zero.
Definition: mmintrin.h:1280
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi16(__m64 __m, __m64 __count)
Left-shifts each 16-bit signed integer element of the first parameter, which is a 64-bit integer vect...
Definition: mmintrin.h:728
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:859
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi32(__m64 __m1, __m64 __m2)
Subtracts each 32-bit integer element of the second 64-bit integer vector of [2 x i32] from the corre...
Definition: mmintrin.h:540
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi64_si32(__m64 __m)
Returns the lower 32 bits of a 64-bit integer vector as a 32-bit signed integer.
Definition: mmintrin.h:71
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
Definition: mmintrin.h:1157
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Constructs a 64-bit integer vector initialized with the specified 8-bit integer values.
Definition: mmintrin.h:1355
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi16(__m64 __m1, __m64 __m2)
Converts, with saturation, 16-bit signed integers from both 64-bit integer vector parameters of [4 x ...
Definition: mmintrin.h:128
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
Definition: mmintrin.h:1019
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:684
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_andnot_si64(__m64 __m1, __m64 __m2)
Performs a bitwise NOT of the first 64-bit integer vector, and then performs a bitwise AND of the int...
Definition: mmintrin.h:1099
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit integer element of the second 64-bit integer vector of [4 x i16] from the corre...
Definition: mmintrin.h:519
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_and_si64(__m64 __m1, __m64 __m2)
Performs a bitwise AND of two 64-bit integer vectors.
Definition: mmintrin.h:1078