clang  6.0.0svn
mmintrin.h
Go to the documentation of this file.
1 /*===---- mmintrin.h - MMX intrinsics --------------------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23 
24 #ifndef __MMINTRIN_H
25 #define __MMINTRIN_H
26 
/* Public 64-bit MMX vector type. */
typedef long long __m64 __attribute__((__vector_size__(8)));

/* Internal element-typed views of a 64-bit vector, used when casting
 * operands for the builtins: 8 x char, 4 x short, 2 x int, 1 x long long. */
typedef char __v8qi __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef long long __v1di __attribute__((__vector_size__(8)));

/* Attributes applied to every function defined in this file. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
36 
37 /// \brief Clears the MMX state by setting the state of the x87 stack registers
38 /// to empty.
39 ///
40 /// \headerfile <x86intrin.h>
41 ///
42 /// This intrinsic corresponds to the <c> EMMS </c> instruction.
43 ///
44 static __inline__ void __DEFAULT_FN_ATTRS
45 _mm_empty(void)
46 {
47  __builtin_ia32_emms();
48 }
49 
50 /// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the
51 /// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
52 ///
53 /// \headerfile <x86intrin.h>
54 ///
55 /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
56 ///
57 /// \param __i
58 /// A 32-bit integer value.
59 /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
60 /// parameter. The upper 32 bits are set to 0.
61 static __inline__ __m64 __DEFAULT_FN_ATTRS
63 {
64  return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
65 }
66 
67 /// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
68 /// signed integer.
69 ///
70 /// \headerfile <x86intrin.h>
71 ///
72 /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
73 ///
74 /// \param __m
75 /// A 64-bit integer vector.
76 /// \returns A 32-bit signed integer value containing the lower 32 bits of the
77 /// parameter.
78 static __inline__ int __DEFAULT_FN_ATTRS
79 _mm_cvtsi64_si32(__m64 __m)
80 {
81  return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
82 }
83 
84 /// \brief Casts a 64-bit signed integer value into a 64-bit integer vector.
85 ///
86 /// \headerfile <x86intrin.h>
87 ///
88 /// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction.
89 ///
90 /// \param __i
91 /// A 64-bit signed integer.
92 /// \returns A 64-bit integer vector containing the same bitwise pattern as the
93 /// parameter.
94 static __inline__ __m64 __DEFAULT_FN_ATTRS
95 _mm_cvtsi64_m64(long long __i)
96 {
97  return (__m64)__i;
98 }
99 
100 /// \brief Casts a 64-bit integer vector into a 64-bit signed integer value.
101 ///
102 /// \headerfile <x86intrin.h>
103 ///
104 /// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction.
105 ///
106 /// \param __m
107 /// A 64-bit integer vector.
108 /// \returns A 64-bit signed integer containing the same bitwise pattern as the
109 /// parameter.
110 static __inline__ long long __DEFAULT_FN_ATTRS
111 _mm_cvtm64_si64(__m64 __m)
112 {
113  return (long long)__m;
114 }
115 
116 /// \brief Converts 16-bit signed integers from both 64-bit integer vector
117 /// parameters of [4 x i16] into 8-bit signed integer values, and constructs
118 /// a 64-bit integer vector of [8 x i8] as the result. Positive values
119 /// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
120 /// are saturated to 0x80.
121 ///
122 /// \headerfile <x86intrin.h>
123 ///
124 /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
125 ///
126 /// \param __m1
127 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
128 /// 16-bit signed integer and is converted to an 8-bit signed integer with
129 /// saturation. Positive values greater than 0x7F are saturated to 0x7F.
130 /// Negative values less than 0x80 are saturated to 0x80. The converted
131 /// [4 x i8] values are written to the lower 32 bits of the result.
132 /// \param __m2
133 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
134 /// 16-bit signed integer and is converted to an 8-bit signed integer with
135 /// saturation. Positive values greater than 0x7F are saturated to 0x7F.
136 /// Negative values less than 0x80 are saturated to 0x80. The converted
137 /// [4 x i8] values are written to the upper 32 bits of the result.
138 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
139 /// values.
140 static __inline__ __m64 __DEFAULT_FN_ATTRS
141 _mm_packs_pi16(__m64 __m1, __m64 __m2)
142 {
143  return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
144 }
145 
146 /// \brief Converts 32-bit signed integers from both 64-bit integer vector
147 /// parameters of [2 x i32] into 16-bit signed integer values, and constructs
148 /// a 64-bit integer vector of [4 x i16] as the result. Positive values
149 /// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
150 /// 0x8000 are saturated to 0x8000.
151 ///
152 /// \headerfile <x86intrin.h>
153 ///
154 /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
155 ///
156 /// \param __m1
157 /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
158 /// 32-bit signed integer and is converted to a 16-bit signed integer with
159 /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
160 /// Negative values less than 0x8000 are saturated to 0x8000. The converted
161 /// [2 x i16] values are written to the lower 32 bits of the result.
162 /// \param __m2
163 /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
164 /// 32-bit signed integer and is converted to a 16-bit signed integer with
165 /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
166 /// Negative values less than 0x8000 are saturated to 0x8000. The converted
167 /// [2 x i16] values are written to the upper 32 bits of the result.
168 /// \returns A 64-bit integer vector of [4 x i16] containing the converted
169 /// values.
170 static __inline__ __m64 __DEFAULT_FN_ATTRS
171 _mm_packs_pi32(__m64 __m1, __m64 __m2)
172 {
173  return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
174 }
175 
176 /// \brief Converts 16-bit signed integers from both 64-bit integer vector
177 /// parameters of [4 x i16] into 8-bit unsigned integer values, and
178 /// constructs a 64-bit integer vector of [8 x i8] as the result. Values
179 /// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
180 /// to 0.
181 ///
182 /// \headerfile <x86intrin.h>
183 ///
184 /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
185 ///
186 /// \param __m1
187 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
188 /// 16-bit signed integer and is converted to an 8-bit unsigned integer with
189 /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
190 /// than 0 are saturated to 0. The converted [4 x i8] values are written to
191 /// the lower 32 bits of the result.
192 /// \param __m2
193 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
194 /// 16-bit signed integer and is converted to an 8-bit unsigned integer with
195 /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
196 /// than 0 are saturated to 0. The converted [4 x i8] values are written to
197 /// the upper 32 bits of the result.
198 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
199 /// values.
200 static __inline__ __m64 __DEFAULT_FN_ATTRS
201 _mm_packs_pu16(__m64 __m1, __m64 __m2)
202 {
203  return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
204 }
205 
206 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
207 /// and interleaves them into a 64-bit integer vector of [8 x i8].
208 ///
209 /// \headerfile <x86intrin.h>
210 ///
211 /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
212 ///
213 /// \param __m1
214 /// A 64-bit integer vector of [8 x i8]. \n
215 /// Bits [39:32] are written to bits [7:0] of the result. \n
216 /// Bits [47:40] are written to bits [23:16] of the result. \n
217 /// Bits [55:48] are written to bits [39:32] of the result. \n
218 /// Bits [63:56] are written to bits [55:48] of the result.
219 /// \param __m2
220 /// A 64-bit integer vector of [8 x i8].
221 /// Bits [39:32] are written to bits [15:8] of the result. \n
222 /// Bits [47:40] are written to bits [31:24] of the result. \n
223 /// Bits [55:48] are written to bits [47:40] of the result. \n
224 /// Bits [63:56] are written to bits [63:56] of the result.
225 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
226 /// values.
227 static __inline__ __m64 __DEFAULT_FN_ATTRS
228 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
229 {
230  return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
231 }
232 
233 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
234 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
235 ///
236 /// \headerfile <x86intrin.h>
237 ///
238 /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
239 ///
240 /// \param __m1
241 /// A 64-bit integer vector of [4 x i16].
242 /// Bits [47:32] are written to bits [15:0] of the result. \n
243 /// Bits [63:48] are written to bits [47:32] of the result.
244 /// \param __m2
245 /// A 64-bit integer vector of [4 x i16].
246 /// Bits [47:32] are written to bits [31:16] of the result. \n
247 /// Bits [63:48] are written to bits [63:48] of the result.
248 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
249 /// values.
250 static __inline__ __m64 __DEFAULT_FN_ATTRS
251 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
252 {
253  return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
254 }
255 
256 /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
257 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
258 ///
259 /// \headerfile <x86intrin.h>
260 ///
261 /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
262 ///
263 /// \param __m1
264 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
265 /// the lower 32 bits of the result.
266 /// \param __m2
267 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
268 /// the upper 32 bits of the result.
269 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
270 /// values.
271 static __inline__ __m64 __DEFAULT_FN_ATTRS
272 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
273 {
274  return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
275 }
276 
277 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
278 /// and interleaves them into a 64-bit integer vector of [8 x i8].
279 ///
280 /// \headerfile <x86intrin.h>
281 ///
282 /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
283 ///
284 /// \param __m1
285 /// A 64-bit integer vector of [8 x i8].
286 /// Bits [7:0] are written to bits [7:0] of the result. \n
287 /// Bits [15:8] are written to bits [23:16] of the result. \n
288 /// Bits [23:16] are written to bits [39:32] of the result. \n
289 /// Bits [31:24] are written to bits [55:48] of the result.
290 /// \param __m2
291 /// A 64-bit integer vector of [8 x i8].
292 /// Bits [7:0] are written to bits [15:8] of the result. \n
293 /// Bits [15:8] are written to bits [31:24] of the result. \n
294 /// Bits [23:16] are written to bits [47:40] of the result. \n
295 /// Bits [31:24] are written to bits [63:56] of the result.
296 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
297 /// values.
298 static __inline__ __m64 __DEFAULT_FN_ATTRS
299 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
300 {
301  return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
302 }
303 
304 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
305 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
306 ///
307 /// \headerfile <x86intrin.h>
308 ///
309 /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
310 ///
311 /// \param __m1
312 /// A 64-bit integer vector of [4 x i16].
313 /// Bits [15:0] are written to bits [15:0] of the result. \n
314 /// Bits [31:16] are written to bits [47:32] of the result.
315 /// \param __m2
316 /// A 64-bit integer vector of [4 x i16].
317 /// Bits [15:0] are written to bits [31:16] of the result. \n
318 /// Bits [31:16] are written to bits [63:48] of the result.
319 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
320 /// values.
321 static __inline__ __m64 __DEFAULT_FN_ATTRS
322 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
323 {
324  return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
325 }
326 
327 /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
328 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
329 ///
330 /// \headerfile <x86intrin.h>
331 ///
332 /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
333 ///
334 /// \param __m1
335 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
336 /// the lower 32 bits of the result.
337 /// \param __m2
338 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
339 /// the upper 32 bits of the result.
340 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
341 /// values.
342 static __inline__ __m64 __DEFAULT_FN_ATTRS
343 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
344 {
345  return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
346 }
347 
348 /// \brief Adds each 8-bit integer element of the first 64-bit integer vector
349 /// of [8 x i8] to the corresponding 8-bit integer element of the second
350 /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
351 /// packed into a 64-bit integer vector of [8 x i8].
352 ///
353 /// \headerfile <x86intrin.h>
354 ///
355 /// This intrinsic corresponds to the <c> PADDB </c> instruction.
356 ///
357 /// \param __m1
358 /// A 64-bit integer vector of [8 x i8].
359 /// \param __m2
360 /// A 64-bit integer vector of [8 x i8].
361 /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
362 /// parameters.
363 static __inline__ __m64 __DEFAULT_FN_ATTRS
364 _mm_add_pi8(__m64 __m1, __m64 __m2)
365 {
366  return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
367 }
368 
369 /// \brief Adds each 16-bit integer element of the first 64-bit integer vector
370 /// of [4 x i16] to the corresponding 16-bit integer element of the second
371 /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
372 /// packed into a 64-bit integer vector of [4 x i16].
373 ///
374 /// \headerfile <x86intrin.h>
375 ///
376 /// This intrinsic corresponds to the <c> PADDW </c> instruction.
377 ///
378 /// \param __m1
379 /// A 64-bit integer vector of [4 x i16].
380 /// \param __m2
381 /// A 64-bit integer vector of [4 x i16].
382 /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
383 /// parameters.
384 static __inline__ __m64 __DEFAULT_FN_ATTRS
385 _mm_add_pi16(__m64 __m1, __m64 __m2)
386 {
387  return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
388 }
389 
390 /// \brief Adds each 32-bit integer element of the first 64-bit integer vector
391 /// of [2 x i32] to the corresponding 32-bit integer element of the second
392 /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
393 /// packed into a 64-bit integer vector of [2 x i32].
394 ///
395 /// \headerfile <x86intrin.h>
396 ///
397 /// This intrinsic corresponds to the <c> PADDD </c> instruction.
398 ///
399 /// \param __m1
400 /// A 64-bit integer vector of [2 x i32].
401 /// \param __m2
402 /// A 64-bit integer vector of [2 x i32].
403 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
404 /// parameters.
405 static __inline__ __m64 __DEFAULT_FN_ATTRS
406 _mm_add_pi32(__m64 __m1, __m64 __m2)
407 {
408  return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
409 }
410 
411 /// \brief Adds each 8-bit signed integer element of the first 64-bit integer
412 /// vector of [8 x i8] to the corresponding 8-bit signed integer element of
413 /// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
414 /// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
415 /// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
416 ///
417 /// \headerfile <x86intrin.h>
418 ///
419 /// This intrinsic corresponds to the <c> PADDSB </c> instruction.
420 ///
421 /// \param __m1
422 /// A 64-bit integer vector of [8 x i8].
423 /// \param __m2
424 /// A 64-bit integer vector of [8 x i8].
425 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
426 /// of both parameters.
427 static __inline__ __m64 __DEFAULT_FN_ATTRS
428 _mm_adds_pi8(__m64 __m1, __m64 __m2)
429 {
430  return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
431 }
432 
433 /// \brief Adds each 16-bit signed integer element of the first 64-bit integer
434 /// vector of [4 x i16] to the corresponding 16-bit signed integer element of
435 /// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
436 /// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
437 /// saturated to 0x8000. The results are packed into a 64-bit integer vector
438 /// of [4 x i16].
439 ///
440 /// \headerfile <x86intrin.h>
441 ///
442 /// This intrinsic corresponds to the <c> PADDSW </c> instruction.
443 ///
444 /// \param __m1
445 /// A 64-bit integer vector of [4 x i16].
446 /// \param __m2
447 /// A 64-bit integer vector of [4 x i16].
448 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
449 /// of both parameters.
450 static __inline__ __m64 __DEFAULT_FN_ATTRS
451 _mm_adds_pi16(__m64 __m1, __m64 __m2)
452 {
453  return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
454 }
455 
456 /// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer
457 /// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
458 /// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
459 /// saturated to 0xFF. The results are packed into a 64-bit integer vector of
460 /// [8 x i8].
461 ///
462 /// \headerfile <x86intrin.h>
463 ///
464 /// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
465 ///
466 /// \param __m1
467 /// A 64-bit integer vector of [8 x i8].
468 /// \param __m2
469 /// A 64-bit integer vector of [8 x i8].
470 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
471 /// unsigned sums of both parameters.
472 static __inline__ __m64 __DEFAULT_FN_ATTRS
473 _mm_adds_pu8(__m64 __m1, __m64 __m2)
474 {
475  return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
476 }
477 
478 /// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer
479 /// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
480 /// of the second 64-bit integer vector of [4 x i16]. Sums greater than
481 /// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
482 /// integer vector of [4 x i16].
483 ///
484 /// \headerfile <x86intrin.h>
485 ///
486 /// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
487 ///
488 /// \param __m1
489 /// A 64-bit integer vector of [4 x i16].
490 /// \param __m2
491 /// A 64-bit integer vector of [4 x i16].
492 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
493 /// unsigned sums of both parameters.
494 static __inline__ __m64 __DEFAULT_FN_ATTRS
495 _mm_adds_pu16(__m64 __m1, __m64 __m2)
496 {
497  return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
498 }
499 
500 /// \brief Subtracts each 8-bit integer element of the second 64-bit integer
501 /// vector of [8 x i8] from the corresponding 8-bit integer element of the
502 /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
503 /// are packed into a 64-bit integer vector of [8 x i8].
504 ///
505 /// \headerfile <x86intrin.h>
506 ///
507 /// This intrinsic corresponds to the <c> PSUBB </c> instruction.
508 ///
509 /// \param __m1
510 /// A 64-bit integer vector of [8 x i8] containing the minuends.
511 /// \param __m2
512 /// A 64-bit integer vector of [8 x i8] containing the subtrahends.
513 /// \returns A 64-bit integer vector of [8 x i8] containing the differences of
514 /// both parameters.
515 static __inline__ __m64 __DEFAULT_FN_ATTRS
516 _mm_sub_pi8(__m64 __m1, __m64 __m2)
517 {
518  return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
519 }
520 
521 /// \brief Subtracts each 16-bit integer element of the second 64-bit integer
522 /// vector of [4 x i16] from the corresponding 16-bit integer element of the
523 /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
524 /// results are packed into a 64-bit integer vector of [4 x i16].
525 ///
526 /// \headerfile <x86intrin.h>
527 ///
528 /// This intrinsic corresponds to the <c> PSUBW </c> instruction.
529 ///
530 /// \param __m1
531 /// A 64-bit integer vector of [4 x i16] containing the minuends.
532 /// \param __m2
533 /// A 64-bit integer vector of [4 x i16] containing the subtrahends.
534 /// \returns A 64-bit integer vector of [4 x i16] containing the differences of
535 /// both parameters.
536 static __inline__ __m64 __DEFAULT_FN_ATTRS
537 _mm_sub_pi16(__m64 __m1, __m64 __m2)
538 {
539  return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
540 }
541 
542 /// \brief Subtracts each 32-bit integer element of the second 64-bit integer
543 /// vector of [2 x i32] from the corresponding 32-bit integer element of the
544 /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
545 /// results are packed into a 64-bit integer vector of [2 x i32].
546 ///
547 /// \headerfile <x86intrin.h>
548 ///
549 /// This intrinsic corresponds to the <c> PSUBD </c> instruction.
550 ///
551 /// \param __m1
552 /// A 64-bit integer vector of [2 x i32] containing the minuends.
553 /// \param __m2
554 /// A 64-bit integer vector of [2 x i32] containing the subtrahends.
555 /// \returns A 64-bit integer vector of [2 x i32] containing the differences of
556 /// both parameters.
557 static __inline__ __m64 __DEFAULT_FN_ATTRS
558 _mm_sub_pi32(__m64 __m1, __m64 __m2)
559 {
560  return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
561 }
562 
563 /// \brief Subtracts each 8-bit signed integer element of the second 64-bit
564 /// integer vector of [8 x i8] from the corresponding 8-bit signed integer
565 /// element of the first 64-bit integer vector of [8 x i8]. Positive results
566 /// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
567 /// are saturated to 0x80. The results are packed into a 64-bit integer
568 /// vector of [8 x i8].
569 ///
570 /// \headerfile <x86intrin.h>
571 ///
572 /// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
573 ///
574 /// \param __m1
575 /// A 64-bit integer vector of [8 x i8] containing the minuends.
576 /// \param __m2
577 /// A 64-bit integer vector of [8 x i8] containing the subtrahends.
578 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
579 /// differences of both parameters.
580 static __inline__ __m64 __DEFAULT_FN_ATTRS
581 _mm_subs_pi8(__m64 __m1, __m64 __m2)
582 {
583  return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
584 }
585 
586 /// \brief Subtracts each 16-bit signed integer element of the second 64-bit
587 /// integer vector of [4 x i16] from the corresponding 16-bit signed integer
588 /// element of the first 64-bit integer vector of [4 x i16]. Positive results
589 /// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
590 /// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
591 /// integer vector of [4 x i16].
592 ///
593 /// \headerfile <x86intrin.h>
594 ///
595 /// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
596 ///
597 /// \param __m1
598 /// A 64-bit integer vector of [4 x i16] containing the minuends.
599 /// \param __m2
600 /// A 64-bit integer vector of [4 x i16] containing the subtrahends.
601 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
602 /// differences of both parameters.
603 static __inline__ __m64 __DEFAULT_FN_ATTRS
604 _mm_subs_pi16(__m64 __m1, __m64 __m2)
605 {
606  return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
607 }
608 
609 /// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
610 /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
611 /// element of the first 64-bit integer vector of [8 x i8].
612 ///
613 /// If an element of the first vector is less than the corresponding element
614 /// of the second vector, the result is saturated to 0. The results are
615 /// packed into a 64-bit integer vector of [8 x i8].
616 ///
617 /// \headerfile <x86intrin.h>
618 ///
619 /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
620 ///
621 /// \param __m1
622 /// A 64-bit integer vector of [8 x i8] containing the minuends.
623 /// \param __m2
624 /// A 64-bit integer vector of [8 x i8] containing the subtrahends.
625 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
626 /// differences of both parameters.
627 static __inline__ __m64 __DEFAULT_FN_ATTRS
628 _mm_subs_pu8(__m64 __m1, __m64 __m2)
629 {
630  return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
631 }
632 
633 /// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
634 /// integer vector of [4 x i16] from the corresponding 16-bit unsigned
635 /// integer element of the first 64-bit integer vector of [4 x i16].
636 ///
637 /// If an element of the first vector is less than the corresponding element
638 /// of the second vector, the result is saturated to 0. The results are
639 /// packed into a 64-bit integer vector of [4 x i16].
640 ///
641 /// \headerfile <x86intrin.h>
642 ///
643 /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
644 ///
645 /// \param __m1
646 /// A 64-bit integer vector of [4 x i16] containing the minuends.
647 /// \param __m2
648 /// A 64-bit integer vector of [4 x i16] containing the subtrahends.
649 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
650 /// differences of both parameters.
651 static __inline__ __m64 __DEFAULT_FN_ATTRS
652 _mm_subs_pu16(__m64 __m1, __m64 __m2)
653 {
654  return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
655 }
656 
657 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
658 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer
659 /// element of the second 64-bit integer vector of [4 x i16] and get four
660 /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
661 /// The lower 32 bits of these two sums are packed into a 64-bit integer
662 /// vector of [2 x i32].
663 ///
664 /// For example, bits [15:0] of both parameters are multiplied, bits [31:16]
665 /// of both parameters are multiplied, and the sum of both results is written
666 /// to bits [31:0] of the result.
667 ///
668 /// \headerfile <x86intrin.h>
669 ///
670 /// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
671 ///
672 /// \param __m1
673 /// A 64-bit integer vector of [4 x i16].
674 /// \param __m2
675 /// A 64-bit integer vector of [4 x i16].
676 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of
677 /// products of both parameters.
678 static __inline__ __m64 __DEFAULT_FN_ATTRS
679 _mm_madd_pi16(__m64 __m1, __m64 __m2)
680 {
681  return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
682 }
683 
684 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
685 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer
686 /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
687 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
688 ///
689 /// \headerfile <x86intrin.h>
690 ///
691 /// This intrinsic corresponds to the <c> PMULHW </c> instruction.
692 ///
693 /// \param __m1
694 /// A 64-bit integer vector of [4 x i16].
695 /// \param __m2
696 /// A 64-bit integer vector of [4 x i16].
697 /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
698 /// of the products of both parameters.
699 static __inline__ __m64 __DEFAULT_FN_ATTRS
700 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
701 {
702  return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
703 }
704 
705 /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
706 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer
707 /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
708 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
709 ///
710 /// \headerfile <x86intrin.h>
711 ///
712 /// This intrinsic corresponds to the <c> PMULLW </c> instruction.
713 ///
714 /// \param __m1
715 /// A 64-bit integer vector of [4 x i16].
716 /// \param __m2
717 /// A 64-bit integer vector of [4 x i16].
718 /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
719 /// of the products of both parameters.
720 static __inline__ __m64 __DEFAULT_FN_ATTRS
721 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
722 {
723  return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
724 }
725 
726 /// \brief Left-shifts each 16-bit signed integer element of the first
727 /// parameter, which is a 64-bit integer vector of [4 x i16], by the number
728 /// of bits specified by the second parameter, which is a 64-bit integer. The
729 /// lower 16 bits of the results are packed into a 64-bit integer vector of
730 /// [4 x i16].
731 ///
732 /// \headerfile <x86intrin.h>
733 ///
734 /// This intrinsic corresponds to the <c> PSLLW </c> instruction.
735 ///
736 /// \param __m
737 /// A 64-bit integer vector of [4 x i16].
738 /// \param __count
739 /// A 64-bit integer vector interpreted as a single 64-bit integer.
740 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
741 /// values. If \a __count is greater or equal to 16, the result is set to all
742 /// 0.
743 static __inline__ __m64 __DEFAULT_FN_ATTRS
744 _mm_sll_pi16(__m64 __m, __m64 __count)
745 {
746  return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
747 }
748 
749 /// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer
750 /// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
751 /// The lower 16 bits of the results are packed into a 64-bit integer vector
752 /// of [4 x i16].
753 ///
754 /// \headerfile <x86intrin.h>
755 ///
756 /// This intrinsic corresponds to the <c> PSLLW </c> instruction.
757 ///
758 /// \param __m
759 /// A 64-bit integer vector of [4 x i16].
760 /// \param __count
761 /// A 32-bit integer value.
762 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
763 /// values. If \a __count is greater or equal to 16, the result is set to all
764 /// 0.
765 static __inline__ __m64 __DEFAULT_FN_ATTRS
766 _mm_slli_pi16(__m64 __m, int __count)
767 {
768  return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
769 }
770 
771 /// \brief Left-shifts each 32-bit signed integer element of the first
772 /// parameter, which is a 64-bit integer vector of [2 x i32], by the number
773 /// of bits specified by the second parameter, which is a 64-bit integer. The
774 /// lower 32 bits of the results are packed into a 64-bit integer vector of
775 /// [2 x i32].
776 ///
777 /// \headerfile <x86intrin.h>
778 ///
779 /// This intrinsic corresponds to the <c> PSLLD </c> instruction.
780 ///
781 /// \param __m
782 /// A 64-bit integer vector of [2 x i32].
783 /// \param __count
784 /// A 64-bit integer vector interpreted as a single 64-bit integer.
785 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
786 /// values. If \a __count is greater or equal to 32, the result is set to all
787 /// 0.
788 static __inline__ __m64 __DEFAULT_FN_ATTRS
789 _mm_sll_pi32(__m64 __m, __m64 __count)
790 {
791  return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
792 }
793 
794 /// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer
795 /// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
796 /// The lower 32 bits of the results are packed into a 64-bit integer vector
797 /// of [2 x i32].
798 ///
799 /// \headerfile <x86intrin.h>
800 ///
801 /// This intrinsic corresponds to the <c> PSLLD </c> instruction.
802 ///
803 /// \param __m
804 /// A 64-bit integer vector of [2 x i32].
805 /// \param __count
806 /// A 32-bit integer value.
807 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
808 /// values. If \a __count is greater or equal to 32, the result is set to all
809 /// 0.
810 static __inline__ __m64 __DEFAULT_FN_ATTRS
811 _mm_slli_pi32(__m64 __m, int __count)
812 {
813  return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
814 }
815 
816 /// \brief Left-shifts the first 64-bit integer parameter by the number of bits
817 /// specified by the second 64-bit integer parameter. The lower 64 bits of
818 /// result are returned.
819 ///
820 /// \headerfile <x86intrin.h>
821 ///
822 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
823 ///
824 /// \param __m
825 /// A 64-bit integer vector interpreted as a single 64-bit integer.
826 /// \param __count
827 /// A 64-bit integer vector interpreted as a single 64-bit integer.
828 /// \returns A 64-bit integer vector containing the left-shifted value. If
829 /// \a __count is greater or equal to 64, the result is set to 0.
830 static __inline__ __m64 __DEFAULT_FN_ATTRS
831 _mm_sll_si64(__m64 __m, __m64 __count)
832 {
833  return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
834 }
835 
836 /// \brief Left-shifts the first parameter, which is a 64-bit integer, by the
837 /// number of bits specified by the second parameter, which is a 32-bit
838 /// integer. The lower 64 bits of result are returned.
839 ///
840 /// \headerfile <x86intrin.h>
841 ///
842 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
843 ///
844 /// \param __m
845 /// A 64-bit integer vector interpreted as a single 64-bit integer.
846 /// \param __count
847 /// A 32-bit integer value.
848 /// \returns A 64-bit integer vector containing the left-shifted value. If
849 /// \a __count is greater or equal to 64, the result is set to 0.
850 static __inline__ __m64 __DEFAULT_FN_ATTRS
851 _mm_slli_si64(__m64 __m, int __count)
852 {
853  return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
854 }
855 
856 /// \brief Right-shifts each 16-bit integer element of the first parameter,
857 /// which is a 64-bit integer vector of [4 x i16], by the number of bits
858 /// specified by the second parameter, which is a 64-bit integer.
859 ///
860 /// High-order bits are filled with the sign bit of the initial value of each
861 /// 16-bit element. The 16-bit results are packed into a 64-bit integer
862 /// vector of [4 x i16].
863 ///
864 /// \headerfile <x86intrin.h>
865 ///
866 /// This intrinsic corresponds to the <c> PSRAW </c> instruction.
867 ///
868 /// \param __m
869 /// A 64-bit integer vector of [4 x i16].
870 /// \param __count
871 /// A 64-bit integer vector interpreted as a single 64-bit integer.
872 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
873 /// values.
874 static __inline__ __m64 __DEFAULT_FN_ATTRS
875 _mm_sra_pi16(__m64 __m, __m64 __count)
876 {
877  return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
878 }
879 
880 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
881 /// of [4 x i16] by the number of bits specified by a 32-bit integer.
882 ///
883 /// High-order bits are filled with the sign bit of the initial value of each
884 /// 16-bit element. The 16-bit results are packed into a 64-bit integer
885 /// vector of [4 x i16].
886 ///
887 /// \headerfile <x86intrin.h>
888 ///
889 /// This intrinsic corresponds to the <c> PSRAW </c> instruction.
890 ///
891 /// \param __m
892 /// A 64-bit integer vector of [4 x i16].
893 /// \param __count
894 /// A 32-bit integer value.
895 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
896 /// values.
897 static __inline__ __m64 __DEFAULT_FN_ATTRS
898 _mm_srai_pi16(__m64 __m, int __count)
899 {
900  return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
901 }
902 
903 /// \brief Right-shifts each 32-bit integer element of the first parameter,
904 /// which is a 64-bit integer vector of [2 x i32], by the number of bits
905 /// specified by the second parameter, which is a 64-bit integer.
906 ///
907 /// High-order bits are filled with the sign bit of the initial value of each
908 /// 32-bit element. The 32-bit results are packed into a 64-bit integer
909 /// vector of [2 x i32].
910 ///
911 /// \headerfile <x86intrin.h>
912 ///
913 /// This intrinsic corresponds to the <c> PSRAD </c> instruction.
914 ///
915 /// \param __m
916 /// A 64-bit integer vector of [2 x i32].
917 /// \param __count
918 /// A 64-bit integer vector interpreted as a single 64-bit integer.
919 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
920 /// values.
921 static __inline__ __m64 __DEFAULT_FN_ATTRS
922 _mm_sra_pi32(__m64 __m, __m64 __count)
923 {
924  return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
925 }
926 
927 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
928 /// of [2 x i32] by the number of bits specified by a 32-bit integer.
929 ///
930 /// High-order bits are filled with the sign bit of the initial value of each
931 /// 32-bit element. The 32-bit results are packed into a 64-bit integer
932 /// vector of [2 x i32].
933 ///
934 /// \headerfile <x86intrin.h>
935 ///
936 /// This intrinsic corresponds to the <c> PSRAD </c> instruction.
937 ///
938 /// \param __m
939 /// A 64-bit integer vector of [2 x i32].
940 /// \param __count
941 /// A 32-bit integer value.
942 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
943 /// values.
944 static __inline__ __m64 __DEFAULT_FN_ATTRS
945 _mm_srai_pi32(__m64 __m, int __count)
946 {
947  return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
948 }
949 
950 /// \brief Right-shifts each 16-bit integer element of the first parameter,
951 /// which is a 64-bit integer vector of [4 x i16], by the number of bits
952 /// specified by the second parameter, which is a 64-bit integer.
953 ///
954 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit
955 /// integer vector of [4 x i16].
956 ///
957 /// \headerfile <x86intrin.h>
958 ///
959 /// This intrinsic corresponds to the <c> PSRLW </c> instruction.
960 ///
961 /// \param __m
962 /// A 64-bit integer vector of [4 x i16].
963 /// \param __count
964 /// A 64-bit integer vector interpreted as a single 64-bit integer.
965 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
966 /// values.
967 static __inline__ __m64 __DEFAULT_FN_ATTRS
968 _mm_srl_pi16(__m64 __m, __m64 __count)
969 {
970  return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
971 }
972 
973 /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
974 /// of [4 x i16] by the number of bits specified by a 32-bit integer.
975 ///
976 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit
977 /// integer vector of [4 x i16].
978 ///
979 /// \headerfile <x86intrin.h>
980 ///
981 /// This intrinsic corresponds to the <c> PSRLW </c> instruction.
982 ///
983 /// \param __m
984 /// A 64-bit integer vector of [4 x i16].
985 /// \param __count
986 /// A 32-bit integer value.
987 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
988 /// values.
989 static __inline__ __m64 __DEFAULT_FN_ATTRS
990 _mm_srli_pi16(__m64 __m, int __count)
991 {
992  return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
993 }
994 
995 /// \brief Right-shifts each 32-bit integer element of the first parameter,
996 /// which is a 64-bit integer vector of [2 x i32], by the number of bits
997 /// specified by the second parameter, which is a 64-bit integer.
998 ///
999 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1000 /// integer vector of [2 x i32].
1001 ///
1002 /// \headerfile <x86intrin.h>
1003 ///
1004 /// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1005 ///
1006 /// \param __m
1007 /// A 64-bit integer vector of [2 x i32].
1008 /// \param __count
1009 /// A 64-bit integer vector interpreted as a single 64-bit integer.
1010 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1011 /// values.
1012 static __inline__ __m64 __DEFAULT_FN_ATTRS
1013 _mm_srl_pi32(__m64 __m, __m64 __count)
1014 {
1015  return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
1016 }
1017 
1018 /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
1019 /// of [2 x i32] by the number of bits specified by a 32-bit integer.
1020 ///
1021 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1022 /// integer vector of [2 x i32].
1023 ///
1024 /// \headerfile <x86intrin.h>
1025 ///
1026 /// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1027 ///
1028 /// \param __m
1029 /// A 64-bit integer vector of [2 x i32].
1030 /// \param __count
1031 /// A 32-bit integer value.
1032 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1033 /// values.
1034 static __inline__ __m64 __DEFAULT_FN_ATTRS
1035 _mm_srli_pi32(__m64 __m, int __count)
1036 {
1037  return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
1038 }
1039 
1040 /// \brief Right-shifts the first 64-bit integer parameter by the number of bits
1041 /// specified by the second 64-bit integer parameter.
1042 ///
1043 /// High-order bits are cleared.
1044 ///
1045 /// \headerfile <x86intrin.h>
1046 ///
1047 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1048 ///
1049 /// \param __m
1050 /// A 64-bit integer vector interpreted as a single 64-bit integer.
1051 /// \param __count
1052 /// A 64-bit integer vector interpreted as a single 64-bit integer.
1053 /// \returns A 64-bit integer vector containing the right-shifted value.
1054 static __inline__ __m64 __DEFAULT_FN_ATTRS
1055 _mm_srl_si64(__m64 __m, __m64 __count)
1056 {
1057  return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
1058 }
1059 
1060 /// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
1061 /// number of bits specified by the second parameter, which is a 32-bit
1062 /// integer.
1063 ///
1064 /// High-order bits are cleared.
1065 ///
1066 /// \headerfile <x86intrin.h>
1067 ///
1068 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1069 ///
1070 /// \param __m
1071 /// A 64-bit integer vector interpreted as a single 64-bit integer.
1072 /// \param __count
1073 /// A 32-bit integer value.
1074 /// \returns A 64-bit integer vector containing the right-shifted value.
1075 static __inline__ __m64 __DEFAULT_FN_ATTRS
1076 _mm_srli_si64(__m64 __m, int __count)
1077 {
1078  return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
1079 }
1080 
1081 /// \brief Performs a bitwise AND of two 64-bit integer vectors.
1082 ///
1083 /// \headerfile <x86intrin.h>
1084 ///
1085 /// This intrinsic corresponds to the <c> PAND </c> instruction.
1086 ///
1087 /// \param __m1
1088 /// A 64-bit integer vector.
1089 /// \param __m2
1090 /// A 64-bit integer vector.
1091 /// \returns A 64-bit integer vector containing the bitwise AND of both
1092 /// parameters.
1093 static __inline__ __m64 __DEFAULT_FN_ATTRS
1094 _mm_and_si64(__m64 __m1, __m64 __m2)
1095 {
1096  return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
1097 }
1098 
1099 /// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then
1100 /// performs a bitwise AND of the intermediate result and the second 64-bit
1101 /// integer vector.
1102 ///
1103 /// \headerfile <x86intrin.h>
1104 ///
1105 /// This intrinsic corresponds to the <c> PANDN </c> instruction.
1106 ///
1107 /// \param __m1
1108 /// A 64-bit integer vector. The one's complement of this parameter is used
1109 /// in the bitwise AND.
1110 /// \param __m2
1111 /// A 64-bit integer vector.
1112 /// \returns A 64-bit integer vector containing the bitwise AND of the second
1113 /// parameter and the one's complement of the first parameter.
1114 static __inline__ __m64 __DEFAULT_FN_ATTRS
1115 _mm_andnot_si64(__m64 __m1, __m64 __m2)
1116 {
1117  return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
1118 }
1119 
1120 /// \brief Performs a bitwise OR of two 64-bit integer vectors.
1121 ///
1122 /// \headerfile <x86intrin.h>
1123 ///
1124 /// This intrinsic corresponds to the <c> POR </c> instruction.
1125 ///
1126 /// \param __m1
1127 /// A 64-bit integer vector.
1128 /// \param __m2
1129 /// A 64-bit integer vector.
1130 /// \returns A 64-bit integer vector containing the bitwise OR of both
1131 /// parameters.
1132 static __inline__ __m64 __DEFAULT_FN_ATTRS
1133 _mm_or_si64(__m64 __m1, __m64 __m2)
1134 {
1135  return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
1136 }
1137 
1138 /// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors.
1139 ///
1140 /// \headerfile <x86intrin.h>
1141 ///
1142 /// This intrinsic corresponds to the <c> PXOR </c> instruction.
1143 ///
1144 /// \param __m1
1145 /// A 64-bit integer vector.
1146 /// \param __m2
1147 /// A 64-bit integer vector.
1148 /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1149 /// parameters.
1150 static __inline__ __m64 __DEFAULT_FN_ATTRS
1151 _mm_xor_si64(__m64 __m1, __m64 __m2)
1152 {
1153  return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
1154 }
1155 
1156 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
1157 /// [8 x i8] to determine if the element of the first vector is equal to the
1158 /// corresponding element of the second vector.
1159 ///
1160 /// The comparison yields 0 for false, 0xFF for true.
1161 ///
1162 /// \headerfile <x86intrin.h>
1163 ///
1164 /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
1165 ///
1166 /// \param __m1
1167 /// A 64-bit integer vector of [8 x i8].
1168 /// \param __m2
1169 /// A 64-bit integer vector of [8 x i8].
1170 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1171 /// results.
1172 static __inline__ __m64 __DEFAULT_FN_ATTRS
1173 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
1174 {
1175  return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
1176 }
1177 
1178 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
1179 /// [4 x i16] to determine if the element of the first vector is equal to the
1180 /// corresponding element of the second vector.
1181 ///
1182 /// The comparison yields 0 for false, 0xFFFF for true.
1183 ///
1184 /// \headerfile <x86intrin.h>
1185 ///
1186 /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
1187 ///
1188 /// \param __m1
1189 /// A 64-bit integer vector of [4 x i16].
1190 /// \param __m2
1191 /// A 64-bit integer vector of [4 x i16].
1192 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1193 /// results.
1194 static __inline__ __m64 __DEFAULT_FN_ATTRS
1195 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
1196 {
1197  return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
1198 }
1199 
1200 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
1201 /// [2 x i32] to determine if the element of the first vector is equal to the
1202 /// corresponding element of the second vector.
1203 ///
1204 /// The comparison yields 0 for false, 0xFFFFFFFF for true.
1205 ///
1206 /// \headerfile <x86intrin.h>
1207 ///
1208 /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
1209 ///
1210 /// \param __m1
1211 /// A 64-bit integer vector of [2 x i32].
1212 /// \param __m2
1213 /// A 64-bit integer vector of [2 x i32].
1214 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1215 /// results.
1216 static __inline__ __m64 __DEFAULT_FN_ATTRS
1217 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
1218 {
1219  return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
1220 }
1221 
1222 /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
1223 /// [8 x i8] to determine if the element of the first vector is greater than
1224 /// the corresponding element of the second vector.
1225 ///
1226 /// The comparison yields 0 for false, 0xFF for true.
1227 ///
1228 /// \headerfile <x86intrin.h>
1229 ///
1230 /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
1231 ///
1232 /// \param __m1
1233 /// A 64-bit integer vector of [8 x i8].
1234 /// \param __m2
1235 /// A 64-bit integer vector of [8 x i8].
1236 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1237 /// results.
1238 static __inline__ __m64 __DEFAULT_FN_ATTRS
1239 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
1240 {
1241  return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
1242 }
1243 
1244 /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
1245 /// [4 x i16] to determine if the element of the first vector is greater than
1246 /// the corresponding element of the second vector.
1247 ///
1248 /// The comparison yields 0 for false, 0xFFFF for true.
1249 ///
1250 /// \headerfile <x86intrin.h>
1251 ///
1252 /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
1253 ///
1254 /// \param __m1
1255 /// A 64-bit integer vector of [4 x i16].
1256 /// \param __m2
1257 /// A 64-bit integer vector of [4 x i16].
1258 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1259 /// results.
1260 static __inline__ __m64 __DEFAULT_FN_ATTRS
1261 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
1262 {
1263  return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
1264 }
1265 
1266 /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
1267 /// [2 x i32] to determine if the element of the first vector is greater than
1268 /// the corresponding element of the second vector.
1269 ///
1270 /// The comparison yields 0 for false, 0xFFFFFFFF for true.
1271 ///
1272 /// \headerfile <x86intrin.h>
1273 ///
1274 /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
1275 ///
1276 /// \param __m1
1277 /// A 64-bit integer vector of [2 x i32].
1278 /// \param __m2
1279 /// A 64-bit integer vector of [2 x i32].
1280 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1281 /// results.
1282 static __inline__ __m64 __DEFAULT_FN_ATTRS
1283 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
1284 {
1285  return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
1286 }
1287 
1288 /// \brief Constructs a 64-bit integer vector initialized to zero.
1289 ///
1290 /// \headerfile <x86intrin.h>
1291 ///
1292 /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
1293 ///
1294 /// \returns An initialized 64-bit integer vector with all elements set to zero.
1295 static __inline__ __m64 __DEFAULT_FN_ATTRS
1297 {
1298  return (__m64){ 0LL };
1299 }
1300 
1301 /// \brief Constructs a 64-bit integer vector initialized with the specified
1302 /// 32-bit integer values.
1303 ///
1304 /// \headerfile <x86intrin.h>
1305 ///
1306 /// This intrinsic is a utility function and does not correspond to a specific
1307 /// instruction.
1308 ///
1309 /// \param __i1
1310 /// A 32-bit integer value used to initialize the upper 32 bits of the
1311 /// result.
1312 /// \param __i0
1313 /// A 32-bit integer value used to initialize the lower 32 bits of the
1314 /// result.
1315 /// \returns An initialized 64-bit integer vector.
1316 static __inline__ __m64 __DEFAULT_FN_ATTRS
1317 _mm_set_pi32(int __i1, int __i0)
1318 {
1319  return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
1320 }
1321 
1322 /// \brief Constructs a 64-bit integer vector initialized with the specified
1323 /// 16-bit integer values.
1324 ///
1325 /// \headerfile <x86intrin.h>
1326 ///
1327 /// This intrinsic is a utility function and does not correspond to a specific
1328 /// instruction.
1329 ///
1330 /// \param __s3
1331 /// A 16-bit integer value used to initialize bits [63:48] of the result.
1332 /// \param __s2
1333 /// A 16-bit integer value used to initialize bits [47:32] of the result.
1334 /// \param __s1
1335 /// A 16-bit integer value used to initialize bits [31:16] of the result.
1336 /// \param __s0
1337 /// A 16-bit integer value used to initialize bits [15:0] of the result.
1338 /// \returns An initialized 64-bit integer vector.
1339 static __inline__ __m64 __DEFAULT_FN_ATTRS
1340 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
1341 {
1342  return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
1343 }
1344 
1345 /// \brief Constructs a 64-bit integer vector initialized with the specified
1346 /// 8-bit integer values.
1347 ///
1348 /// \headerfile <x86intrin.h>
1349 ///
1350 /// This intrinsic is a utility function and does not correspond to a specific
1351 /// instruction.
1352 ///
1353 /// \param __b7
1354 /// An 8-bit integer value used to initialize bits [63:56] of the result.
1355 /// \param __b6
1356 /// An 8-bit integer value used to initialize bits [55:48] of the result.
1357 /// \param __b5
1358 /// An 8-bit integer value used to initialize bits [47:40] of the result.
1359 /// \param __b4
1360 /// An 8-bit integer value used to initialize bits [39:32] of the result.
1361 /// \param __b3
1362 /// An 8-bit integer value used to initialize bits [31:24] of the result.
1363 /// \param __b2
1364 /// An 8-bit integer value used to initialize bits [23:16] of the result.
1365 /// \param __b1
1366 /// An 8-bit integer value used to initialize bits [15:8] of the result.
1367 /// \param __b0
1368 /// An 8-bit integer value used to initialize bits [7:0] of the result.
1369 /// \returns An initialized 64-bit integer vector.
1370 static __inline__ __m64 __DEFAULT_FN_ATTRS
1371 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1372  char __b1, char __b0)
1373 {
1374  return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
1375  __b4, __b5, __b6, __b7);
1376 }
1377 
1378 /// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the
1379 /// 32-bit integer vector elements set to the specified 32-bit integer
1380 /// value.
1381 ///
1382 /// \headerfile <x86intrin.h>
1383 ///
1384 /// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
1385 ///
1386 /// \param __i
1387 /// A 32-bit integer value used to initialize each vector element of the
1388 /// result.
1389 /// \returns An initialized 64-bit integer vector of [2 x i32].
1390 static __inline__ __m64 __DEFAULT_FN_ATTRS
1392 {
1393  return _mm_set_pi32(__i, __i);
1394 }
1395 
1396 /// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the
1397 /// 16-bit integer vector elements set to the specified 16-bit integer
1398 /// value.
1399 ///
1400 /// \headerfile <x86intrin.h>
1401 ///
1402 /// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
1403 ///
1404 /// \param __w
1405 /// A 16-bit integer value used to initialize each vector element of the
1406 /// result.
1407 /// \returns An initialized 64-bit integer vector of [4 x i16].
1408 static __inline__ __m64 __DEFAULT_FN_ATTRS
1409 _mm_set1_pi16(short __w)
1410 {
1411  return _mm_set_pi16(__w, __w, __w, __w);
1412 }
1413 
1414 /// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the
1415 /// 8-bit integer vector elements set to the specified 8-bit integer value.
1416 ///
1417 /// \headerfile <x86intrin.h>
1418 ///
1419 /// This intrinsic corresponds to the <c> VPUNPCKLBW + VPSHUFLW / PUNPCKLBW +
1420 /// PSHUFLW </c> instruction.
1421 ///
1422 /// \param __b
1423 /// An 8-bit integer value used to initialize each vector element of the
1424 /// result.
1425 /// \returns An initialized 64-bit integer vector of [8 x i8].
1426 static __inline__ __m64 __DEFAULT_FN_ATTRS
1428 {
1429  return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
1430 }
1431 
1432 /// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1433 /// the specified 32-bit integer values.
1434 ///
1435 /// \headerfile <x86intrin.h>
1436 ///
1437 /// This intrinsic is a utility function and does not correspond to a specific
1438 /// instruction.
1439 ///
1440 /// \param __i0
1441 /// A 32-bit integer value used to initialize the lower 32 bits of the
1442 /// result.
1443 /// \param __i1
1444 /// A 32-bit integer value used to initialize the upper 32 bits of the
1445 /// result.
1446 /// \returns An initialized 64-bit integer vector.
1447 static __inline__ __m64 __DEFAULT_FN_ATTRS
1448 _mm_setr_pi32(int __i0, int __i1)
1449 {
1450  return _mm_set_pi32(__i1, __i0);
1451 }
1452 
1453 /// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1454 /// the specified 16-bit integer values.
1455 ///
1456 /// \headerfile <x86intrin.h>
1457 ///
1458 /// This intrinsic is a utility function and does not correspond to a specific
1459 /// instruction.
1460 ///
1461 /// \param __w0
1462 /// A 16-bit integer value used to initialize bits [15:0] of the result.
1463 /// \param __w1
1464 /// A 16-bit integer value used to initialize bits [31:16] of the result.
1465 /// \param __w2
1466 /// A 16-bit integer value used to initialize bits [47:32] of the result.
1467 /// \param __w3
1468 /// A 16-bit integer value used to initialize bits [63:48] of the result.
1469 /// \returns An initialized 64-bit integer vector.
1470 static __inline__ __m64 __DEFAULT_FN_ATTRS
1471 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
1472 {
1473  return _mm_set_pi16(__w3, __w2, __w1, __w0);
1474 }
1475 
1476 /// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1477 /// the specified 8-bit integer values.
1478 ///
1479 /// \headerfile <x86intrin.h>
1480 ///
1481 /// This intrinsic is a utility function and does not correspond to a specific
1482 /// instruction.
1483 ///
1484 /// \param __b0
1485 /// An 8-bit integer value used to initialize bits [7:0] of the result.
1486 /// \param __b1
1487 /// An 8-bit integer value used to initialize bits [15:8] of the result.
1488 /// \param __b2
1489 /// An 8-bit integer value used to initialize bits [23:16] of the result.
1490 /// \param __b3
1491 /// An 8-bit integer value used to initialize bits [31:24] of the result.
1492 /// \param __b4
1493 /// An 8-bit integer value used to initialize bits [39:32] of the result.
1494 /// \param __b5
1495 /// An 8-bit integer value used to initialize bits [47:40] of the result.
1496 /// \param __b6
1497 /// An 8-bit integer value used to initialize bits [55:48] of the result.
1498 /// \param __b7
1499 /// An 8-bit integer value used to initialize bits [63:56] of the result.
1500 /// \returns An initialized 64-bit integer vector.
1501 static __inline__ __m64 __DEFAULT_FN_ATTRS
1502 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1503  char __b6, char __b7)
1504 {
1505  return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1506 }
1507 
1508 #undef __DEFAULT_FN_ATTRS
1509 
1510 /* Aliases for compatibility. */
1511 #define _m_empty _mm_empty
1512 #define _m_from_int _mm_cvtsi32_si64
1513 #define _m_from_int64 _mm_cvtsi64_m64
1514 #define _m_to_int _mm_cvtsi64_si32
1515 #define _m_to_int64 _mm_cvtm64_si64
1516 #define _m_packsswb _mm_packs_pi16
1517 #define _m_packssdw _mm_packs_pi32
1518 #define _m_packuswb _mm_packs_pu16
1519 #define _m_punpckhbw _mm_unpackhi_pi8
1520 #define _m_punpckhwd _mm_unpackhi_pi16
1521 #define _m_punpckhdq _mm_unpackhi_pi32
1522 #define _m_punpcklbw _mm_unpacklo_pi8
1523 #define _m_punpcklwd _mm_unpacklo_pi16
1524 #define _m_punpckldq _mm_unpacklo_pi32
1525 #define _m_paddb _mm_add_pi8
1526 #define _m_paddw _mm_add_pi16
1527 #define _m_paddd _mm_add_pi32
1528 #define _m_paddsb _mm_adds_pi8
1529 #define _m_paddsw _mm_adds_pi16
1530 #define _m_paddusb _mm_adds_pu8
1531 #define _m_paddusw _mm_adds_pu16
1532 #define _m_psubb _mm_sub_pi8
1533 #define _m_psubw _mm_sub_pi16
1534 #define _m_psubd _mm_sub_pi32
1535 #define _m_psubsb _mm_subs_pi8
1536 #define _m_psubsw _mm_subs_pi16
1537 #define _m_psubusb _mm_subs_pu8
1538 #define _m_psubusw _mm_subs_pu16
1539 #define _m_pmaddwd _mm_madd_pi16
1540 #define _m_pmulhw _mm_mulhi_pi16
1541 #define _m_pmullw _mm_mullo_pi16
1542 #define _m_psllw _mm_sll_pi16
1543 #define _m_psllwi _mm_slli_pi16
1544 #define _m_pslld _mm_sll_pi32
1545 #define _m_pslldi _mm_slli_pi32
1546 #define _m_psllq _mm_sll_si64
1547 #define _m_psllqi _mm_slli_si64
1548 #define _m_psraw _mm_sra_pi16
1549 #define _m_psrawi _mm_srai_pi16
1550 #define _m_psrad _mm_sra_pi32
1551 #define _m_psradi _mm_srai_pi32
1552 #define _m_psrlw _mm_srl_pi16
1553 #define _m_psrlwi _mm_srli_pi16
1554 #define _m_psrld _mm_srl_pi32
1555 #define _m_psrldi _mm_srli_pi32
1556 #define _m_psrlq _mm_srl_si64
1557 #define _m_psrlqi _mm_srli_si64
1558 #define _m_pand _mm_and_si64
1559 #define _m_pandn _mm_andnot_si64
1560 #define _m_por _mm_or_si64
1561 #define _m_pxor _mm_xor_si64
1562 #define _m_pcmpeqb _mm_cmpeq_pi8
1563 #define _m_pcmpeqw _mm_cmpeq_pi16
1564 #define _m_pcmpeqd _mm_cmpeq_pi32
1565 #define _m_pcmpgtb _mm_cmpgt_pi8
1566 #define _m_pcmpgtw _mm_cmpgt_pi16
1567 #define _m_pcmpgtd _mm_cmpgt_pi32
1568 
1569 #endif /* __MMINTRIN_H */
1570 
static __inline__ void __DEFAULT_FN_ATTRS _mm_empty(void)
Clears the MMX state by setting the state of the x87 stack registers to empty.
Definition: mmintrin.h:45
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition: mmintrin.h:322
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
Definition: mmintrin.h:272
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi16(short __w)
Constructs a 64-bit integer vector of [4 x i16], with each of the 16-bit integer vector elements set ...
Definition: mmintrin.h:1409
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void)
Constructs a 64-bit integer vector initialized to zero.
Definition: mmintrin.h:1296
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] to the corres...
Definition: mmintrin.h:451
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi16(__m64 __m1, __m64 __m2)
Converts 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit si...
Definition: mmintrin.h:141
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition: mmintrin.h:251
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
Definition: mmintrin.h:1217
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
Definition: mmintrin.h:343
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pu16(__m64 __m1, __m64 __m2)
Converts 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit un...
Definition: mmintrin.h:201
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_xor_si64(__m64 __m1, __m64 __m2)
Performs a bitwise exclusive OR of two 64-bit integer vectors.
Definition: mmintrin.h:1151
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
Definition: mmintrin.h:1261
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi8(char __b)
Constructs a 64-bit integer vector of [8 x i8], with each of the 8-bit integer vector elements set to...
Definition: mmintrin.h:1427
#define __DEFAULT_FN_ATTRS
Definition: mmintrin.h:35
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
Definition: mmintrin.h:228
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit unsigned integer element of the second 64-bit integer vector of [8 x i8] from th...
Definition: mmintrin.h:628
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu16(__m64 __m1, __m64 __m2)
Adds each 16-bit unsigned integer element of the first 64-bit integer vector of [4 x i16] to the corr...
Definition: mmintrin.h:495
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
Constructs a 64-bit integer vector initialized with the specified 16-bit integer values.
Definition: mmintrin.h:1340
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit signed integer element of the first 64-bit integer vector of [8 x i8] to the correspo...
Definition: mmintrin.h:428
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi16(__m64 __m, __m64 __count)
Left-shifts each 16-bit signed integer element of the first parameter, which is a 64-bit integer vect...
Definition: mmintrin.h:744
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_si64(__m64 __m, int __count)
Right-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the s...
Definition: mmintrin.h:1076
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
Definition: mmintrin.h:1239
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit integer element of the second 64-bit integer vector of [8 x i8] from the corresp...
Definition: mmintrin.h:516
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:1013
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi32(int __i)
Constructs a 64-bit integer vector of [2 x i32], with each of the 32-bit integer vector elements set ...
Definition: mmintrin.h:1391
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit signed integer element of the second 64-bit integer vector of [8 x i8] from the ...
Definition: mmintrin.h:581
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8...
Definition: mmintrin.h:364
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:922
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
Definition: mmintrin.h:990
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
Definition: mmintrin.h:1283
static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtm64_si64(__m64 __m)
Casts a 64-bit integer vector into a 64-bit signed integer value.
Definition: mmintrin.h:111
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu8(__m64 __m1, __m64 __m2)
Adds each 8-bit unsigned integer element of the first 64-bit integer vector of [8 x i8] to the corres...
Definition: mmintrin.h:473
static __inline__ vector float vector float __b
Definition: altivec.h:534
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_and_si64(__m64 __m1, __m64 __m2)
Performs a bitwise AND of two 64-bit integer vectors.
Definition: mmintrin.h:1094
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
Definition: mmintrin.h:299
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit signed integer element of the second 64-bit integer vector of [4 x i16] from th...
Definition: mmintrin.h:604
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_si64(__m64 __m, int __count)
Left-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the se...
Definition: mmintrin.h:851
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mullo_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:721
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi32(__m64 __m, int __count)
Left-shifts each 32-bit signed integer element of a 64-bit integer vector of [2 x i32] by the number ...
Definition: mmintrin.h:811
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_madd_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:679
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit unsigned integer element of the second 64-bit integer vector of [4 x i16] from ...
Definition: mmintrin.h:652
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit integer element of the second 64-bit integer vector of [4 x i16] from the corre...
Definition: mmintrin.h:537
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi32(int __i1, int __i0)
Constructs a 64-bit integer vector initialized with the specified 32-bit integer values.
Definition: mmintrin.h:1317
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi32(__m64 __m1, __m64 __m2)
Subtracts each 32-bit integer element of the second 64-bit integer vector of [2 x i32] from the corre...
Definition: mmintrin.h:558
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi32(int __i0, int __i1)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 32-bit integer va...
Definition: mmintrin.h:1448
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi32_si64(int __i)
Constructs a 64-bit integer vector, setting the lower 32 bits to the value of the 32-bit integer para...
Definition: mmintrin.h:62
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
Definition: mmintrin.h:1173
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
Definition: mmintrin.h:945
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_si64(__m64 __m, __m64 __count)
Right-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit ...
Definition: mmintrin.h:1055
typedef long long __m64 __attribute__((__vector_size__(8)))
Definition: mmintrin.h:27
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi64_si32(__m64 __m)
Returns the lower 32 bits of a 64-bit integer vector as a 32-bit signed integer.
Definition: mmintrin.h:79
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:968
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
Definition: mmintrin.h:1035
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:700
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit integer element of the first 64-bit integer vector of [4 x i16] to the corresponding...
Definition: mmintrin.h:385
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 8-bit integer val...
Definition: mmintrin.h:1502
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_andnot_si64(__m64 __m1, __m64 __m2)
Performs a bitwise NOT of the first 64-bit integer vector, and then performs a bitwise AND of the int...
Definition: mmintrin.h:1115
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
Definition: mmintrin.h:1195
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi16(__m64 __m, int __count)
Left-shifts each 16-bit signed integer element of a 64-bit integer vector of [4 x i16] by the number ...
Definition: mmintrin.h:766
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Constructs a 64-bit integer vector initialized with the specified 8-bit integer values.
Definition: mmintrin.h:1371
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi32(__m64 __m1, __m64 __m2)
Converts 32-bit signed integers from both 64-bit integer vector parameters of [2 x i32] into 16-bit s...
Definition: mmintrin.h:171
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 16-bit integer va...
Definition: mmintrin.h:1471
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi64_m64(long long __i)
Casts a 64-bit signed integer value into a 64-bit integer vector.
Definition: mmintrin.h:95
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
Definition: mmintrin.h:898
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:875
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi32(__m64 __m, __m64 __count)
Left-shifts each 32-bit signed integer element of the first parameter, which is a 64-bit integer vect...
Definition: mmintrin.h:789
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi32(__m64 __m1, __m64 __m2)
Adds each 32-bit integer element of the first 64-bit integer vector of [2 x i32] to the corresponding...
Definition: mmintrin.h:406
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_si64(__m64 __m, __m64 __count)
Left-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit i...
Definition: mmintrin.h:831
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_or_si64(__m64 __m1, __m64 __m2)
Performs a bitwise OR of two 64-bit integer vectors.
Definition: mmintrin.h:1133