mmintrin.h
1 /*===---- mmintrin.h - Implementation of MMX intrinsics on PowerPC ---------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 /* Implemented from the specification included in the Intel C++ Compiler
11  User Guide and Reference, version 9.0. */
12 
13 #ifndef NO_WARN_X86_INTRINSICS
14 /* This header file is intended to help port code that uses Intel
15  intrinsics from x86_64 to powerpc64/powerpc64le.
16 
17  Since the PowerPC target doesn't support a native 64-bit vector type,
18  we typedef __m64 to a 64-bit unsigned long long in these MMX
19  intrinsics, which works well for _si64 and some _pi32 operations.
20 
21  For _pi16 and _pi8 operations, it's better to transfer __m64 into a
22  128-bit PowerPC vector first. Power8 introduced direct register move
23  instructions, which help make such implementations more efficient.
24 
25  It's the user's responsibility to determine whether the results of
26  such a port are acceptable or whether further changes are needed.
27  Please note that much code using Intel intrinsics CAN BE REWRITTEN
28  in more portable and efficient standard C or GNU C extensions, using
29  64-bit scalar operations or 128-bit SSE/Altivec operations; that is
30  the recommended approach (one illustrative sketch follows below). */
31 #error \
32  "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
33 #endif
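
/* Editor's illustration (not part of the original header): one hedged
   sketch of such a rewrite.  An element-wise 16-bit add written with
   _mm_add_pi16 can instead use GNU C vector extensions, which GCC and
   Clang compile to native code.  The typedef and helper below are
   hypothetical names, not provided by this header.  */
#if 0 /* illustration only */
typedef short __v4hi __attribute__((__vector_size__(8)));

/* Element-wise 16-bit addition, no intrinsics required.  */
static inline __v4hi add_v4hi(__v4hi __a, __v4hi __b) { return __a + __b; }
#endif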
34 
35 #ifndef _MMINTRIN_H_INCLUDED
36 #define _MMINTRIN_H_INCLUDED
37 
38 #include <altivec.h>
39 /* The Intel API is flexible enough that we must allow aliasing with other
40  vector types, and their scalar components. */
41 typedef __attribute__((__aligned__(8))) unsigned long long __m64;
42 
43 typedef __attribute__((__aligned__(8))) union {
44  __m64 as_m64;
45  char as_char[8];
46  signed char as_signed_char[8];
47  short as_short[4];
48  int as_int[2];
49  long long as_long_long;
50  float as_float[2];
51  double as_double;
52 } __m64_union;
53 
54 /* Empty the multimedia state. */
55 extern __inline void
56  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
57  _mm_empty(void) {
58  /* nothing to do on PowerPC. */
59 }
60 
61 extern __inline void
62  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
63  _m_empty(void) {
64  /* nothing to do on PowerPC. */
65 }
66 
67 /* Convert I to a __m64 object. The integer is zero-extended to 64 bits. */
68 extern __inline __m64
69  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
70  _mm_cvtsi32_si64(int __i) {
71  return (__m64)(unsigned int)__i;
72 }
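
/* Editor's illustration (not part of the original header): a hedged
   sketch of the zero-extension above; the helper is hypothetical.  */
#if 0 /* illustration only */
static inline void example_cvtsi32_si64(void) {
  /* The upper 32 bits stay clear even for negative inputs.  */
  __m64 __x = _mm_cvtsi32_si64(-1); /* __x == 0x00000000ffffffffULL */
  (void)__x;
}
#endif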
73 
74 extern __inline __m64
75  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
76  _m_from_int(int __i) {
77  return _mm_cvtsi32_si64(__i);
78 }
79 
80 /* Convert the lower 32 bits of the __m64 object into an integer. */
81 extern __inline int
82  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83  _mm_cvtsi64_si32(__m64 __i) {
84  return ((int)__i);
85 }
86 
87 extern __inline int
88  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
89  _m_to_int(__m64 __i) {
90  return _mm_cvtsi64_si32(__i);
91 }
92 
93 /* Convert I to a __m64 object. */
94 
95 /* Intel intrinsic. */
96 extern __inline __m64
97  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
98  _m_from_int64(long long __i) {
99  return (__m64)__i;
100 }
101 
102 extern __inline __m64
103  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
104  _mm_cvtsi64_m64(long long __i) {
105  return (__m64)__i;
106 }
107 
108 /* Microsoft intrinsic. */
109 extern __inline __m64
110  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
111  _mm_cvtsi64x_si64(long long __i) {
112  return (__m64)__i;
113 }
114 
115 extern __inline __m64
116  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
117  _mm_set_pi64x(long long __i) {
118  return (__m64)__i;
119 }
120 
121 /* Convert the __m64 object to a 64-bit integer. */
122 
123 /* Intel intrinsic. */
124 extern __inline long long
125  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
126  _m_to_int64(__m64 __i) {
127  return (long long)__i;
128 }
129 
130 extern __inline long long
131  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
132  _mm_cvtm64_si64(__m64 __i) {
133  return (long long)__i;
134 }
135 
136 /* Microsoft intrinsic. */
137 extern __inline long long
138  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
139  _mm_cvtsi64_si64x(__m64 __i) {
140  return (long long)__i;
141 }
142 
143 #ifdef _ARCH_PWR8
144 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
145  the result, and the four 16-bit values from M2 into the upper four 8-bit
146  values of the result, all with signed saturation. */
147 extern __inline __m64
148  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
149  _mm_packs_pi16(__m64 __m1, __m64 __m2) {
150  __vector signed short vm1;
151  __vector signed char vresult;
152 
153  vm1 = (__vector signed short)(__vector unsigned long long)
154 #ifdef __LITTLE_ENDIAN__
155  {__m1, __m2};
156 #else
157  {__m2, __m1};
158 #endif
159  vresult = vec_packs(vm1, vm1);
160  return (__m64)((__vector long long)vresult)[0];
161 }
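
/* Editor's note (illustration, not from the original header): values
   outside the signed 8-bit range saturate, e.g. packing the elements
   {300, -200, 5, 0} yields the bytes {127, -128, 5, 0}.  */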
162 
163 extern __inline __m64
164  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
165  _m_packsswb(__m64 __m1, __m64 __m2) {
166  return _mm_packs_pi16(__m1, __m2);
167 }
168 
169 /* Pack the two 32-bit values from M1 into the lower two 16-bit values of
170  the result, and the two 32-bit values from M2 into the upper two 16-bit
171  values of the result, all with signed saturation. */
172 extern __inline __m64
173  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
174  _mm_packs_pi32(__m64 __m1, __m64 __m2) {
175  __vector signed int vm1;
176  __vector signed short vresult;
177 
178  vm1 = (__vector signed int)(__vector unsigned long long)
179 #ifdef __LITTLE_ENDIAN__
180  {__m1, __m2};
181 #else
182  {__m2, __m1};
183 #endif
184  vresult = vec_packs(vm1, vm1);
185  return (__m64)((__vector long long)vresult)[0];
186 }
187 
188 extern __inline __m64
189  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
190  _m_packssdw(__m64 __m1, __m64 __m2) {
191  return _mm_packs_pi32(__m1, __m2);
192 }
193 
194 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
195  the result, and the four 16-bit values from M2 into the upper four 8-bit
196  values of the result, all with unsigned saturation. */
197 extern __inline __m64
198  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
199  _mm_packs_pu16(__m64 __m1, __m64 __m2) {
200  __vector unsigned char r;
201  __vector signed short vm1 = (__vector signed short)(__vector long long)
202 #ifdef __LITTLE_ENDIAN__
203  {__m1, __m2};
204 #else
205  {__m2, __m1};
206 #endif
207  const __vector signed short __zero = {0};
208  __vector __bool short __select = vec_cmplt(vm1, __zero);
209  r = vec_packs((__vector unsigned short)vm1, (__vector unsigned short)vm1);
210  __vector __bool char packsel = vec_pack(__select, __select);
211  r = vec_sel(r, (const __vector unsigned char)__zero, packsel);
212  return (__m64)((__vector long long)r)[0];
213 }
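
/* Editor's note (illustration, not from the original header): signed
   inputs clamp to the unsigned 8-bit range, e.g. the elements
   {-5, 300, 128, 0} pack to the bytes {0, 255, 128, 0}.  */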
214 
215 extern __inline __m64
216  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
217  _m_packuswb(__m64 __m1, __m64 __m2) {
218  return _mm_packs_pu16(__m1, __m2);
219 }
220 #endif /* end ARCH_PWR8 */
221 
222 /* Interleave the four 8-bit values from the high half of M1 with the four
223  8-bit values from the high half of M2. */
224 extern __inline __m64
225  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
226  _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
227 #if _ARCH_PWR8
228  __vector unsigned char a, b, c;
229 
230  a = (__vector unsigned char)vec_splats(__m1);
231  b = (__vector unsigned char)vec_splats(__m2);
232  c = vec_mergel(a, b);
233  return (__m64)((__vector long long)c)[1];
234 #else
235  __m64_union m1, m2, res;
236 
237  m1.as_m64 = __m1;
238  m2.as_m64 = __m2;
239 
240  res.as_char[0] = m1.as_char[4];
241  res.as_char[1] = m2.as_char[4];
242  res.as_char[2] = m1.as_char[5];
243  res.as_char[3] = m2.as_char[5];
244  res.as_char[4] = m1.as_char[6];
245  res.as_char[5] = m2.as_char[6];
246  res.as_char[6] = m1.as_char[7];
247  res.as_char[7] = m2.as_char[7];
248 
249  return (__m64)res.as_m64;
250 #endif
251 }
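
/* Editor's note (illustration, not from the original header): with the
   bytes of M1 = {m0..m7} and M2 = {n0..n7}, element 0 least
   significant, the result is {m4, n4, m5, n5, m6, n6, m7, n7}.  */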
252 
253 extern __inline __m64
254  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
255  _m_punpckhbw(__m64 __m1, __m64 __m2) {
256  return _mm_unpackhi_pi8(__m1, __m2);
257 }
258 
259 /* Interleave the two 16-bit values from the high half of M1 with the two
260  16-bit values from the high half of M2. */
261 extern __inline __m64
262  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
263  _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
264  __m64_union m1, m2, res;
265 
266  m1.as_m64 = __m1;
267  m2.as_m64 = __m2;
268 
269  res.as_short[0] = m1.as_short[2];
270  res.as_short[1] = m2.as_short[2];
271  res.as_short[2] = m1.as_short[3];
272  res.as_short[3] = m2.as_short[3];
273 
274  return (__m64)res.as_m64;
275 }
276 
277 extern __inline __m64
278  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
279  _m_punpckhwd(__m64 __m1, __m64 __m2) {
280  return _mm_unpackhi_pi16(__m1, __m2);
281 }
282 /* Interleave the 32-bit value from the high half of M1 with the 32-bit
283  value from the high half of M2. */
284 extern __inline __m64
285  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
286  _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
287  __m64_union m1, m2, res;
288 
289  m1.as_m64 = __m1;
290  m2.as_m64 = __m2;
291 
292  res.as_int[0] = m1.as_int[1];
293  res.as_int[1] = m2.as_int[1];
294 
295  return (__m64)res.as_m64;
296 }
297 
298 extern __inline __m64
299  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
300  _m_punpckhdq(__m64 __m1, __m64 __m2) {
301  return _mm_unpackhi_pi32(__m1, __m2);
302 }
303 /* Interleave the four 8-bit values from the low half of M1 with the four
304  8-bit values from the low half of M2. */
305 extern __inline __m64
306  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
307  _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
308 #if _ARCH_PWR8
309  __vector unsigned char a, b, c;
310 
311  a = (__vector unsigned char)vec_splats(__m1);
312  b = (__vector unsigned char)vec_splats(__m2);
313  c = vec_mergel(a, b);
314  return (__m64)((__vector long long)c)[0];
315 #else
316  __m64_union m1, m2, res;
317 
318  m1.as_m64 = __m1;
319  m2.as_m64 = __m2;
320 
321  res.as_char[0] = m1.as_char[0];
322  res.as_char[1] = m2.as_char[0];
323  res.as_char[2] = m1.as_char[1];
324  res.as_char[3] = m2.as_char[1];
325  res.as_char[4] = m1.as_char[2];
326  res.as_char[5] = m2.as_char[2];
327  res.as_char[6] = m1.as_char[3];
328  res.as_char[7] = m2.as_char[3];
329 
330  return (__m64)res.as_m64;
331 #endif
332 }
333 
334 extern __inline __m64
335  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
336  _m_punpcklbw(__m64 __m1, __m64 __m2) {
337  return _mm_unpacklo_pi8(__m1, __m2);
338 }
339 /* Interleave the two 16-bit values from the low half of M1 with the two
340  16-bit values from the low half of M2. */
341 extern __inline __m64
342  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
343  _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
344  __m64_union m1, m2, res;
345 
346  m1.as_m64 = __m1;
347  m2.as_m64 = __m2;
348 
349  res.as_short[0] = m1.as_short[0];
350  res.as_short[1] = m2.as_short[0];
351  res.as_short[2] = m1.as_short[1];
352  res.as_short[3] = m2.as_short[1];
353 
354  return (__m64)res.as_m64;
355 }
356 
357 extern __inline __m64
358  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
359  _m_punpcklwd(__m64 __m1, __m64 __m2) {
360  return _mm_unpacklo_pi16(__m1, __m2);
361 }
362 
363 /* Interleave the 32-bit value from the low half of M1 with the 32-bit
364  value from the low half of M2. */
365 extern __inline __m64
366  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
367  _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
368  __m64_union m1, m2, res;
369 
370  m1.as_m64 = __m1;
371  m2.as_m64 = __m2;
372 
373  res.as_int[0] = m1.as_int[0];
374  res.as_int[1] = m2.as_int[0];
375 
376  return (__m64)res.as_m64;
377 }
378 
379 extern __inline __m64
380  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
381  _m_punpckldq(__m64 __m1, __m64 __m2) {
382  return _mm_unpacklo_pi32(__m1, __m2);
383 }
384 
385 /* Add the 8-bit values in M1 to the 8-bit values in M2. */
386 extern __inline __m64
387  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
388  _mm_add_pi8(__m64 __m1, __m64 __m2) {
389 #if _ARCH_PWR8
390  __vector signed char a, b, c;
391 
392  a = (__vector signed char)vec_splats(__m1);
393  b = (__vector signed char)vec_splats(__m2);
394  c = vec_add(a, b);
395  return (__m64)((__vector long long)c)[0];
396 #else
397  __m64_union m1, m2, res;
398 
399  m1.as_m64 = __m1;
400  m2.as_m64 = __m2;
401 
402  res.as_char[0] = m1.as_char[0] + m2.as_char[0];
403  res.as_char[1] = m1.as_char[1] + m2.as_char[1];
404  res.as_char[2] = m1.as_char[2] + m2.as_char[2];
405  res.as_char[3] = m1.as_char[3] + m2.as_char[3];
406  res.as_char[4] = m1.as_char[4] + m2.as_char[4];
407  res.as_char[5] = m1.as_char[5] + m2.as_char[5];
408  res.as_char[6] = m1.as_char[6] + m2.as_char[6];
409  res.as_char[7] = m1.as_char[7] + m2.as_char[7];
410 
411  return (__m64)res.as_m64;
412 #endif
413 }
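
/* Editor's note (illustration, not from the original header): each
   byte lane wraps modulo 256 with no saturation, e.g. 0x7f + 0x01
   gives 0x80 (-128 as a signed byte); see _mm_adds_pi8 below for the
   saturating form.  */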
414 
415 extern __inline __m64
416  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
417  _m_paddb(__m64 __m1, __m64 __m2) {
418  return _mm_add_pi8(__m1, __m2);
419 }
420 
421 /* Add the 16-bit values in M1 to the 16-bit values in M2. */
422 extern __inline __m64
423  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
424  _mm_add_pi16(__m64 __m1, __m64 __m2) {
425 #if _ARCH_PWR8
426  __vector signed short a, b, c;
427 
428  a = (__vector signed short)vec_splats(__m1);
429  b = (__vector signed short)vec_splats(__m2);
430  c = vec_add(a, b);
431  return (__m64)((__vector long long)c)[0];
432 #else
433  __m64_union m1, m2, res;
434 
435  m1.as_m64 = __m1;
436  m2.as_m64 = __m2;
437 
438  res.as_short[0] = m1.as_short[0] + m2.as_short[0];
439  res.as_short[1] = m1.as_short[1] + m2.as_short[1];
440  res.as_short[2] = m1.as_short[2] + m2.as_short[2];
441  res.as_short[3] = m1.as_short[3] + m2.as_short[3];
442 
443  return (__m64)res.as_m64;
444 #endif
445 }
446 
447 extern __inline __m64
448  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
449  _m_paddw(__m64 __m1, __m64 __m2) {
450  return _mm_add_pi16(__m1, __m2);
451 }
452 
453 /* Add the 32-bit values in M1 to the 32-bit values in M2. */
454 extern __inline __m64
455  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
456  _mm_add_pi32(__m64 __m1, __m64 __m2) {
457 #if _ARCH_PWR9
458  __vector signed int a, b, c;
459 
460  a = (__vector signed int)vec_splats(__m1);
461  b = (__vector signed int)vec_splats(__m2);
462  c = vec_add(a, b);
463  return (__m64)((__vector long long)c)[0];
464 #else
465  __m64_union m1, m2, res;
466 
467  m1.as_m64 = __m1;
468  m2.as_m64 = __m2;
469 
470  res.as_int[0] = m1.as_int[0] + m2.as_int[0];
471  res.as_int[1] = m1.as_int[1] + m2.as_int[1];
472 
473  return (__m64)res.as_m64;
474 #endif
475 }
476 
477 extern __inline __m64
478  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
479  _m_paddd(__m64 __m1, __m64 __m2) {
480  return _mm_add_pi32(__m1, __m2);
481 }
482 
483 /* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
484 extern __inline __m64
485  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
486  _mm_sub_pi8(__m64 __m1, __m64 __m2) {
487 #if _ARCH_PWR8
488  __vector signed char a, b, c;
489 
490  a = (__vector signed char)vec_splats(__m1);
491  b = (__vector signed char)vec_splats(__m2);
492  c = vec_sub(a, b);
493  return (__m64)((__vector long long)c)[0];
494 #else
495  __m64_union m1, m2, res;
496 
497  m1.as_m64 = __m1;
498  m2.as_m64 = __m2;
499 
500  res.as_char[0] = m1.as_char[0] - m2.as_char[0];
501  res.as_char[1] = m1.as_char[1] - m2.as_char[1];
502  res.as_char[2] = m1.as_char[2] - m2.as_char[2];
503  res.as_char[3] = m1.as_char[3] - m2.as_char[3];
504  res.as_char[4] = m1.as_char[4] - m2.as_char[4];
505  res.as_char[5] = m1.as_char[5] - m2.as_char[5];
506  res.as_char[6] = m1.as_char[6] - m2.as_char[6];
507  res.as_char[7] = m1.as_char[7] - m2.as_char[7];
508 
509  return (__m64)res.as_m64;
510 #endif
511 }
512 
513 extern __inline __m64
514  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
515  _m_psubb(__m64 __m1, __m64 __m2) {
516  return _mm_sub_pi8(__m1, __m2);
517 }
518 
519 /* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
520 extern __inline __m64
521  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
522  _mm_sub_pi16(__m64 __m1, __m64 __m2) {
523 #if _ARCH_PWR8
524  __vector signed short a, b, c;
525 
526  a = (__vector signed short)vec_splats(__m1);
527  b = (__vector signed short)vec_splats(__m2);
528  c = vec_sub(a, b);
529  return (__m64)((__vector long long)c)[0];
530 #else
531  __m64_union m1, m2, res;
532 
533  m1.as_m64 = __m1;
534  m2.as_m64 = __m2;
535 
536  res.as_short[0] = m1.as_short[0] - m2.as_short[0];
537  res.as_short[1] = m1.as_short[1] - m2.as_short[1];
538  res.as_short[2] = m1.as_short[2] - m2.as_short[2];
539  res.as_short[3] = m1.as_short[3] - m2.as_short[3];
540 
541  return (__m64)res.as_m64;
542 #endif
543 }
544 
545 extern __inline __m64
546  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
547  _m_psubw(__m64 __m1, __m64 __m2) {
548  return _mm_sub_pi16(__m1, __m2);
549 }
550 
551 /* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
552 extern __inline __m64
553  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
554  _mm_sub_pi32(__m64 __m1, __m64 __m2) {
555 #if _ARCH_PWR9
556  __vector signed int a, b, c;
557 
558  a = (__vector signed int)vec_splats(__m1);
559  b = (__vector signed int)vec_splats(__m2);
560  c = vec_sub(a, b);
561  return (__m64)((__vector long long)c)[0];
562 #else
563  __m64_union m1, m2, res;
564 
565  m1.as_m64 = __m1;
566  m2.as_m64 = __m2;
567 
568  res.as_int[0] = m1.as_int[0] - m2.as_int[0];
569  res.as_int[1] = m1.as_int[1] - m2.as_int[1];
570 
571  return (__m64)res.as_m64;
572 #endif
573 }
574 
575 extern __inline __m64
576  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
577  _m_psubd(__m64 __m1, __m64 __m2) {
578  return _mm_sub_pi32(__m1, __m2);
579 }
580 
581 extern __inline __m64
582  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
583  _mm_add_si64(__m64 __m1, __m64 __m2) {
584  return (__m1 + __m2);
585 }
586 
587 extern __inline __m64
588  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
589  _mm_sub_si64(__m64 __m1, __m64 __m2) {
590  return (__m1 - __m2);
591 }
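
/* Editor's illustration (not part of the original header): because
   __m64 is a 64-bit scalar on PowerPC, the _si64 forms reduce to
   ordinary integer arithmetic; the helper below is hypothetical.  */
#if 0 /* illustration only */
static inline void example_si64(void) {
  __m64 __s = _mm_add_si64((__m64)1, (__m64)2); /* __s == 3 */
  __m64 __d = _mm_sub_si64((__m64)5, (__m64)2); /* __d == 3 */
  (void)__s; (void)__d;
}
#endif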
592 
593 /* Shift the 64-bit value in M left by COUNT. */
594 extern __inline __m64
595  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
596  _mm_sll_si64(__m64 __m, __m64 __count) {
597  return (__m << __count);
598 }
599 
600 extern __inline __m64
601  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
602  _m_psllq(__m64 __m, __m64 __count) {
603  return _mm_sll_si64(__m, __count);
604 }
605 
606 extern __inline __m64
607  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
608  _mm_slli_si64(__m64 __m, const int __count) {
609  return (__m << __count);
610 }
611 
612 extern __inline __m64
613  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
614  _m_psllqi(__m64 __m, const int __count) {
615  return _mm_slli_si64(__m, __count);
616 }
617 
618 /* Shift the 64-bit value in M right by COUNT; shift in zeros. */
619 extern __inline __m64
620  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
621  _mm_srl_si64(__m64 __m, __m64 __count) {
622  return (__m >> __count);
623 }
624 
625 extern __inline __m64
626  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
627  _m_psrlq(__m64 __m, __m64 __count) {
628  return _mm_srl_si64(__m, __count);
629 }
630 
631 extern __inline __m64
632  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
633  _mm_srli_si64(__m64 __m, const int __count) {
634  return (__m >> __count);
635 }
636 
637 extern __inline __m64
638  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
639  _m_psrlqi(__m64 __m, const int __count) {
640  return _mm_srli_si64(__m, __count);
641 }
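
/* Editor's note (illustration, not from the original header): these
   are plain 64-bit shifts, e.g. _mm_slli_si64((__m64)1, 8) == 0x100.
   The x86 forms clear the result for counts of 64 or more, but here
   such counts are an ordinary (undefined) C shift, so portable code
   should avoid them.  */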
642 
643 /* Bit-wise AND the 64-bit values in M1 and M2. */
644 extern __inline __m64
645  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
646  _mm_and_si64(__m64 __m1, __m64 __m2) {
647  return (__m1 & __m2);
648 }
649 
650 extern __inline __m64
651  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
652  _m_pand(__m64 __m1, __m64 __m2) {
653  return _mm_and_si64(__m1, __m2);
654 }
655 
656 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
657  64-bit value in M2. */
658 extern __inline __m64
659  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
660  _mm_andnot_si64(__m64 __m1, __m64 __m2) {
661  return (~__m1 & __m2);
662 }
663 
664 extern __inline __m64
665  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
666  _m_pandn(__m64 __m1, __m64 __m2) {
667  return _mm_andnot_si64(__m1, __m2);
668 }
669 
670 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
671 extern __inline __m64
672  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
673  _mm_or_si64(__m64 __m1, __m64 __m2) {
674  return (__m1 | __m2);
675 }
676 
677 extern __inline __m64
678  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
679  _m_por(__m64 __m1, __m64 __m2) {
680  return _mm_or_si64(__m1, __m2);
681 }
682 
683 /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
684 extern __inline __m64
685  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
686  _mm_xor_si64(__m64 __m1, __m64 __m2) {
687  return (__m1 ^ __m2);
688 }
689 
690 extern __inline __m64
691  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
692  _m_pxor(__m64 __m1, __m64 __m2) {
693  return _mm_xor_si64(__m1, __m2);
694 }
695 
696 /* Creates a 64-bit zero. */
697 extern __inline __m64
698  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
699  _mm_setzero_si64(void) {
700  return (__m64)0;
701 }
702 
703 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the
704  test is true and zero if false. */
705 extern __inline __m64
706  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
707  _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) {
708 #if defined(_ARCH_PWR6) && defined(__powerpc64__)
709  __m64 res;
710  __asm__("cmpb %0,%1,%2;\n" : "=r"(res) : "r"(__m1), "r"(__m2) :);
711  return (res);
712 #else
713  __m64_union m1, m2, res;
714 
715  m1.as_m64 = __m1;
716  m2.as_m64 = __m2;
717 
718  res.as_char[0] = (m1.as_char[0] == m2.as_char[0]) ? -1 : 0;
719  res.as_char[1] = (m1.as_char[1] == m2.as_char[1]) ? -1 : 0;
720  res.as_char[2] = (m1.as_char[2] == m2.as_char[2]) ? -1 : 0;
721  res.as_char[3] = (m1.as_char[3] == m2.as_char[3]) ? -1 : 0;
722  res.as_char[4] = (m1.as_char[4] == m2.as_char[4]) ? -1 : 0;
723  res.as_char[5] = (m1.as_char[5] == m2.as_char[5]) ? -1 : 0;
724  res.as_char[6] = (m1.as_char[6] == m2.as_char[6]) ? -1 : 0;
725  res.as_char[7] = (m1.as_char[7] == m2.as_char[7]) ? -1 : 0;
726 
727  return (__m64)res.as_m64;
728 #endif
729 }
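
/* Editor's note (illustration, not from the original header): on
   64-bit POWER6 and later this is a single cmpb instruction, giving
   0xff in every matching byte lane, e.g. comparing 0x1122334455667788
   with 0x11ff3344ffffff88 yields 0xff00ffff000000ff.  */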
730 
731 extern __inline __m64
732  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
733  _m_pcmpeqb(__m64 __m1, __m64 __m2) {
734  return _mm_cmpeq_pi8(__m1, __m2);
735 }
736 
737 extern __inline __m64
738  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
739  _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) {
740 #if _ARCH_PWR8
741  __vector signed char a, b, c;
742 
743  a = (__vector signed char)vec_splats(__m1);
744  b = (__vector signed char)vec_splats(__m2);
745  c = (__vector signed char)vec_cmpgt(a, b);
746  return (__m64)((__vector long long)c)[0];
747 #else
748  __m64_union m1, m2, res;
749 
750  m1.as_m64 = __m1;
751  m2.as_m64 = __m2;
752 
753  res.as_char[0] = (m1.as_char[0] > m2.as_char[0]) ? -1 : 0;
754  res.as_char[1] = (m1.as_char[1] > m2.as_char[1]) ? -1 : 0;
755  res.as_char[2] = (m1.as_char[2] > m2.as_char[2]) ? -1 : 0;
756  res.as_char[3] = (m1.as_char[3] > m2.as_char[3]) ? -1 : 0;
757  res.as_char[4] = (m1.as_char[4] > m2.as_char[4]) ? -1 : 0;
758  res.as_char[5] = (m1.as_char[5] > m2.as_char[5]) ? -1 : 0;
759  res.as_char[6] = (m1.as_char[6] > m2.as_char[6]) ? -1 : 0;
760  res.as_char[7] = (m1.as_char[7] > m2.as_char[7]) ? -1 : 0;
761 
762  return (__m64)res.as_m64;
763 #endif
764 }
765 
766 extern __inline __m64
767  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
768  _m_pcmpgtb(__m64 __m1, __m64 __m2) {
769  return _mm_cmpgt_pi8(__m1, __m2);
770 }
771 
772 /* Compare four 16-bit values. The result of the comparison is 0xFFFF if
773  the test is true and zero if false. */
774 extern __inline __m64
775  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
776  _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) {
777 #if _ARCH_PWR8
778  __vector signed short a, b, c;
779 
780  a = (__vector signed short)vec_splats(__m1);
781  b = (__vector signed short)vec_splats(__m2);
782  c = (__vector signed short)vec_cmpeq(a, b);
783  return (__m64)((__vector long long)c)[0];
784 #else
785  __m64_union m1, m2, res;
786 
787  m1.as_m64 = __m1;
788  m2.as_m64 = __m2;
789 
790  res.as_short[0] = (m1.as_short[0] == m2.as_short[0]) ? -1 : 0;
791  res.as_short[1] = (m1.as_short[1] == m2.as_short[1]) ? -1 : 0;
792  res.as_short[2] = (m1.as_short[2] == m2.as_short[2]) ? -1 : 0;
793  res.as_short[3] = (m1.as_short[3] == m2.as_short[3]) ? -1 : 0;
794 
795  return (__m64)res.as_m64;
796 #endif
797 }
798 
799 extern __inline __m64
800  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
801  _m_pcmpeqw(__m64 __m1, __m64 __m2) {
802  return _mm_cmpeq_pi16(__m1, __m2);
803 }
804 
805 extern __inline __m64
806  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
807  _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) {
808 #if _ARCH_PWR8
809  __vector signed short a, b, c;
810 
811  a = (__vector signed short)vec_splats(__m1);
812  b = (__vector signed short)vec_splats(__m2);
813  c = (__vector signed short)vec_cmpgt(a, b);
814  return (__m64)((__vector long long)c)[0];
815 #else
816  __m64_union m1, m2, res;
817 
818  m1.as_m64 = __m1;
819  m2.as_m64 = __m2;
820 
821  res.as_short[0] = (m1.as_short[0] > m2.as_short[0]) ? -1 : 0;
822  res.as_short[1] = (m1.as_short[1] > m2.as_short[1]) ? -1 : 0;
823  res.as_short[2] = (m1.as_short[2] > m2.as_short[2]) ? -1 : 0;
824  res.as_short[3] = (m1.as_short[3] > m2.as_short[3]) ? -1 : 0;
825 
826  return (__m64)res.as_m64;
827 #endif
828 }
829 
830 extern __inline __m64
831  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
832  _m_pcmpgtw(__m64 __m1, __m64 __m2) {
833  return _mm_cmpgt_pi16(__m1, __m2);
834 }
835 
836 /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
837  the test is true and zero if false. */
838 extern __inline __m64
839  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
840  _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) {
841 #if _ARCH_PWR9
842  __vector signed int a, b, c;
843 
844  a = (__vector signed int)vec_splats(__m1);
845  b = (__vector signed int)vec_splats(__m2);
846  c = (__vector signed int)vec_cmpeq(a, b);
847  return (__m64)((__vector long long)c)[0];
848 #else
849  __m64_union m1, m2, res;
850 
851  m1.as_m64 = __m1;
852  m2.as_m64 = __m2;
853 
854  res.as_int[0] = (m1.as_int[0] == m2.as_int[0]) ? -1 : 0;
855  res.as_int[1] = (m1.as_int[1] == m2.as_int[1]) ? -1 : 0;
856 
857  return (__m64)res.as_m64;
858 #endif
859 }
860 
861 extern __inline __m64
862  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
863  _m_pcmpeqd(__m64 __m1, __m64 __m2) {
864  return _mm_cmpeq_pi32(__m1, __m2);
865 }
866 
867 extern __inline __m64
868  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
869  _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) {
870 #if _ARCH_PWR9
871  __vector signed int a, b, c;
872 
873  a = (__vector signed int)vec_splats(__m1);
874  b = (__vector signed int)vec_splats(__m2);
875  c = (__vector signed int)vec_cmpgt(a, b);
876  return (__m64)((__vector long long)c)[0];
877 #else
878  __m64_union m1, m2, res;
879 
880  m1.as_m64 = __m1;
881  m2.as_m64 = __m2;
882 
883  res.as_int[0] = (m1.as_int[0] > m2.as_int[0]) ? -1 : 0;
884  res.as_int[1] = (m1.as_int[1] > m2.as_int[1]) ? -1 : 0;
885 
886  return (__m64)res.as_m64;
887 #endif
888 }
889 
890 extern __inline __m64
891  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
892  _m_pcmpgtd(__m64 __m1, __m64 __m2) {
893  return _mm_cmpgt_pi32(__m1, __m2);
894 }
895 
896 #if _ARCH_PWR8
897 /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
898  saturated arithmetic. */
899 extern __inline __m64
900  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
901  _mm_adds_pi8(__m64 __m1, __m64 __m2) {
902  __vector signed char a, b, c;
903 
904  a = (__vector signed char)vec_splats(__m1);
905  b = (__vector signed char)vec_splats(__m2);
906  c = vec_adds(a, b);
907  return (__m64)((__vector long long)c)[0];
908 }
909 
910 extern __inline __m64
911  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
912  _m_paddsb(__m64 __m1, __m64 __m2) {
913  return _mm_adds_pi8(__m1, __m2);
914 }
915 /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
916  saturated arithmetic. */
917 extern __inline __m64
918  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
919  _mm_adds_pi16(__m64 __m1, __m64 __m2) {
920  __vector signed short a, b, c;
921 
922  a = (__vector signed short)vec_splats(__m1);
923  b = (__vector signed short)vec_splats(__m2);
924  c = vec_adds(a, b);
925  return (__m64)((__vector long long)c)[0];
926 }
927 
928 extern __inline __m64
929  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
930  _m_paddsw(__m64 __m1, __m64 __m2) {
931  return _mm_adds_pi16(__m1, __m2);
932 }
933 /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
934  saturated arithmetic. */
935 extern __inline __m64
936  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
937  _mm_adds_pu8(__m64 __m1, __m64 __m2) {
938  __vector unsigned char a, b, c;
939 
940  a = (__vector unsigned char)vec_splats(__m1);
941  b = (__vector unsigned char)vec_splats(__m2);
942  c = vec_adds(a, b);
943  return (__m64)((__vector long long)c)[0];
944 }
945 
946 extern __inline __m64
947  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
948  _m_paddusb(__m64 __m1, __m64 __m2) {
949  return _mm_adds_pu8(__m1, __m2);
950 }
951 
952 /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
953  saturated arithmetic. */
954 extern __inline __m64
955  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
956  _mm_adds_pu16(__m64 __m1, __m64 __m2) {
957  __vector unsigned short a, b, c;
958 
959  a = (__vector unsigned short)vec_splats(__m1);
960  b = (__vector unsigned short)vec_splats(__m2);
961  c = vec_adds(a, b);
962  return (__m64)((__vector long long)c)[0];
963 }
964 
965 extern __inline __m64
966  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
967  _m_paddusw(__m64 __m1, __m64 __m2) {
968  return _mm_adds_pu16(__m1, __m2);
969 }
970 
971 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
972  saturating arithmetic. */
973 extern __inline __m64
974  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
975  _mm_subs_pi8(__m64 __m1, __m64 __m2) {
976  __vector signed char a, b, c;
977 
978  a = (__vector signed char)vec_splats(__m1);
979  b = (__vector signed char)vec_splats(__m2);
980  c = vec_subs(a, b);
981  return (__m64)((__vector long long)c)[0];
982 }
983 
984 extern __inline __m64
985  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
986  _m_psubsb(__m64 __m1, __m64 __m2) {
987  return _mm_subs_pi8(__m1, __m2);
988 }
989 
990 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
991  signed saturating arithmetic. */
992 extern __inline __m64
993  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
994  _mm_subs_pi16(__m64 __m1, __m64 __m2) {
995  __vector signed short a, b, c;
996 
997  a = (__vector signed short)vec_splats(__m1);
998  b = (__vector signed short)vec_splats(__m2);
999  c = vec_subs(a, b);
1000  return (__m64)((__vector long long)c)[0];
1001 }
1002 
1003 extern __inline __m64
1004  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1005  _m_psubsw(__m64 __m1, __m64 __m2) {
1006  return _mm_subs_pi16(__m1, __m2);
1007 }
1008 
1009 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
1010  unsigned saturating arithmetic. */
1011 extern __inline __m64
1012  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1013  _mm_subs_pu8(__m64 __m1, __m64 __m2) {
1014  __vector unsigned char a, b, c;
1015 
1016  a = (__vector unsigned char)vec_splats(__m1);
1017  b = (__vector unsigned char)vec_splats(__m2);
1018  c = vec_subs(a, b);
1019  return (__m64)((__vector long long)c)[0];
1020 }
1021 
1022 extern __inline __m64
1023  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1024  _m_psubusb(__m64 __m1, __m64 __m2) {
1025  return _mm_subs_pu8(__m1, __m2);
1026 }
1027 
1028 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
1029  unsigned saturating arithmetic. */
1030 extern __inline __m64
1031  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1032  _mm_subs_pu16(__m64 __m1, __m64 __m2) {
1033  __vector unsigned short a, b, c;
1034 
1035  a = (__vector unsigned short)vec_splats(__m1);
1036  b = (__vector unsigned short)vec_splats(__m2);
1037  c = vec_subs(a, b);
1038  return (__m64)((__vector long long)c)[0];
1039 }
1040 
1041 extern __inline __m64
1042  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1043  _m_psubusw(__m64 __m1, __m64 __m2) {
1044  return _mm_subs_pu16(__m1, __m2);
1045 }
1046 
1047 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
1048  four 32-bit intermediate results, which are then summed by pairs to
1049  produce two 32-bit results. */
1050 extern __inline __m64
1051  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1052  _mm_madd_pi16(__m64 __m1, __m64 __m2) {
1053  __vector signed short a, b;
1054  __vector signed int c;
1055  __vector signed int zero = {0, 0, 0, 0};
1056 
1057  a = (__vector signed short)vec_splats(__m1);
1058  b = (__vector signed short)vec_splats(__m2);
1059  c = vec_vmsumshm(a, b, zero);
1060  return (__m64)((__vector long long)c)[0];
1061 }
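
/* Editor's note (illustration, not from the original header): with
   elements written least significant first, M1 = {a0,a1,a2,a3} and
   M2 = {b0,b1,b2,b3} produce the two 32-bit sums
   {a0*b0 + a1*b1, a2*b2 + a3*b3}.  */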
1062 
1063 extern __inline __m64
1064  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1065  _m_pmaddwd(__m64 __m1, __m64 __m2) {
1066  return _mm_madd_pi16(__m1, __m2);
1067 }
1068 /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
1069  M2 and produce the high 16 bits of the 32-bit results. */
1070 extern __inline __m64
1071  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1072  _mm_mulhi_pi16(__m64 __m1, __m64 __m2) {
1073  __vector signed short a, b;
1074  __vector signed short c;
1075  __vector signed int w0, w1;
1076  __vector unsigned char xform1 = {
1077 #ifdef __LITTLE_ENDIAN__
1078  0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A,
1079  0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
1080 #else
1081  0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x00,
1082  0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15
1083 #endif
1084  };
1085 
1086  a = (__vector signed short)vec_splats(__m1);
1087  b = (__vector signed short)vec_splats(__m2);
1088 
1089  w0 = vec_vmulesh(a, b);
1090  w1 = vec_vmulosh(a, b);
1091  c = (__vector signed short)vec_perm(w0, w1, xform1);
1092 
1093  return (__m64)((__vector long long)c)[0];
1094 }
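
/* Editor's note (illustration, not from the original header): each
   lane keeps bits 16..31 of the full signed product, e.g.
   0x4000 * 0x4000 = 0x10000000, so that lane's result is 0x1000.  */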
1095 
1096 extern __inline __m64
1097  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1098  _m_pmulhw(__m64 __m1, __m64 __m2) {
1099  return _mm_mulhi_pi16(__m1, __m2);
1100 }
1101 
1102 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
1103  the low 16 bits of the results. */
1104 extern __inline __m64
1105  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1106  _mm_mullo_pi16(__m64 __m1, __m64 __m2) {
1107  __vector signed short a, b, c;
1108 
1109  a = (__vector signed short)vec_splats(__m1);
1110  b = (__vector signed short)vec_splats(__m2);
1111  c = a * b;
1112  return (__m64)((__vector long long)c)[0];
1113 }
1114 
1115 extern __inline __m64
1116  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1117  _m_pmullw(__m64 __m1, __m64 __m2) {
1118  return _mm_mullo_pi16(__m1, __m2);
1119 }
1120 
1121 /* Shift four 16-bit values in M left by COUNT. */
1122 extern __inline __m64
1123  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1124  _mm_sll_pi16(__m64 __m, __m64 __count) {
1125  __vector signed short m, r;
1126  __vector unsigned short c;
1127 
1128  if (__count <= 15) {
1129  m = (__vector signed short)vec_splats(__m);
1130  c = (__vector unsigned short)vec_splats((unsigned short)__count);
1131  r = vec_sl(m, (__vector unsigned short)c);
1132  return (__m64)((__vector long long)r)[0];
1133  } else
1134  return (0);
1135 }
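
/* Editor's note (illustration, not from the original header): the
   explicit count check mirrors the x86 behavior, where a shift count
   greater than 15 clears every element, so _mm_sll_pi16(__m, (__m64)16)
   returns 0.  */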
1136 
1137 extern __inline __m64
1138  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1139  _m_psllw(__m64 __m, __m64 __count) {
1140  return _mm_sll_pi16(__m, __count);
1141 }
1142 
1143 extern __inline __m64
1144  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1145  _mm_slli_pi16(__m64 __m, int __count) {
1146  /* Promote int to long then invoke mm_sll_pi16. */
1147  return _mm_sll_pi16(__m, __count);
1148 }
1149 
1150 extern __inline __m64
1151  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1152  _m_psllwi(__m64 __m, int __count) {
1153  return _mm_slli_pi16(__m, __count);
1154 }
1155 
1156 /* Shift two 32-bit values in M left by COUNT. */
1157 extern __inline __m64
1158  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1159  _mm_sll_pi32(__m64 __m, __m64 __count) {
1160  __m64_union m, res;
1161 
1162  m.as_m64 = __m;
1163 
1164  res.as_int[0] = m.as_int[0] << __count;
1165  res.as_int[1] = m.as_int[1] << __count;
1166  return (res.as_m64);
1167 }
1168 
1169 extern __inline __m64
1170  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1171  _m_pslld(__m64 __m, __m64 __count) {
1172  return _mm_sll_pi32(__m, __count);
1173 }
1174 
1175 extern __inline __m64
1176  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1177  _mm_slli_pi32(__m64 __m, int __count) {
1178  /* Promote int to long then invoke mm_sll_pi32. */
1179  return _mm_sll_pi32(__m, __count);
1180 }
1181 
1182 extern __inline __m64
1183  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1184  _m_pslldi(__m64 __m, int __count) {
1185  return _mm_slli_pi32(__m, __count);
1186 }
1187 
1188 /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
1189 extern __inline __m64
1190  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1191  _mm_sra_pi16(__m64 __m, __m64 __count) {
1192  __vector signed short m, r;
1193  __vector unsigned short c;
1194 
1195  if (__count <= 15) {
1196  m = (__vector signed short)vec_splats(__m);
1197  c = (__vector unsigned short)vec_splats((unsigned short)__count);
1198  r = vec_sra(m, (__vector unsigned short)c);
1199  return (__m64)((__vector long long)r)[0];
1200  } else
1201  return (0);
1202 }
1203 
1204 extern __inline __m64
1205  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1206  _m_psraw(__m64 __m, __m64 __count) {
1207  return _mm_sra_pi16(__m, __count);
1208 }
1209 
1210 extern __inline __m64
1211  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1212  _mm_srai_pi16(__m64 __m, int __count) {
1213  /* Promote int to long then invoke mm_sra_pi16. */
1214  return _mm_sra_pi16(__m, __count);
1215 }
1216 
1217 extern __inline __m64
1218  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1219  _m_psrawi(__m64 __m, int __count) {
1220  return _mm_srai_pi16(__m, __count);
1221 }
1222 
1223 /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
1224 extern __inline __m64
1225  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1226  _mm_sra_pi32(__m64 __m, __m64 __count) {
1227  __m64_union m, res;
1228 
1229  m.as_m64 = __m;
1230 
1231  res.as_int[0] = m.as_int[0] >> __count;
1232  res.as_int[1] = m.as_int[1] >> __count;
1233  return (res.as_m64);
1234 }
1235 
1236 extern __inline __m64
1237  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1238  _m_psrad(__m64 __m, __m64 __count) {
1239  return _mm_sra_pi32(__m, __count);
1240 }
1241 
1242 extern __inline __m64
1243  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1244  _mm_srai_pi32(__m64 __m, int __count) {
1245  /* Promote int to long then invoke mm_sra_pi32. */
1246  return _mm_sra_pi32(__m, __count);
1247 }
1248 
1249 extern __inline __m64
1250  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1251  _m_psradi(__m64 __m, int __count) {
1252  return _mm_srai_pi32(__m, __count);
1253 }
1254 
1255 /* Shift four 16-bit values in M right by COUNT; shift in zeros. */
1256 extern __inline __m64
1257  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1258  _mm_srl_pi16(__m64 __m, __m64 __count) {
1259  __vector unsigned short m, r;
1260  __vector unsigned short c;
1261 
1262  if (__count <= 15) {
1263  m = (__vector unsigned short)vec_splats(__m);
1264  c = (__vector unsigned short)vec_splats((unsigned short)__count);
1265  r = vec_sr(m, (__vector unsigned short)c);
1266  return (__m64)((__vector long long)r)[0];
1267  } else
1268  return (0);
1269 }
1270 
1271 extern __inline __m64
1272  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1273  _m_psrlw(__m64 __m, __m64 __count) {
1274  return _mm_srl_pi16(__m, __count);
1275 }
1276 
1277 extern __inline __m64
1278  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1279  _mm_srli_pi16(__m64 __m, int __count) {
1280  /* Promote int to long then invoke mm_srl_pi16. */
1281  return _mm_srl_pi16(__m, __count);
1282 }
1283 
1284 extern __inline __m64
1285  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1286  _m_psrlwi(__m64 __m, int __count) {
1287  return _mm_srli_pi16(__m, __count);
1288 }
1289 
1290 /* Shift two 32-bit values in M right by COUNT; shift in zeros. */
1291 extern __inline __m64
1292  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1293  _mm_srl_pi32(__m64 __m, __m64 __count) {
1294  __m64_union m, res;
1295 
1296  m.as_m64 = __m;
1297 
1298  res.as_int[0] = (unsigned int)m.as_int[0] >> __count;
1299  res.as_int[1] = (unsigned int)m.as_int[1] >> __count;
1300  return (res.as_m64);
1301 }
1302 
1303 extern __inline __m64
1304  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1305  _m_psrld(__m64 __m, __m64 __count) {
1306  return _mm_srl_pi32(__m, __count);
1307 }
1308 
1309 extern __inline __m64
1310  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1311  _mm_srli_pi32(__m64 __m, int __count) {
1312  /* Promote int to long then invoke mm_srl_pi32. */
1313  return _mm_srl_pi32(__m, __count);
1314 }
1315 
1316 extern __inline __m64
1317  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1318  _m_psrldi(__m64 __m, int __count) {
1319  return _mm_srli_pi32(__m, __count);
1320 }
1321 #endif /* _ARCH_PWR8 */
1322 
1323 /* Creates a vector of two 32-bit values; I0 is least significant. */
1324 extern __inline __m64
1325  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1326  _mm_set_pi32(int __i1, int __i0) {
1327  __m64_union res;
1328 
1329  res.as_int[0] = __i0;
1330  res.as_int[1] = __i1;
1331  return (res.as_m64);
1332 }
1333 
1334 /* Creates a vector of four 16-bit values; W0 is least significant. */
1335 extern __inline __m64
1336  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1337  _mm_set_pi16(short __w3, short __w2, short __w1, short __w0) {
1338  __m64_union res;
1339 
1340  res.as_short[0] = __w0;
1341  res.as_short[1] = __w1;
1342  res.as_short[2] = __w2;
1343  res.as_short[3] = __w3;
1344  return (res.as_m64);
1345 }
1346 
1347 /* Creates a vector of eight 8-bit values; B0 is least significant. */
1348 extern __inline __m64
1349  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1350  _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3,
1351  char __b2, char __b1, char __b0) {
1352  __m64_union res;
1353 
1354  res.as_char[0] = __b0;
1355  res.as_char[1] = __b1;
1356  res.as_char[2] = __b2;
1357  res.as_char[3] = __b3;
1358  res.as_char[4] = __b4;
1359  res.as_char[5] = __b5;
1360  res.as_char[6] = __b6;
1361  res.as_char[7] = __b7;
1362  return (res.as_m64);
1363 }
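
/* Editor's illustration (not part of the original header): element 0
   is the least significant element, so on a little-endian target the
   bytes land as below; the helper is hypothetical.  */
#if 0 /* illustration only */
static inline void example_set_pi8(void) {
  __m64 __v = _mm_set_pi8(7, 6, 5, 4, 3, 2, 1, 0);
  (void)__v; /* __v == 0x0706050403020100ULL on powerpc64le */
}
#endif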
1364 
1365 /* Similar, but with the arguments in reverse order. */
1366 extern __inline __m64
1367  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1368  _mm_setr_pi32(int __i0, int __i1) {
1369  __m64_union res;
1370 
1371  res.as_int[0] = __i0;
1372  res.as_int[1] = __i1;
1373  return (res.as_m64);
1374 }
1375 
1376 extern __inline __m64
1377  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1378  _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
1379  return _mm_set_pi16(__w3, __w2, __w1, __w0);
1380 }
1381 
1382 extern __inline __m64
1383  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1384  _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4,
1385  char __b5, char __b6, char __b7) {
1386  return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1387 }
1388 
1389 /* Creates a vector of two 32-bit values, both elements containing I. */
1390 extern __inline __m64
1391  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1392  _mm_set1_pi32(int __i) {
1393  __m64_union res;
1394 
1395  res.as_int[0] = __i;
1396  res.as_int[1] = __i;
1397  return (res.as_m64);
1398 }
1399 
1400 /* Creates a vector of four 16-bit values, all elements containing W. */
1401 extern __inline __m64
1402  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1403  _mm_set1_pi16(short __w) {
1404 #if _ARCH_PWR9
1405  __vector signed short w;
1406 
1407  w = (__vector signed short)vec_splats(__w);
1408  return (__m64)((__vector long long)w)[0];
1409 #else
1410  __m64_union res;
1411 
1412  res.as_short[0] = __w;
1413  res.as_short[1] = __w;
1414  res.as_short[2] = __w;
1415  res.as_short[3] = __w;
1416  return (res.as_m64);
1417 #endif
1418 }
1419 
1420 /* Creates a vector of eight 8-bit values, all elements containing B. */
1421 extern __inline __m64
1422  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1423  _mm_set1_pi8(signed char __b) {
1424 #if _ARCH_PWR8
1425  __vector signed char b;
1426 
1427  b = (__vector signed char)vec_splats(__b);
1428  return (__m64)((__vector long long)b)[0];
1429 #else
1430  __m64_union res;
1431 
1432  res.as_char[0] = __b;
1433  res.as_char[1] = __b;
1434  res.as_char[2] = __b;
1435  res.as_char[3] = __b;
1436  res.as_char[4] = __b;
1437  res.as_char[5] = __b;
1438  res.as_char[6] = __b;
1439  res.as_char[7] = __b;
1440  return (res.as_m64);
1441 #endif
1442 }
1443 #endif /* _MMINTRIN_H_INCLUDED */
static __inline__ vector signed char __ATTRS_o_ai vec_packs(vector short __a, vector short __b)
Definition: altivec.h:7014
__inline __m64 __m64 __count
Definition: mmintrin.h:596
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition: mmintrin.h:308
#define _m_pcmpgtb
Definition: mmintrin.h:1553
#define _m_empty
Definition: mmintrin.h:1499
#define _m_psllwi
Definition: mmintrin.h:1531
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
Definition: mmintrin.h:258
return vec_perm((__v4sf) __A,(__v4sf) __B,(__vector unsigned char) t)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi16(short __w)
Constructs a 64-bit integer vector of [4 x i16], with each of the 16-bit integer vector elements set ...
Definition: mmintrin.h:1397
__v8hi zero
Definition: emmintrin.h:1397
#define _m_punpckldq
Definition: mmintrin.h:1512
m1 as_m64
Definition: mmintrin.h:237
#define _m_pcmpeqw
Definition: mmintrin.h:1551
__m64_union
Definition: mmintrin.h:52
return vec_sel(__B, __A, m)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void)
Constructs a 64-bit integer vector initialized to zero.
Definition: mmintrin.h:1282
#define _m_paddsb
Definition: mmintrin.h:1516
static __inline__ vector signed char __ATTRS_o_ai vec_mergel(vector signed char __a, vector signed char __b)
Definition: altivec.h:4746
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] to the corres...
Definition: mmintrin.h:437
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi16(__m64 __m1, __m64 __m2)
Converts 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit si...
Definition: mmintrin.h:127
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition: mmintrin.h:237
#define _m_punpckhwd
Definition: mmintrin.h:1508
#define _m_psrld
Definition: mmintrin.h:1542
#define _m_punpckhbw
Definition: mmintrin.h:1507
#define _m_packsswb
Definition: mmintrin.h:1504
#define _m_punpcklbw
Definition: mmintrin.h:1510
static __inline__ vector signed char __ATTRS_o_ai vec_adds(vector signed char __a, vector signed char __b)
Definition: altivec.h:560
#define _m_paddusw
Definition: mmintrin.h:1519
__inline __m64 char char char char char char char __b0
Definition: mmintrin.h:1351
__inline __m64 char char char char char char char __b7
Definition: mmintrin.h:1385
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
Definition: mmintrin.h:1203
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
Definition: mmintrin.h:329
#define _m_pcmpgtd
Definition: mmintrin.h:1555
#define _m_psrlw
Definition: mmintrin.h:1540
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pu16(__m64 __m1, __m64 __m2)
Converts 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit un...
Definition: mmintrin.h:187
static __inline__ vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a, vector signed char __b)
Definition: altivec.h:2140
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_xor_si64(__m64 __m1, __m64 __m2)
Performs a bitwise exclusive OR of two 64-bit integer vectors.
Definition: mmintrin.h:1137
#define _m_pand
Definition: mmintrin.h:1546
__inline __m64 int __i0
Definition: mmintrin.h:1326
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
Definition: mmintrin.h:1247
#define _m_psubb
Definition: mmintrin.h:1520
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi8(char __b)
Constructs a 64-bit integer vector of [8 x i8], with each of the 8-bit integer vector elements set to...
Definition: mmintrin.h:1415
#define _m_psllq
Definition: mmintrin.h:1534
c
Definition: emmintrin.h:306
static __inline__ vector signed char __ATTRS_o_ai vec_add(vector signed char __a, vector signed char __b)
Definition: altivec.h:198
#define _m_to_int
Definition: mmintrin.h:1502
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
Definition: mmintrin.h:214
w0
Definition: emmintrin.h:1415
#define _m_pmaddwd
Definition: mmintrin.h:1527
#define _m_psubw
Definition: mmintrin.h:1521
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit unsigned integer element of the second 64-bit integer vector of [8 x i8] from th...
Definition: mmintrin.h:614
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu16(__m64 __m1, __m64 __m2)
Adds each 16-bit unsigned integer element of the first 64-bit integer vector of [4 x i16] to the corr...
Definition: mmintrin.h:481
b
Definition: emmintrin.h:321
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
Constructs a 64-bit integer vector initialized with the specified 16-bit integer values.
Definition: mmintrin.h:1326
#define _m_psrlqi
Definition: mmintrin.h:1545
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit signed integer element of the first 64-bit integer vector of [8 x i8] to the correspo...
Definition: mmintrin.h:414
__inline __m64 char char char char char char __b1
Definition: mmintrin.h:1350
__inline __m64 int __i1
Definition: mmintrin.h:1368
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi16(__m64 __m, __m64 __count)
Left-shifts each 16-bit signed integer element of the first parameter, which is a 64-bit integer vect...
Definition: mmintrin.h:730
static __inline__ vector unsigned char __ATTRS_o_ai vec_sl(vector unsigned char __a, vector unsigned char __b)
Definition: altivec.h:8029
#define _m_psradi
Definition: mmintrin.h:1539
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_si64(__m64 __m, int __count)
Right-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the s...
Definition: mmintrin.h:1062
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
Definition: mmintrin.h:1225
#define _m_paddw
Definition: mmintrin.h:1514
res as_int[0]
Definition: mmintrin.h:292
__vector unsigned char xform1
Definition: emmintrin.h:1405
__asm__("vmuleuw %0,%1,%2" :"=v"(result) :"v"(__A), "v"(__B) :)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit integer element of the second 64-bit integer vector of [8 x i8] from the corresp...
Definition: mmintrin.h:502
static __inline__ vector signed char __ATTRS_o_ai vec_subs(vector signed char __a, vector signed char __b)
Definition: altivec.h:11173
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:999
#define _m_psubsw
Definition: mmintrin.h:1524
#define _m_packssdw
Definition: mmintrin.h:1505
#define _m_pmullw
Definition: mmintrin.h:1529
return() __m64((__vector long long) c)[0]
#define _m_pslld
Definition: mmintrin.h:1532
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi32(int __i)
Constructs a 64-bit integer vector of [2 x i32], with each of the 32-bit integer vector elements set ...
Definition: mmintrin.h:1378
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit signed integer element of the second 64-bit integer vector of [8 x i8] from the ...
Definition: mmintrin.h:567
#define _m_paddsw
Definition: mmintrin.h:1517
#define _m_psubusb
Definition: mmintrin.h:1525
static __inline__ vector bool char __ATTRS_o_ai vec_cmpeq(vector signed char __a, vector signed char __b)
Definition: altivec.h:1625
#define _m_psubd
Definition: mmintrin.h:1522
static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a)
Definition: altivec.h:13651
static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector signed char __a, vector signed char __b)
Definition: altivec.h:10904
static __inline__ vector signed char __ATTRS_o_ai vec_sra(vector signed char __a, vector unsigned char __b)
Definition: altivec.h:9574
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8...
Definition: mmintrin.h:350
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, __m64 __b)
Subtracts signed or unsigned 64-bit integer values and writes the difference to the corresponding bit...
Definition: emmintrin.h:2625
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:908
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
Definition: mmintrin.h:976
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
Definition: mmintrin.h:1269
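The compare intrinsics return per-lane masks rather than booleans, which is what makes them useful for branchless selection. A minimal sketch with assumed inputs:
#include <mmintrin.h>

static void cmp_demo(void) {
  __m64 m1 = _mm_set_pi32(5, -1);
  __m64 m2 = _mm_set_pi32(3, 2);
  /* all-ones where the signed compare holds, all-zeros elsewhere:
     upper lane 0xFFFFFFFF (5 > 3), lower lane 0x00000000 (-1 > 2 is false) */
  __m64 gt = _mm_cmpgt_pi32(m1, m2);
  (void)gt;
}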
static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtm64_si64(__m64 __m)
Casts a 64-bit integer vector into a 64-bit signed integer value.
Definition: mmintrin.h:97
#define _m_por
Definition: mmintrin.h:1548
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu8(__m64 __m1, __m64 __m2)
Adds each 8-bit unsigned integer element of the first 64-bit integer vector of [8 x i8] to the corres...
Definition: mmintrin.h:459
#define _m_psrawi
Definition: mmintrin.h:1537
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_and_si64(__m64 __m1, __m64 __m2)
Performs a bitwise AND of two 64-bit integer vectors.
Definition: mmintrin.h:1080
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
Definition: mmintrin.h:285
#define _m_paddd
Definition: mmintrin.h:1515
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit signed integer element of the second 64-bit integer vector of [4 x i16] from th...
Definition: mmintrin.h:590
#define _m_to_int64
Definition: mmintrin.h:1503
#define _m_pxor
Definition: mmintrin.h:1549
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_si64(__m64 __m, int __count)
Left-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the se...
Definition: mmintrin.h:837
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mullo_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:707
#define _m_pmulhw
Definition: mmintrin.h:1528
#define _m_from_int64
Definition: mmintrin.h:1501
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi32(__m64 __m, int __count)
Left-shifts each 32-bit signed integer element of a 64-bit integer vector of [2 x i32] by the number ...
Definition: mmintrin.h:797
#define _m_pslldi
Definition: mmintrin.h:1533
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_madd_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:665
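_mm_madd_pi16 is the building block for small fixed-point dot products: it forms four 32-bit products and sums adjacent pairs. A sketch with values of my own choosing:
#include <mmintrin.h>

static void madd_demo(void) {
  __m64 a = _mm_set_pi16(1, 2, 3, 4);
  __m64 b = _mm_set_pi16(10, 20, 30, 40);
  /* lower i32 = 4*40 + 3*30 = 250; upper i32 = 2*20 + 1*10 = 50 */
  __m64 acc = _mm_madd_pi16(a, b);
  (void)acc;
}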
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit unsigned integer element of the second 64-bit integer vector of [4 x i16] from ...
Definition: mmintrin.h:638
#define _m_psubsb
Definition: mmintrin.h:1523
#define _m_punpcklwd
Definition: mmintrin.h:1511
#define _m_pcmpgtw
Definition: mmintrin.h:1554
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit integer element of the second 64-bit integer vector of [4 x i16] from the corre...
Definition: mmintrin.h:523
static __inline__ vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a, vector signed char __b)
Definition: altivec.h:1908
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi32(int __i1, int __i0)
Constructs a 64-bit integer vector initialized with the specified 32-bit integer values.
Definition: mmintrin.h:1303
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi32(__m64 __m1, __m64 __m2)
Subtracts each 32-bit integer element of the second 64-bit integer vector of [2 x i32] from the corre...
Definition: mmintrin.h:544
#define _m_from_int
Definition: mmintrin.h:1500
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi32(int __i0, int __i1)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 32-bit integer va...
Definition: mmintrin.h:1436
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi32_si64(int __i)
Constructs a 64-bit integer vector, setting the lower 32 bits to the value of the 32-bit integer para...
Definition: mmintrin.h:48
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
Definition: mmintrin.h:1159
#define _m_psraw
Definition: mmintrin.h:1536
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
Definition: mmintrin.h:931
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)))
Definition: mmintrin.h:13
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_si64(__m64 __m, __m64 __count)
Right-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit ...
Definition: mmintrin.h:1041
#define _m_pandn
Definition: mmintrin.h:1547
#define _m_psrldi
Definition: mmintrin.h:1543
#define _m_psrlwi
Definition: mmintrin.h:1541
#define _m_psrad
Definition: mmintrin.h:1538
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi64_si32(__m64 __m)
Returns the lower 32 bits of a 64-bit integer vector as a 32-bit signed integer.
Definition: mmintrin.h:65
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:954
#define _m_punpckhdq
Definition: mmintrin.h:1509
static __inline__ vector signed char __ATTRS_o_ai vec_pack(vector signed short __a, vector signed short __b)
Definition: altivec.h:6688
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
Definition: mmintrin.h:1021
#define _m_psllw
Definition: mmintrin.h:1530
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
Definition: mmintrin.h:686
#define _m_pcmpeqd
Definition: mmintrin.h:1552
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit integer element of the first 64-bit integer vector of [4 x i16] to the corresponding...
Definition: mmintrin.h:371
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 8-bit integer val...
Definition: mmintrin.h:1490
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_andnot_si64(__m64 __m1, __m64 __m2)
Performs a bitwise NOT of the first 64-bit integer vector, and then performs a bitwise AND of the int...
Definition: mmintrin.h:1101
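A common use of _mm_andnot_si64 (which complements its first operand) is the mask-driven select idiom, combining it with the AND/OR entries nearby and the masks produced by the compares. This helper is illustrative only, not part of the header:
#include <mmintrin.h>

/* branchless per-bit select: (mask & a) | (~mask & b) */
static __m64 select_si64(__m64 mask, __m64 a, __m64 b) {
  return _mm_or_si64(_mm_and_si64(mask, a), _mm_andnot_si64(mask, b));
}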
#define _m_paddusb
Definition: mmintrin.h:1518
#define _m_paddb
Definition: mmintrin.h:1513
static __inline__ vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, vector unsigned char __b)
Definition: altivec.h:9484
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
Definition: mmintrin.h:1181
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi16(__m64 __m, int __count)
Left-shifts each 16-bit signed integer element of a 64-bit integer vector of [4 x i16] by the number ...
Definition: mmintrin.h:752
#define _m_psllqi
Definition: mmintrin.h:1535
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Constructs a 64-bit integer vector initialized with the specified 8-bit integer values.
Definition: mmintrin.h:1357
#define _m_packuswb
Definition: mmintrin.h:1506
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi32(__m64 __m1, __m64 __m2)
Converts 32-bit signed integers from both 64-bit integer vector parameters of [2 x i32] into 16-bit s...
Definition: mmintrin.h:157
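_mm_packs_pi32 narrows with signed saturation, so out-of-range 32-bit values clamp rather than truncate. A short sketch with assumed inputs:
#include <mmintrin.h>

static void pack_demo(void) {
  __m64 lo = _mm_set_pi32(1, 2);
  __m64 hi = _mm_set_pi32(70000, -70000);
  /* 70000 clamps to 32767 and -70000 to -32768 in the i16 result;
     the two lanes of the first argument land in the low half */
  __m64 packed = _mm_packs_pi32(lo, hi);
  (void)packed;
}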
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 16-bit integer va...
Definition: mmintrin.h:1459
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi64_m64(long long __i)
Casts a 64-bit signed integer value into a 64-bit integer vector.
Definition: mmintrin.h:81
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, __m64 __b)
Adds two signed or unsigned 64-bit integer values, returning the lower 64 bits of the sum...
Definition: emmintrin.h:2179
#define _m_psrlq
Definition: mmintrin.h:1544
#define _m_pcmpeqb
Definition: mmintrin.h:1550
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
Definition: mmintrin.h:884
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
Definition: mmintrin.h:861
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi32(__m64 __m, __m64 __count)
Left-shifts each 32-bit signed integer element of the first parameter, which is a 64-bit integer vect...
Definition: mmintrin.h:775
#define _m_psubusw
Definition: mmintrin.h:1526
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi32(__m64 __m1, __m64 __m2)
Adds each 32-bit integer element of the first 64-bit integer vector of [2 x i32] to the corresponding...
Definition: mmintrin.h:392
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_si64(__m64 __m, __m64 __count)
Left-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit i...
Definition: mmintrin.h:817
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_or_si64(__m64 __m1, __m64 __m2)
Performs a bitwise OR of two 64-bit integer vectors.
Definition: mmintrin.h:1119