13#ifndef NO_WARN_X86_INTRINSICS
33 "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
39#if defined(__powerpc64__) && \
40 (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))
48typedef __vector
double __v2df;
49typedef __vector
float __v4f;
50typedef __vector
long long __v2di;
51typedef __vector
unsigned long long __v2du;
52typedef __vector
int __v4si;
53typedef __vector
unsigned int __v4su;
54typedef __vector
short __v8hi;
55typedef __vector
unsigned short __v8hu;
56typedef __vector
signed char __v16qi;
57typedef __vector
unsigned char __v16qu;
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components; hence __may_alias__.  */
typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__));
typedef double __m128d __attribute__((__vector_size__(16), __may_alias__));

/* Unaligned variants of the same types, for unaligned load/store.  */
typedef long long __m128i_u
    __attribute__((__vector_size__(16), __may_alias__, __aligned__(1)));
typedef double __m128d_u
    __attribute__((__vector_size__(16), __may_alias__, __aligned__(1)));
/* Build a 2-bit shuffle-control immediate: element x goes to the high
   slot, element y to the low slot (each selector is 0 or 1).  */
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
74extern __inline __m128d
75 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
77 return __extension__(__m128d){__F, 0.0};
81extern __inline __m128d
82 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84 return __extension__(__m128d){__F, __F};
87extern __inline __m128d
88 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
94extern __inline __m128d
95 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
97 return __extension__(__m128d){__X, __W};
101extern __inline __m128d
102 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
104 return __extension__(__m128d){__W, __X};
108extern __inline __m128d
109 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
116extern __inline __m128d
117 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
123extern __inline __m128d
124 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
126 __v2df __result = (__v2df)__A;
127 __result[0] = ((__v2df)__B)[0];
128 return (__m128d)__result;
132extern __inline __m128d
133 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
135 return ((__m128d)
vec_ld(0, (__v16qu *)
__P));
139extern __inline __m128d
140 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
142 return (vec_vsx_ld(0,
__P));
146extern __inline __m128d
147 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
153extern __inline __m128d
154 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
159extern __inline __m128d
160 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
166extern __inline __m128d
167 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
170 return (__m128d)vec_xxpermdi(__tmp, __tmp, 2);
175 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
182 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
184 *(__m128d_u *)
__P = __A;
189 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
191 *
__P = ((__v2df)__A)[0];
194extern __inline
double
195 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
197 return ((__v2df)__A)[0];
201 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
208 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
210 *
__P = ((__v2df)__A)[1];
215 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
221 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
228 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
234extern __inline
long long
235 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
237 return ((__v2di)__A)[0];
241extern __inline
long long
242 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
243 _mm_cvtsi128_si64x(__m128i __A) {
244 return ((__v2di)__A)[0];
247extern __inline __m128d
248 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
250 return (__m128d)((__v2df)__A + (__v2df)__B);
256extern __inline __m128d
257 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
259 __A[0] = __A[0] + __B[0];
263extern __inline __m128d
264 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
266 return (__m128d)((__v2df)__A - (__v2df)__B);
269extern __inline __m128d
270 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
272 __A[0] = __A[0] - __B[0];
276extern __inline __m128d
277 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
279 return (__m128d)((__v2df)__A * (__v2df)__B);
282extern __inline __m128d
283 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
285 __A[0] = __A[0] * __B[0];
289extern __inline __m128d
290 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
292 return (__m128d)((__v2df)__A / (__v2df)__B);
295extern __inline __m128d
296 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
298 __A[0] = __A[0] / __B[0];
302extern __inline __m128d
303 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
305 return (vec_sqrt(__A));
309extern __inline __m128d
310 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
317extern __inline __m128d
318 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
323extern __inline __m128d
324 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
333extern __inline __m128d
334 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
339extern __inline __m128d
340 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
349extern __inline __m128d
350 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
352 return ((__m128d)
vec_cmpeq((__v2df)__A, (__v2df)__B));
355extern __inline __m128d
356 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
358 return ((__m128d)
vec_cmplt((__v2df)__A, (__v2df)__B));
361extern __inline __m128d
362 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
364 return ((__m128d)
vec_cmple((__v2df)__A, (__v2df)__B));
367extern __inline __m128d
368 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
370 return ((__m128d)
vec_cmpgt((__v2df)__A, (__v2df)__B));
373extern __inline __m128d
374 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
376 return ((__m128d)
vec_cmpge((__v2df)__A, (__v2df)__B));
379extern __inline __m128d
380 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
382 __v2df __temp = (__v2df)
vec_cmpeq((__v2df)__A, (__v2df)__B);
383 return ((__m128d)
vec_nor(__temp, __temp));
386extern __inline __m128d
387 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
389 return ((__m128d)
vec_cmpge((__v2df)__A, (__v2df)__B));
392extern __inline __m128d
393 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
395 return ((__m128d)
vec_cmpgt((__v2df)__A, (__v2df)__B));
398extern __inline __m128d
399 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
401 return ((__m128d)
vec_cmple((__v2df)__A, (__v2df)__B));
404extern __inline __m128d
405 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
407 return ((__m128d)
vec_cmplt((__v2df)__A, (__v2df)__B));
410extern __inline __m128d
411 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
421extern __inline __m128d
422 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428 __d = (__v2du)
vec_cmpeq((__v2df)__B, (__v2df)__B);
432 return ((__m128d)vec_orc(
__c, __d));
437 __d = (__v2du)
vec_cmpeq((__v2df)__B, (__v2df)__B);
445extern __inline __m128d
446 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
461extern __inline __m128d
462 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
471extern __inline __m128d
472 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
481extern __inline __m128d
482 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
491extern __inline __m128d
492 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
501extern __inline __m128d
502 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
512extern __inline __m128d
513 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
523extern __inline __m128d
524 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
534extern __inline __m128d
535 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
545extern __inline __m128d
546 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
556extern __inline __m128d
557 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
561 return (__m128d)
_mm_setr_pd(__r[0], ((__v2df)__A)[1]);
564extern __inline __m128d
565 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
580 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
582 return (__A[0] == __B[0]);
586 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
588 return (__A[0] < __B[0]);
592 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
594 return (__A[0] <= __B[0]);
598 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
600 return (__A[0] > __B[0]);
604 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
606 return (__A[0] >= __B[0]);
610 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
612 return (__A[0] != __B[0]);
616 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
618 return (__A[0] == __B[0]);
622 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
624 return (__A[0] < __B[0]);
628 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
630 return (__A[0] <= __B[0]);
634 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
636 return (__A[0] > __B[0]);
640 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
642 return (__A[0] >= __B[0]);
646 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
648 return (__A[0] != __B[0]);
652extern __inline __m128i
653 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
655 return __extension__(__m128i)(__v2di){__q0, __q1};
658extern __inline __m128i
659 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
664extern __inline __m128i
665 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
667 return __extension__(__m128i)(__v4si){__q0, __q1, __q2, __q3};
670extern __inline __m128i
671 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
672 _mm_set_epi16(
short __q7,
short __q6,
short __q5,
short __q4,
short __q3,
673 short __q2,
short __q1,
short __q0) {
674 return __extension__(__m128i)(__v8hi){__q0, __q1, __q2, __q3,
675 __q4, __q5, __q6, __q7};
678extern __inline __m128i
679 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
680 _mm_set_epi8(
char __q15,
char __q14,
char __q13,
char __q12,
char __q11,
681 char __q10,
char __q09,
char __q08,
char __q07,
char __q06,
682 char __q05,
char __q04,
char __q03,
char __q02,
char __q01,
684 return __extension__(__m128i)(__v16qi){
685 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
686 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15};
690extern __inline __m128i
691 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
696extern __inline __m128i
697 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
702extern __inline __m128i
703 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
708extern __inline __m128i
709 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
711 return _mm_set_epi16(__A, __A, __A, __A, __A, __A, __A, __A);
714extern __inline __m128i
715 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
717 return _mm_set_epi8(__A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A,
718 __A, __A, __A, __A, __A);
723extern __inline __m128i
724 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
729extern __inline __m128i
730 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
735extern __inline __m128i
736 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
737 _mm_setr_epi16(
short __q0,
short __q1,
short __q2,
short __q3,
short __q4,
738 short __q5,
short __q6,
short __q7) {
739 return _mm_set_epi16(__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
742extern __inline __m128i
743 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
744 _mm_setr_epi8(
char __q00,
char __q01,
char __q02,
char __q03,
char __q04,
745 char __q05,
char __q06,
char __q07,
char __q08,
char __q09,
746 char __q10,
char __q11,
char __q12,
char __q13,
char __q14,
748 return _mm_set_epi8(__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
749 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
753extern __inline __m128i
754 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
759extern __inline __m128i
760 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
762 return (__m128i)(vec_vsx_ld(0, (
signed int const *)
__P));
765extern __inline __m128i
766 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
772 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
778 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
784 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
786 *(
long long *)
__P = ((__v2di)__B)[0];
790 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
792 return (__m64)((__v2di)__B)[0];
795extern __inline __m128i
796 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
801extern __inline __m128i
802 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
808extern __inline __m128i
809 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
816extern __inline __m128i
817 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
819 return __extension__(__m128i)(__v4si){0, 0, 0, 0};
823extern __inline __m128d
824 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
831 return (__m128d)
vec_ctf(__val, 0);
835extern __inline __m128
836 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
838 return ((__m128)
vec_ctf((__v4si)__A, 0));
841extern __inline __m128i
842 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
844 __v2df __rounded = vec_rint(__A);
845 __v4si __result, __temp;
846 const __v4si __vzero = {0, 0, 0, 0};
850 __asm__(
"xvcvdpsxws %x0,%x1" :
"=wa"(__temp) :
"wa"(__rounded) :);
853#ifdef __LITTLE_ENDIAN__
854 __temp = vec_mergeo(__temp, __temp);
856 __temp = vec_mergee(__temp, __temp);
858 __result = (__v4si)vec_vpkudum((__vector
long long)__temp,
859 (__vector
long long)__vzero);
862 const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
863 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f};
864 __result = (__v4si)
vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm);
867 return (__m128i)__result;
871 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
875 return (__m64)__result[0];
878extern __inline __m128
879 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
883 const __v4si __vzero = {0, 0, 0, 0};
885 __asm__(
"xvcvdpsp %x0,%x1" :
"=wa"(__temp) :
"wa"(__A) :);
888#ifdef __LITTLE_ENDIAN__
889 __temp = vec_mergeo(__temp, __temp);
891 __temp = vec_mergee(__temp, __temp);
893 __result = (__v4sf)vec_vpkudum((__vector
long long)__temp,
894 (__vector
long long)__vzero);
897 const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
898 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f};
899 __result = (__v4sf)
vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm);
902 return ((__m128)__result);
905extern __inline __m128i
906 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
910 const __v4si __vzero = {0, 0, 0, 0};
914 __asm__(
"xvcvdpsxws %x0,%x1" :
"=wa"(__temp) :
"wa"(__A) :);
917#ifdef __LITTLE_ENDIAN__
918 __temp = vec_mergeo(__temp, __temp);
920 __temp = vec_mergee(__temp, __temp);
922 __result = (__v4si)vec_vpkudum((__vector
long long)__temp,
923 (__vector
long long)__vzero);
926 const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
927 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f};
928 __result = (__v4si)
vec_perm((__v16qu)__temp, (__v16qu)__vzero, __pkperm);
932 return ((__m128i)__result);
936 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
940 return (__m64)__result[0];
944 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
946 return ((__v4si)__A)[0];
950extern __inline __m128d
951 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
959 __result =
vec_ctf((__vector
signed long long)__tmp2, 0);
960 return (__m128d)__result;
964extern __inline __m128i
965 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
970 __rounded = vec_rint((__v4sf)__A);
971 __result =
vec_cts(__rounded, 0);
972 return (__m128i)__result;
975extern __inline __m128i
976 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
980 __result =
vec_cts((__v4sf)__A, 0);
981 return (__m128i)__result;
984extern __inline __m128d
985 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
989 return (__m128d)vec_doubleh((__v4sf)__A);
993 __v4sf
__a = (__v4sf)__A;
996#ifdef __LITTLE_ENDIAN__
1001 __temp = __builtin_vsx_xxsldwi(
__a,
__a, 3);
1002 __temp = __builtin_vsx_xxsldwi(
__a, __temp, 2);
1010 __asm__(
" xvcvspdp %x0,%x1" :
"=wa"(__result) :
"wa"(__temp) :);
1011 return (__m128d)__result;
1016 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1018 __v2df __rounded = vec_rint((__v2df)__A);
1019 int __result = ((__v2df)__rounded)[0];
1024extern __inline
long long
1025 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1026 _mm_cvtsd_si64(__m128d __A) {
1027 __v2df __rounded = vec_rint((__v2df)__A);
1028 long long __result = ((__v2df)__rounded)[0];
1034extern __inline
long long
1035 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1036 _mm_cvtsd_si64x(__m128d __A) {
1037 return _mm_cvtsd_si64((__v2df)__A);
1041 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1043 int __result = ((__v2df)__A)[0];
1049extern __inline
long long
1050 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1051 _mm_cvttsd_si64(__m128d __A) {
1052 long long __result = ((__v2df)__A)[0];
1058extern __inline
long long
1059 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1060 _mm_cvttsd_si64x(__m128d __A) {
1061 return _mm_cvttsd_si64(__A);
1064extern __inline __m128
1065 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1067 __v4sf __result = (__v4sf)__A;
1069#ifdef __LITTLE_ENDIAN__
1072 __v2df __temp_b =
vec_splat((__v2df)__B, 0);
1075 __result = __builtin_vsx_xxsldwi(__result, __result, 3);
1077 __asm__(
"xscvdpsp %x0,%x1" :
"=wa"(__temp_s) :
"wa"(__temp_b) :);
1079 __result = __builtin_vsx_xxsldwi(__result, __temp_s, 1);
1081 __result[0] = ((__v2df)__B)[0];
1083 return (__m128)__result;
1086extern __inline __m128d
1087 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1089 __v2df __result = (__v2df)__A;
1092 return (__m128d)__result;
1096extern __inline __m128d
1097 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1098 _mm_cvtsi64_sd(__m128d __A,
long long __B) {
1099 __v2df __result = (__v2df)__A;
1102 return (__m128d)__result;
1106extern __inline __m128d
1107 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1108 _mm_cvtsi64x_sd(__m128d __A,
long long __B) {
1109 return _mm_cvtsi64_sd(__A, __B);
1112extern __inline __m128d
1113 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1115#ifdef __LITTLE_ENDIAN__
1117 __v4sf __temp =
vec_splat((__v4sf)__B, 0);
1120 __asm__(
"xscvspdp %x0,%x1" :
"=wa"(__res) :
"wa"(__temp) :);
1121 return (__m128d)
vec_mergel(__res, (__v2df)__A);
1123 __v2df __res = (__v2df)__A;
1124 __res[0] = ((__v4sf)__B)[0];
1125 return (__m128d)__res;
1129extern __inline __m128d
1130 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1132 __vector
double __result;
1133 const int __litmsk = __mask & 0x3;
1138 else if (__litmsk == 1)
1139 __result = vec_xxpermdi(__B, __A, 2);
1140 else if (__litmsk == 2)
1141 __result = vec_xxpermdi(__B, __A, 1);
1143 else if (__litmsk == 1)
1144 __result = vec_xxpermdi(__A, __B, 2);
1145 else if (__litmsk == 2)
1146 __result = vec_xxpermdi(__A, __B, 1);
1154extern __inline __m128d
1155 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1157 return (__m128d)
vec_mergel((__v2df)__A, (__v2df)__B);
1160extern __inline __m128d
1161 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1163 return (__m128d)
vec_mergeh((__v2df)__A, (__v2df)__B);
1166extern __inline __m128d
1167 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1169 __v2df __result = (__v2df)__A;
1171 return (__m128d)__result;
1174extern __inline __m128d
1175 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1177 __v2df __result = (__v2df)__A;
1179 return (__m128d)__result;
1187 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1190 return vec_extractm((__v2du)__A);
1192 __vector
unsigned long long __result;
1193 static const __vector
unsigned int __perm_mask = {
1194#ifdef __LITTLE_ENDIAN__
1195 0x80800040, 0x80808080, 0x80808080, 0x80808080
1197 0x80808080, 0x80808080, 0x80808080, 0x80804000
1201 __result = ((__vector
unsigned long long)vec_vbpermq(
1202 (__vector
unsigned char)__A, (__vector
unsigned char)__perm_mask));
1204#ifdef __LITTLE_ENDIAN__
1213extern __inline __m128i
1214 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1216 return (__m128i)
vec_packs((__v8hi)__A, (__v8hi)__B);
1219extern __inline __m128i
1220 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1222 return (__m128i)
vec_packs((__v4si)__A, (__v4si)__B);
1225extern __inline __m128i
1226 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1228 return (__m128i)
vec_packsu((__v8hi)__A, (__v8hi)__B);
1231extern __inline __m128i
1232 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1234 return (__m128i)
vec_mergel((__v16qu)__A, (__v16qu)__B);
1237extern __inline __m128i
1238 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1240 return (__m128i)
vec_mergel((__v8hu)__A, (__v8hu)__B);
1243extern __inline __m128i
1244 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1246 return (__m128i)
vec_mergel((__v4su)__A, (__v4su)__B);
1249extern __inline __m128i
1250 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1252 return (__m128i)
vec_mergel((__vector
long long)__A, (__vector
long long)__B);
1255extern __inline __m128i
1256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1258 return (__m128i)
vec_mergeh((__v16qu)__A, (__v16qu)__B);
1261extern __inline __m128i
1262 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1264 return (__m128i)
vec_mergeh((__v8hi)__A, (__v8hi)__B);
1267extern __inline __m128i
1268 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1270 return (__m128i)
vec_mergeh((__v4si)__A, (__v4si)__B);
1273extern __inline __m128i
1274 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1276 return (__m128i)
vec_mergeh((__vector
long long)__A, (__vector
long long)__B);
1279extern __inline __m128i
1280 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1282 return (__m128i)((__v16qu)__A + (__v16qu)__B);
1285extern __inline __m128i
1286 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1288 return (__m128i)((__v8hu)__A + (__v8hu)__B);
1291extern __inline __m128i
1292 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1294 return (__m128i)((__v4su)__A + (__v4su)__B);
1297extern __inline __m128i
1298 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1300 return (__m128i)((__v2du)__A + (__v2du)__B);
1303extern __inline __m128i
1304 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1306 return (__m128i)
vec_adds((__v16qi)__A, (__v16qi)__B);
1309extern __inline __m128i
1310 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1312 return (__m128i)
vec_adds((__v8hi)__A, (__v8hi)__B);
1315extern __inline __m128i
1316 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1318 return (__m128i)
vec_adds((__v16qu)__A, (__v16qu)__B);
1321extern __inline __m128i
1322 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1324 return (__m128i)
vec_adds((__v8hu)__A, (__v8hu)__B);
1327extern __inline __m128i
1328 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1330 return (__m128i)((__v16qu)__A - (__v16qu)__B);
1333extern __inline __m128i
1334 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1336 return (__m128i)((__v8hu)__A - (__v8hu)__B);
1339extern __inline __m128i
1340 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1342 return (__m128i)((__v4su)__A - (__v4su)__B);
1345extern __inline __m128i
1346 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1348 return (__m128i)((__v2du)__A - (__v2du)__B);
1351extern __inline __m128i
1352 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1354 return (__m128i)
vec_subs((__v16qi)__A, (__v16qi)__B);
1357extern __inline __m128i
1358 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1360 return (__m128i)
vec_subs((__v8hi)__A, (__v8hi)__B);
1363extern __inline __m128i
1364 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1366 return (__m128i)
vec_subs((__v16qu)__A, (__v16qu)__B);
1369extern __inline __m128i
1370 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1372 return (__m128i)
vec_subs((__v8hu)__A, (__v8hu)__B);
1375extern __inline __m128i
1376 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1378 __vector
signed int __zero = {0, 0, 0, 0};
1380 return (__m128i)vec_vmsumshm((__v8hi)__A, (__v8hi)__B, __zero);
1383extern __inline __m128i
1384 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1386 __vector
signed int __w0, __w1;
1388 __vector
unsigned char __xform1 = {
1389#ifdef __LITTLE_ENDIAN__
1390 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A,
1391 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
1393 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x08,
1394 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D
1398 __w0 = vec_vmulesh((__v8hi)__A, (__v8hi)__B);
1399 __w1 = vec_vmulosh((__v8hi)__A, (__v8hi)__B);
1400 return (__m128i)
vec_perm(__w0, __w1, __xform1);
1403extern __inline __m128i
1404 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1406 return (__m128i)((__v8hi)__A * (__v8hi)__B);
1409extern __inline __m64
1410 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1412 unsigned int __a = __A;
1413 unsigned int __b = __B;
1415 return ((__m64)
__a * (__m64)
__b);
1419extern __inline __m128i
1420 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1425#ifdef __LITTLE_ENDIAN__
1427 __asm__(
"vmulouw %0,%1,%2" :
"=v"(__result) :
"v"(__A),
"v"(__B) :);
1430 __asm__(
"vmuleuw %0,%1,%2" :
"=v"(__result) :
"v"(__A),
"v"(__B) :);
1432 return (__m128i)__result;
1434 return (__m128i)
vec_mule((__v4su)__A, (__v4su)__B);
1439extern __inline __m128i
1440 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1443 __v8hi __result = {0, 0, 0, 0, 0, 0, 0, 0};
1445 if (__B >= 0 && __B < 16) {
1446 if (__builtin_constant_p(__B))
1451 __result =
vec_sl((__v8hi)__A, __lshift);
1454 return (__m128i)__result;
1457extern __inline __m128i
1458 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1461 __v4si __result = {0, 0, 0, 0};
1463 if (__B >= 0 && __B < 32) {
1464 if (__builtin_constant_p(__B) && __B < 16)
1469 __result =
vec_sl((__v4si)__A, __lshift);
1472 return (__m128i)__result;
1476extern __inline __m128i
1477 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1480 __v2di __result = {0, 0};
1482 if (__B >= 0 && __B < 64) {
1483 if (__builtin_constant_p(__B) && __B < 16)
1486 __lshift = (__v2du)
vec_splats((
unsigned int)__B);
1488 __result =
vec_sl((__v2di)__A, __lshift);
1491 return (__m128i)__result;
1495extern __inline __m128i
1496 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1498 __v8hu __rshift = {15, 15, 15, 15, 15, 15, 15, 15};
1502 if (__builtin_constant_p(__B))
1507 __result =
vec_sra((__v8hi)__A, __rshift);
1509 return (__m128i)__result;
1512extern __inline __m128i
1513 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1515 __v4su __rshift = {31, 31, 31, 31};
1519 if (__builtin_constant_p(__B)) {
1523 __rshift = (__v4su)
vec_splats((
unsigned int)__B);
1527 __result =
vec_sra((__v4si)__A, __rshift);
1529 return (__m128i)__result;
1532extern __inline __m128i
1533 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1536 const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1539 __result =
vec_sld((__v16qu)__A, __zeros, __N);
1543 return (__m128i)__result;
1546extern __inline __m128i
1547 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1550 const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1553#ifdef __LITTLE_ENDIAN__
1554 if (__builtin_constant_p(__N))
1558 __result =
vec_sld(__zeros, (__v16qu)__A, (16 - __N));
1562 __v16qu __shift =
vec_splats((
unsigned char)(__N * 8));
1563#ifdef __LITTLE_ENDIAN__
1564 __result =
vec_sro((__v16qu)__A, __shift);
1566 __result =
vec_slo((__v16qu)__A, __shift);
1572 return (__m128i)__result;
1575extern __inline __m128i
1576 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1581extern __inline __m128i
1582 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1585 const __v16qu __zeros = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1588#ifdef __LITTLE_ENDIAN__
1589 __result =
vec_sld((__v16qu)__A, __zeros, _imm5);
1591 __result =
vec_sld(__zeros, (__v16qu)__A, (16 - _imm5));
1596 return (__m128i)__result;
1599extern __inline __m128i
1600 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1604 __v8hi __result = {0, 0, 0, 0, 0, 0, 0, 0};
1607 if (__builtin_constant_p(__B))
1612 __result =
vec_sr((__v8hi)__A, __rshift);
1615 return (__m128i)__result;
1618extern __inline __m128i
1619 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1622 __v4si __result = {0, 0, 0, 0};
1625 if (__builtin_constant_p(__B)) {
1629 __rshift = (__v4su)
vec_splats((
unsigned int)__B);
1633 __result =
vec_sr((__v4si)__A, __rshift);
1636 return (__m128i)__result;
1640extern __inline __m128i
1641 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1644 __v2di __result = {0, 0};
1647 if (__builtin_constant_p(__B)) {
1651 __rshift = (__v2du)
vec_splats((
unsigned long long)__B);
1653 __rshift = (__v2du)
vec_splats((
unsigned int)__B);
1655 __result =
vec_sr((__v2di)__A, __rshift);
1658 return (__m128i)__result;
1662extern __inline __m128i
1663 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1666 __vector __bool
short __shmask;
1667 const __v8hu __shmax = {15, 15, 15, 15, 15, 15, 15, 15};
1670#ifdef __LITTLE_ENDIAN__
1675 __shmask =
vec_cmple(__lshift, __shmax);
1676 __result =
vec_sl((__v8hu)__A, __lshift);
1677 __result =
vec_sel((__v8hu)__shmask, __result, __shmask);
1679 return (__m128i)__result;
1682extern __inline __m128i
1683 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1686 __vector __bool
int __shmask;
1687 const __v4su __shmax = {32, 32, 32, 32};
1689#ifdef __LITTLE_ENDIAN__
1694 __shmask =
vec_cmplt(__lshift, __shmax);
1695 __result =
vec_sl((__v4su)__A, __lshift);
1696 __result =
vec_sel((__v4su)__shmask, __result, __shmask);
1698 return (__m128i)__result;
1702extern __inline __m128i
1703 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1706 __vector __bool
long long __shmask;
1707 const __v2du __shmax = {64, 64};
1711 __shmask =
vec_cmplt(__lshift, __shmax);
1712 __result =
vec_sl((__v2du)__A, __lshift);
1713 __result =
vec_sel((__v2du)__shmask, __result, __shmask);
1715 return (__m128i)__result;
1719extern __inline __m128i
1720 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1722 const __v8hu __rshmax = {15, 15, 15, 15, 15, 15, 15, 15};
1726#ifdef __LITTLE_ENDIAN__
1731 __rshift =
vec_min(__rshift, __rshmax);
1732 __result =
vec_sra((__v8hi)__A, __rshift);
1734 return (__m128i)__result;
1737extern __inline __m128i
1738 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1740 const __v4su __rshmax = {31, 31, 31, 31};
1744#ifdef __LITTLE_ENDIAN__
1749 __rshift =
vec_min(__rshift, __rshmax);
1750 __result =
vec_sra((__v4si)__A, __rshift);
1752 return (__m128i)__result;
1755extern __inline __m128i
1756 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1759 __vector __bool
short __shmask;
1760 const __v8hu __shmax = {15, 15, 15, 15, 15, 15, 15, 15};
1763#ifdef __LITTLE_ENDIAN__
1768 __shmask =
vec_cmple(__rshift, __shmax);
1769 __result =
vec_sr((__v8hu)__A, __rshift);
1770 __result =
vec_sel((__v8hu)__shmask, __result, __shmask);
1772 return (__m128i)__result;
1775extern __inline __m128i
1776 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1779 __vector __bool
int __shmask;
1780 const __v4su __shmax = {32, 32, 32, 32};
1783#ifdef __LITTLE_ENDIAN__
1788 __shmask =
vec_cmplt(__rshift, __shmax);
1789 __result =
vec_sr((__v4su)__A, __rshift);
1790 __result =
vec_sel((__v4su)__shmask, __result, __shmask);
1792 return (__m128i)__result;
1796extern __inline __m128i
1797 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1800 __vector __bool
long long __shmask;
1801 const __v2du __shmax = {64, 64};
1805 __shmask =
vec_cmplt(__rshift, __shmax);
1806 __result =
vec_sr((__v2du)__A, __rshift);
1807 __result =
vec_sel((__v2du)__shmask, __result, __shmask);
1809 return (__m128i)__result;
1813extern __inline __m128d
1814 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1816 return (
vec_and((__v2df)__A, (__v2df)__B));
1819extern __inline __m128d
1820 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1822 return (
vec_andc((__v2df)__B, (__v2df)__A));
1825extern __inline __m128d
1826 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1828 return (
vec_or((__v2df)__A, (__v2df)__B));
1831extern __inline __m128d
1832 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1834 return (
vec_xor((__v2df)__A, (__v2df)__B));
1837extern __inline __m128i
1838 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1840 return (__m128i)
vec_and((__v2di)__A, (__v2di)__B);
1843extern __inline __m128i
1844 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1846 return (__m128i)
vec_andc((__v2di)__B, (__v2di)__A);
1849extern __inline __m128i
1850 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1852 return (__m128i)
vec_or((__v2di)__A, (__v2di)__B);
1855extern __inline __m128i
1856 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1858 return (__m128i)
vec_xor((__v2di)__A, (__v2di)__B);
1861extern __inline __m128i
1862 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1864 return (__m128i)
vec_cmpeq((__v16qi)__A, (__v16qi)__B);
1867extern __inline __m128i
1868 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1870 return (__m128i)
vec_cmpeq((__v8hi)__A, (__v8hi)__B);
1873extern __inline __m128i
1874 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1876 return (__m128i)
vec_cmpeq((__v4si)__A, (__v4si)__B);
1879extern __inline __m128i
1880 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1882 return (__m128i)
vec_cmplt((__v16qi)__A, (__v16qi)__B);
1885extern __inline __m128i
1886 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1888 return (__m128i)
vec_cmplt((__v8hi)__A, (__v8hi)__B);
1891extern __inline __m128i
1892 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1894 return (__m128i)
vec_cmplt((__v4si)__A, (__v4si)__B);
1897extern __inline __m128i
1898 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1900 return (__m128i)
vec_cmpgt((__v16qi)__A, (__v16qi)__B);
1903extern __inline __m128i
1904 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1906 return (__m128i)
vec_cmpgt((__v8hi)__A, (__v8hi)__B);
1909extern __inline __m128i
1910 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1912 return (__m128i)
vec_cmpgt((__v4si)__A, (__v4si)__B);
1916 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1918 return (
unsigned short)((__v8hi)__A)[__N & 7];
1921extern __inline __m128i
1922 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1924 __v8hi __result = (__v8hi)__A;
1926 __result[(__N & 7)] =
__D;
1928 return (__m128i)__result;
1931extern __inline __m128i
1932 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1934 return (__m128i)
vec_max((__v8hi)__A, (__v8hi)__B);
1937extern __inline __m128i
1938 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1940 return (__m128i)
vec_max((__v16qu)__A, (__v16qu)__B);
1943extern __inline __m128i
1944 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1946 return (__m128i)
vec_min((__v8hi)__A, (__v8hi)__B);
1949extern __inline __m128i
1950 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1952 return (__m128i)
vec_min((__v16qu)__A, (__v16qu)__B);
1961 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1964 return vec_extractm((__v16qu)__A);
1966 __vector
unsigned long long __result;
1967 static const __vector
unsigned char __perm_mask = {
1968 0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40,
1969 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00};
1971 __result = ((__vector
unsigned long long)vec_vbpermq(
1972 (__vector
unsigned char)__A, (__vector
unsigned char)__perm_mask));
1974#ifdef __LITTLE_ENDIAN__
1983extern __inline __m128i
1984 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1987 __v16qu __xform1 = {
1988#ifdef __LITTLE_ENDIAN__
1989 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A,
1990 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
1992 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x08,
1993 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D
1997 __w0 = vec_vmuleuh((__v8hu)__A, (__v8hu)__B);
1998 __w1 = vec_vmulouh((__v8hu)__A, (__v8hu)__B);
1999 return (__m128i)
vec_perm(__w0, __w1, __xform1);
2002extern __inline __m128i
2003 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2005 unsigned long __element_selector_98 = __mask & 0x03;
2006 unsigned long __element_selector_BA = (__mask >> 2) & 0x03;
2007 unsigned long __element_selector_DC = (__mask >> 4) & 0x03;
2008 unsigned long __element_selector_FE = (__mask >> 6) & 0x03;
2009 static const unsigned short __permute_selectors[4] = {
2010#ifdef __LITTLE_ENDIAN__
2011 0x0908, 0x0B0A, 0x0D0C, 0x0F0E
2013 0x0809, 0x0A0B, 0x0C0D, 0x0E0F
2017#ifdef __LITTLE_ENDIAN__
2018 {0x1716151413121110UL, 0UL};
2020 {0x1011121314151617UL, 0UL};
2025 __t.as_short[0] = __permute_selectors[__element_selector_98];
2026 __t.as_short[1] = __permute_selectors[__element_selector_BA];
2027 __t.as_short[2] = __permute_selectors[__element_selector_DC];
2028 __t.as_short[3] = __permute_selectors[__element_selector_FE];
2029 __pmask[1] = __t.as_m64;
2032 return (__m128i)__r;
2035extern __inline __m128i
2036 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2038 unsigned long __element_selector_10 = __mask & 0x03;
2039 unsigned long __element_selector_32 = (__mask >> 2) & 0x03;
2040 unsigned long __element_selector_54 = (__mask >> 4) & 0x03;
2041 unsigned long __element_selector_76 = (__mask >> 6) & 0x03;
2042 static const unsigned short __permute_selectors[4] = {
2043#ifdef __LITTLE_ENDIAN__
2044 0x0100, 0x0302, 0x0504, 0x0706
2046 0x0001, 0x0203, 0x0405, 0x0607
2050#ifdef __LITTLE_ENDIAN__
2051 {0UL, 0x1f1e1d1c1b1a1918UL};
2053 {0UL, 0x18191a1b1c1d1e1fUL};
2057 __t.as_short[0] = __permute_selectors[__element_selector_10];
2058 __t.as_short[1] = __permute_selectors[__element_selector_32];
2059 __t.as_short[2] = __permute_selectors[__element_selector_54];
2060 __t.as_short[3] = __permute_selectors[__element_selector_76];
2061 __pmask[0] = __t.as_m64;
2064 return (__m128i)__r;
2067extern __inline __m128i
2068 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2070 unsigned long __element_selector_10 = __mask & 0x03;
2071 unsigned long __element_selector_32 = (__mask >> 2) & 0x03;
2072 unsigned long __element_selector_54 = (__mask >> 4) & 0x03;
2073 unsigned long __element_selector_76 = (__mask >> 6) & 0x03;
2074 static const unsigned int __permute_selectors[4] = {
2075#ifdef __LITTLE_ENDIAN__
2076 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
2078 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
2083 __t[0] = __permute_selectors[__element_selector_10];
2084 __t[1] = __permute_selectors[__element_selector_32];
2085 __t[2] = __permute_selectors[__element_selector_54] + 0x10101010;
2086 __t[3] = __permute_selectors[__element_selector_76] + 0x10101010;
2087 return (__m128i)
vec_perm((__v4si)__A, (__v4si)__A,
2088 (__vector
unsigned char)__t);
2092 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2094 __v2du __hibit = {0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL};
2095 __v16qu __mask, __tmp;
2096 __m128i_u *
__p = (__m128i_u *)__C;
2099 __mask = (__v16qu)
vec_cmpgt((__v16qu)__B, (__v16qu)__hibit);
2100 __tmp =
vec_sel(__tmp, (__v16qu)__A, __mask);
2104extern __inline __m128i
2105 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2107 return (__m128i)
vec_avg((__v16qu)__A, (__v16qu)__B);
2110extern __inline __m128i
2111 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2113 return (__m128i)
vec_avg((__v8hu)__A, (__v8hu)__B);
2116extern __inline __m128i
2117 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2122 const __v4su __zero = {0, 0, 0, 0};
2130 __vabsdiff =
vec_sub(__vmax, __vmin);
2132 __vabsdiff = vec_absd(
__a,
__b);
2135 __vsum = (__vector
signed int)
vec_sum4s(__vabsdiff, __zero);
2136#ifdef __LITTLE_ENDIAN__
2138 __asm__(
"vsum2sws %0,%1,%2" :
"=v"(__result) :
"v"(__vsum),
"v"(__zero));
2147 __result = vec_sum2s(__vsum, (__vector
signed int)__zero);
2149 __result =
vec_sld(__result, __result, 6);
2151 return (__m128i)__result;
2155 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2156 _mm_stream_si32(
int *__A,
int __B) {
2158 __asm__(
"dcbtstt 0,%0" : :
"b"(__A) :
"memory");
2163 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2164 _mm_stream_si64(
long long int *__A,
long long int __B) {
2166 __asm__(
" dcbtstt 0,%0" : :
"b"(__A) :
"memory");
2171 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2174 __asm__(
"dcbtstt 0,%0" : :
"b"(__A) :
"memory");
2179 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2182 __asm__(
"dcbtstt 0,%0" : :
"b"(__A) :
"memory");
2183 *(__m128d *)__A = __B;
2187 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2190 __asm__(
"dcbf 0,%0" : :
"b"(__A) :
"memory");
2194 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2197 __atomic_thread_fence(__ATOMIC_RELEASE);
2201 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2204 __atomic_thread_fence(__ATOMIC_SEQ_CST);
2207extern __inline __m128i
2208 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2213extern __inline __m128i
2214 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2216 return __extension__(__m128i)(__v2di){__A, 0LL};
2220extern __inline __m128i
2221 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2222 _mm_cvtsi64x_si128(
long long __A) {
2223 return __extension__(__m128i)(__v2di){__A, 0LL};
2228extern __inline __m128
2229 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2234extern __inline __m128i
2235 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2237 return (__m128i)__A;
2240extern __inline __m128d
2241 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2243 return (__m128d)__A;
2246extern __inline __m128i
2247 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2249 return (__m128i)__A;
2252extern __inline __m128
2253 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2258extern __inline __m128d
2259 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2261 return (__m128d)__A;
2265#include_next <emmintrin.h>
static __inline__ vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, vector unsigned char __b)
static __inline__ vector bool char __ATTRS_o_ai vec_cmpeq(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_sra(vector signed char __a, vector unsigned char __b)
static __inline__ vector int __ATTRS_o_ai vec_vmrghw(vector int __a, vector int __b)
static __inline__ vector signed char __ATTRS_o_ai vec_sro(vector signed char __a, vector signed char __b)
#define vec_ctf(__a, __b)
static __inline__ vector float vector float vector float __c
static __inline__ vector short __ATTRS_o_ai vec_mule(vector signed char __a, vector signed char __b)
static __inline__ vector float vector float __b
static __inline__ vector signed char __ATTRS_o_ai vec_ld(long __a, const vector signed char *__b)
static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a)
static __inline__ vector signed char __ATTRS_o_ai vec_andc(vector signed char __a, vector signed char __b)
static __inline__ void __ATTRS_o_ai vec_st(vector signed char __a, long __b, vector signed char *__c)
static __inline__ vector signed int __ATTRS_o_ai vec_sld(vector signed int, vector signed int, unsigned const int __c)
static __inline__ vector short __ATTRS_o_ai vec_unpackl(vector signed char __a)
static __inline__ vector int __ATTRS_o_ai vec_sum4s(vector signed char __a, vector int __b)
static __inline__ vector signed char __ATTRS_o_ai vec_and(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_avg(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_mergel(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_subs(vector signed char __a, vector signed char __b)
static __inline__ vector int __ATTRS_o_ai vec_splat_s32(signed char __a)
static __inline__ vector signed char __ATTRS_o_ai vec_adds(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c)
static __inline__ vector signed char __ATTRS_o_ai vec_sel(vector signed char __a, vector signed char __b, vector unsigned char __c)
static __inline__ vector signed char __ATTRS_o_ai vec_mergeh(vector signed char __a, vector signed char __b)
static __inline__ vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_max(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_slo(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_nor(vector signed char __a, vector signed char __b)
static __inline__ vector bool char __ATTRS_o_ai vec_cmpge(vector signed char __a, vector signed char __b)
static __inline__ vector unsigned char __ATTRS_o_ai vec_packsu(vector short __a, vector short __b)
static __inline__ vector signed char __ATTRS_o_ai vec_min(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_splat(vector signed char __a, unsigned const int __b)
static __inline__ vector signed char __ATTRS_o_ai vec_or(vector signed char __a, vector signed char __b)
static __inline__ vector short __ATTRS_o_ai vec_unpackh(vector signed char __a)
static __inline__ vector unsigned char __ATTRS_o_ai vec_sl(vector unsigned char __a, vector unsigned char __b)
static __inline__ vector short __ATTRS_o_ai vec_splat_s16(signed char __a)
static __inline__ vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a, vector unsigned char __b)
static __inline__ vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a, vector signed char __b)
static __inline__ vector bool char __ATTRS_o_ai vec_cmple(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_packs(vector short __a, vector short __b)
static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector signed char __a, vector signed char __b)
static __inline__ uint32_t volatile uint32_t * __p
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, __m64 __q1)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 64-bit integral ...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-3) values from each of the two 128-bit vectors of [8 x i16] and interl...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a)
Moves the 64-bit operand to a 128-bit integer vector, zeroing the upper bits.
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, __m128d __a)
Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to the upper and lower 64 bits of a...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
Initializes the 16-bit values in a 128-bit vector of [8 x i16] with the specified 16-bit integer valu...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w)
Constructs a 128-bit floating-point vector of [2 x double], with each of the two double-precision flo...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtpi32_pd(__m64 __a)
Converts the two signed 32-bit integer elements of a 64-bit vector of [2 x i32] into two double-preci...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
Converts, with saturation, 16-bit signed integers from both 128-bit integer vector operands into 8-bi...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the smaller value f...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a)
Casts a 128-bit floating-point vector of [2 x double] into a 128-bit integer vector.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b)
Performs a bitwise OR of two 128-bit vectors of [2 x double].
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, __m128i __a)
Stores a 128-bit integer vector to a 128-bit aligned memory location.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp)
Loads a 128-bit floating-point vector of [2 x double] from an unaligned memory location.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a)
Moves the least significant 64 bits of a vector of [2 x i64] to a 64-bit signed integer value.
static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
Moves bytes selected by the mask from the first operand to the specified unaligned memory location.
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
Initializes the 32-bit values in a 128-bit vector of [4 x i32] with the specified 32-bit integer valu...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [16 x i8] vectors,...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp)
Loads a 128-bit floating-point vector of [2 x double] from an aligned memory location.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w)
Constructs a 128-bit floating-point vector of [2 x double], with each of the two double-precision flo...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x)
Constructs a 128-bit floating-point vector of [2 x double] initialized with the specified double-prec...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
#define _mm_slli_si128(a, imm)
Left-shifts the 128-bit integer vector operand by the specified number of bytes.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, __m128i __a)
Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to a memory location.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b)
Performs a bitwise OR of two 128-bit integer vectors.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp)
Loads a 64-bit double-precision value to the low element of a 128-bit integer vector and clears the u...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the two 64-bit integer vecto...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors, using the one's complement of the values conta...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding 16-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the lower 16 bits of ea...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b)
Performs a bitwise XOR of two 128-bit vectors of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the greater value f...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a)
Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to the upper and lower 64 bits of a...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a)
Casts a 128-bit integer vector into a 128-bit floating-point vector of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-7) values from two 128-bit vectors of [16 x i8] and interleaves them i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 8-bit signed integer values in the input and returns the di...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a)
Extracts the sign bits of the double-precision values in the 128-bit vector of [2 x double],...
#define _mm_shuffle_pd(a, b, i)
Constructs a 128-bit floating-point vector of [2 x double] from two 128-bit vector parameters of [2 x...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding 32-bit values of the 128-bit integer vectors for equality.
static __inline__ void int __a
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
void _mm_mfence(void)
Forces strong memory ordering (serialization) between load and store instructions preceding this inst...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 16-bit unsigned integer values in the input and returns the...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtpd_pi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a)
Moves the lower 64 bits of a 128-bit integer vector to a 128-bit integer vector, zeroing the upper bi...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a)
Converts the low-order element of a [2 x double] vector into a 32-bit signed truncated (rounded towar...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a)
Returns a vector of [2 x i64] where the lower element is the input operand and the upper element is z...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, __m128 __b)
Converts the lower single-precision floating-point element of a 128-bit vector of [4 x float],...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Unpacks the high-order (index 4-7) values from two 128-bit vectors of [8 x i16] and interleaves them ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a)
Casts a 128-bit floating-point vector of [4 x float] into a 128-bit integer vector.
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two 128-bit signed [8 x i16] vectors, producing eight interm...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a)
Returns a vector of [4 x i32] where the lowest element is the input operand and the remaining element...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of 64-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
#define _mm_insert_epi16(a, b, imm)
Constructs a 128-bit integer vector by first making a copy of the 128-bit integer vector parameter,...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b)
Converts the lower double-precision floating-point element of a 128-bit vector of [2 x double],...
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a)
Converts the low-order element of a 128-bit vector of [2 x double] into a 32-bit signed integer value...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, double const *__dp)
Loads a double-precision value into the low-order bits of a 128-bit vector of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [8 x i16] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 16-bit signed integer values in the input and returns the d...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 8-bit integral v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the upper 16 bits of ea...
#define _mm_extract_epi16(a, imm)
Extracts 16 bits from a 128-bit integer vector of [8 x i16], using the immediate-value parameter as a...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
#define _mm_shufflelo_epi16(a, imm)
Constructs a 128-bit integer vector by shuffling four lower 16-bit elements of a 128-bit integer vect...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two unsigned [8 x i16] vectors, saving the upper 16 bits of ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double], using the one's complement of the valu...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the greater value fro...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a)
Casts a 128-bit floating-point vector of [2 x double] into a 128-bit floating-point vector of [4 x fl...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
Converts, with saturation, 16-bit signed integers from both 128-bit integer vector operands into 8-bi...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp)
Loads two double-precision values, in reverse order, from an aligned memory location into a 128-bit v...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a)
Casts a 128-bit floating-point vector of [4 x float] into a 128-bit floating-point vector of [2 x dou...
#define _mm_bsrli_si128(a, imm)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, __m128d __a)
Stores the upper 64 bits of a 128-bit vector of [2 x double] to a memory location.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
void _mm_lfence(void)
Forces strong memory ordering (serialization) between load instructions preceding this instruction an...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, __m128i __b)
Computes the absolute differences of corresponding 8-bit integer values in two 128-bit vectors.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, __m128d __a)
Stores the lower 64 bits of a 128-bit vector of [2 x double] to a memory location.
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a)
Moves packed double-precision values from a 128-bit vector of [2 x double] to a memory location.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
Converts, with saturation, 32-bit signed integers from both 128-bit integer vector operands into 16-b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [16 x i8], saving the lower 8 bits of each ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, __m128d __a)
Stores a 128-bit floating point vector of [2 x double] to a 128-bit aligned memory location.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a)
Casts a 128-bit integer vector into a 128-bit floating-point vector of [4 x float].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, long long __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, int __b)
Converts a 32-bit signed integer value, in the second parameter, into a double-precision floating-poi...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of 64-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into four signed truncated (rounded toward zero) 32-bit integers,...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 16-bit integral ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, double const *__dp)
Loads a double-precision value into the high-order bits of a 128-bit vector of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Initializes the 8-bit values in a 128-bit vector of [16 x i8] with the specified 8-bit integer values...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q)
Initializes both values in a 128-bit vector of [2 x i64] with the specified 64-bit value.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b)
Calculates the square root of the lower double-precision value of the second operand and returns it i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x)
Constructs a 128-bit floating-point vector of [2 x double], initialized in reverse order with the spe...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, __m64 __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b)
Compares lower 64-bit double-precision values of both operands, and returns the lesser of the pair of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, __m128d __a)
Stores the lower 64 bits of a 128-bit vector of [2 x double] to a memory location.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp)
Loads a double-precision floating-point value from a specified memory location and duplicates it to b...
#define _mm_shufflehi_epi16(a, imm)
Constructs a 128-bit integer vector by shuffling four upper 16-bit elements of a 128-bit integer vect...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns a vecto...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the smaller value fro...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a)
Returns the lower 64 bits of a 128-bit integer vector as a 64-bit integer.
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, __m128i __b)
Stores a 128-bit integer vector to a memory location aligned on a 128-bit boundary.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] for...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w)
Constructs a 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b)
Compares lower 64-bit double-precision values of both operands, and returns the greater of the pair o...
static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a)
Returns the low-order element of a 128-bit vector of [2 x double] as a double-precision floating-poin...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadl_epi64(__m128i_u const *__p)
Returns a vector of [2 x i64] where the lower element is taken from the lower element of the operand,...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value.
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a)
Moves the least significant 32 bits of a vector of [4 x i32] to a 32-bit signed integer value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a)
Stores two double-precision values, in reverse order, from a 128-bit vector of [2 x double] to a 16-b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Unpacks the high-order (index 8-15) values from two 128-bit vectors of [16 x i8] and interleaves them...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttpd_pi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a)
Stores a 128-bit vector of [2 x double] into an unaligned memory location.
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [8 x i16], saving the lower 16 bits of each...
#define _mm_bslli_si128(a, imm)
#define _mm_srli_si128(a, imm)
Right-shifts the 128-bit integer vector operand by the specified number of bytes.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 8-bit unsigned integer values in the input and returns the ...
#define _mm_shuffle_epi32(a, imm)
Constructs a 128-bit integer vector by shuffling four 32-bit elements of a 128-bit integer vector par...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i_u *__p, __m128i __b)
Stores a 128-bit integer vector to an unaligned memory location.
typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)))
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 32-bit integral ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b)
Performs a bitwise exclusive OR of two 128-bit integer vectors.
void _mm_clflush(void const *__p)
The cache line containing __p is flushed and invalidated from all caches in the coherency domain.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ void short __D
__inline unsigned int unsigned int unsigned int * __P
__inline unsigned int unsigned int __Y