clang  16.0.0git
tmmintrin.h
Go to the documentation of this file.
1 /*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 /* Implemented from the specification included in the Intel C++ Compiler
11  User Guide and Reference, version 9.0. */
12 
13 #ifndef NO_WARN_X86_INTRINSICS
14 /* This header is distributed to simplify porting x86_64 code that
15  makes explicit use of Intel intrinsics to powerpc64le.
16 
17  It is the user's responsibility to determine if the results are
18  acceptable and make additional changes as necessary.
19 
20  Note that much code that uses Intel intrinsics can be rewritten in
21  standard C or GNU C extensions, which are more portable and better
22  optimized across multiple targets. */
23 #endif
24 
25 #ifndef TMMINTRIN_H_
26 #define TMMINTRIN_H_
27 
28 #if defined(__ppc64__) && \
29  (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))
30 
31 #include <altivec.h>
32 
33 /* We need definitions from the SSE header files. */
34 #include <pmmintrin.h>
35 
36 extern __inline __m128i
37  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
38  _mm_abs_epi16(__m128i __A) {
39  return (__m128i)vec_abs((__v8hi)__A);
40 }
41 
42 extern __inline __m128i
43  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
44  _mm_abs_epi32(__m128i __A) {
45  return (__m128i)vec_abs((__v4si)__A);
46 }
47 
48 extern __inline __m128i
49  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
50  _mm_abs_epi8(__m128i __A) {
51  return (__m128i)vec_abs((__v16qi)__A);
52 }
53 
54 extern __inline __m64
55  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
56  _mm_abs_pi16(__m64 __A) {
57  __v8hi __B = (__v8hi)(__v2du){__A, __A};
58  return (__m64)((__v2du)vec_abs(__B))[0];
59 }
60 
61 extern __inline __m64
62  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
63  _mm_abs_pi32(__m64 __A) {
64  __v4si __B = (__v4si)(__v2du){__A, __A};
65  return (__m64)((__v2du)vec_abs(__B))[0];
66 }
67 
68 extern __inline __m64
69  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
70  _mm_abs_pi8(__m64 __A) {
71  __v16qi __B = (__v16qi)(__v2du){__A, __A};
72  return (__m64)((__v2du)vec_abs(__B))[0];
73 }
74 
75 extern __inline __m128i
76  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
77  _mm_alignr_epi8(__m128i __A, __m128i __B, const unsigned int __count) {
78  if (__builtin_constant_p(__count) && __count < 16) {
79 #ifdef __LITTLE_ENDIAN__
80  __A = (__m128i)vec_reve((__v16qu)__A);
81  __B = (__m128i)vec_reve((__v16qu)__B);
82 #endif
83  __A = (__m128i)vec_sld((__v16qu)__B, (__v16qu)__A, __count);
84 #ifdef __LITTLE_ENDIAN__
85  __A = (__m128i)vec_reve((__v16qu)__A);
86 #endif
87  return __A;
88  }
89 
90  if (__count == 0)
91  return __B;
92 
93  if (__count >= 16) {
94  if (__count >= 32) {
95  const __v16qu __zero = {0};
96  return (__m128i)__zero;
97  } else {
98  const __v16qu __shift = vec_splats((unsigned char)((__count - 16) * 8));
99 #ifdef __LITTLE_ENDIAN__
100  return (__m128i)vec_sro((__v16qu)__A, __shift);
101 #else
102  return (__m128i)vec_slo((__v16qu)__A, __shift);
103 #endif
104  }
105  } else {
106  const __v16qu __shiftA = vec_splats((unsigned char)((16 - __count) * 8));
107  const __v16qu __shiftB = vec_splats((unsigned char)(__count * 8));
108 #ifdef __LITTLE_ENDIAN__
109  __A = (__m128i)vec_slo((__v16qu)__A, __shiftA);
110  __B = (__m128i)vec_sro((__v16qu)__B, __shiftB);
111 #else
112  __A = (__m128i)vec_sro((__v16qu)__A, __shiftA);
113  __B = (__m128i)vec_slo((__v16qu)__B, __shiftB);
114 #endif
115  return (__m128i)vec_or((__v16qu)__A, (__v16qu)__B);
116  }
117 }
118 
119 extern __inline __m64
120  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
121  _mm_alignr_pi8(__m64 __A, __m64 __B, unsigned int __count) {
122  if (__count < 16) {
123  __v2du __C = {__B, __A};
124 #ifdef __LITTLE_ENDIAN__
125  const __v4su __shift = {__count << 3, 0, 0, 0};
126  __C = (__v2du)vec_sro((__v16qu)__C, (__v16qu)__shift);
127 #else
128  const __v4su __shift = {0, 0, 0, __count << 3};
129  __C = (__v2du)vec_slo((__v16qu)__C, (__v16qu)__shift);
130 #endif
131  return (__m64)__C[0];
132  } else {
133  const __m64 __zero = {0};
134  return __zero;
135  }
136 }
137 
138 extern __inline __m128i
139  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
140  _mm_hadd_epi16(__m128i __A, __m128i __B) {
141  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13,
142  16, 17, 20, 21, 24, 25, 28, 29};
143  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15,
144  18, 19, 22, 23, 26, 27, 30, 31};
145  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
146  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
147  return (__m128i)vec_add(__C, __D);
148 }
149 
150 extern __inline __m128i
151  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
152  _mm_hadd_epi32(__m128i __A, __m128i __B) {
153  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11,
154  16, 17, 18, 19, 24, 25, 26, 27};
155  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15,
156  20, 21, 22, 23, 28, 29, 30, 31};
157  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
158  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
159  return (__m128i)vec_add(__C, __D);
160 }
161 
162 extern __inline __m64
163  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
164  _mm_hadd_pi16(__m64 __A, __m64 __B) {
165  __v8hi __C = (__v8hi)(__v2du){__A, __B};
166  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
167  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
168  __v8hi __D = vec_perm(__C, __C, __Q);
169  __C = vec_perm(__C, __C, __P);
170  __C = vec_add(__C, __D);
171  return (__m64)((__v2du)__C)[1];
172 }
173 
174 extern __inline __m64
175  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
176  _mm_hadd_pi32(__m64 __A, __m64 __B) {
177  __v4si __C = (__v4si)(__v2du){__A, __B};
178  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
179  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
180  __v4si __D = vec_perm(__C, __C, __Q);
181  __C = vec_perm(__C, __C, __P);
182  __C = vec_add(__C, __D);
183  return (__m64)((__v2du)__C)[1];
184 }
185 
186 extern __inline __m128i
187  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
188  _mm_hadds_epi16(__m128i __A, __m128i __B) {
189  __v4si __C = {0}, __D = {0};
190  __C = vec_sum4s((__v8hi)__A, __C);
191  __D = vec_sum4s((__v8hi)__B, __D);
192  __C = (__v4si)vec_packs(__C, __D);
193  return (__m128i)__C;
194 }
195 
196 extern __inline __m64
197  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
198  _mm_hadds_pi16(__m64 __A, __m64 __B) {
199  const __v4si __zero = {0};
200  __v8hi __C = (__v8hi)(__v2du){__A, __B};
201  __v4si __D = vec_sum4s(__C, __zero);
202  __C = vec_packs(__D, __D);
203  return (__m64)((__v2du)__C)[1];
204 }
205 
206 extern __inline __m128i
207  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
208  _mm_hsub_epi16(__m128i __A, __m128i __B) {
209  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13,
210  16, 17, 20, 21, 24, 25, 28, 29};
211  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15,
212  18, 19, 22, 23, 26, 27, 30, 31};
213  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
214  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
215  return (__m128i)vec_sub(__C, __D);
216 }
217 
218 extern __inline __m128i
219  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
220  _mm_hsub_epi32(__m128i __A, __m128i __B) {
221  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11,
222  16, 17, 18, 19, 24, 25, 26, 27};
223  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15,
224  20, 21, 22, 23, 28, 29, 30, 31};
225  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
226  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
227  return (__m128i)vec_sub(__C, __D);
228 }
229 
230 extern __inline __m64
231  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
232  _mm_hsub_pi16(__m64 __A, __m64 __B) {
233  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
234  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
235  __v8hi __C = (__v8hi)(__v2du){__A, __B};
236  __v8hi __D = vec_perm(__C, __C, __Q);
237  __C = vec_perm(__C, __C, __P);
238  __C = vec_sub(__C, __D);
239  return (__m64)((__v2du)__C)[1];
240 }
241 
242 extern __inline __m64
243  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
244  _mm_hsub_pi32(__m64 __A, __m64 __B) {
245  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
246  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
247  __v4si __C = (__v4si)(__v2du){__A, __B};
248  __v4si __D = vec_perm(__C, __C, __Q);
249  __C = vec_perm(__C, __C, __P);
250  __C = vec_sub(__C, __D);
251  return (__m64)((__v2du)__C)[1];
252 }
253 
254 extern __inline __m128i
255  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
256  _mm_hsubs_epi16(__m128i __A, __m128i __B) {
257  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13,
258  16, 17, 20, 21, 24, 25, 28, 29};
259  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15,
260  18, 19, 22, 23, 26, 27, 30, 31};
261  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
262  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
263  return (__m128i)vec_subs(__C, __D);
264 }
265 
266 extern __inline __m64
267  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
268  _mm_hsubs_pi16(__m64 __A, __m64 __B) {
269  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
270  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
271  __v8hi __C = (__v8hi)(__v2du){__A, __B};
272  __v8hi __D = vec_perm(__C, __C, __P);
273  __v8hi __E = vec_perm(__C, __C, __Q);
274  __C = vec_subs(__D, __E);
275  return (__m64)((__v2du)__C)[1];
276 }
277 
278 extern __inline __m128i
279  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
280  _mm_shuffle_epi8(__m128i __A, __m128i __B) {
281  const __v16qi __zero = {0};
282  __vector __bool char __select = vec_cmplt((__v16qi)__B, __zero);
283  __v16qi __C = vec_perm((__v16qi)__A, (__v16qi)__A, (__v16qu)__B);
284  return (__m128i)vec_sel(__C, __zero, __select);
285 }
286 
287 extern __inline __m64
288  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
289  _mm_shuffle_pi8(__m64 __A, __m64 __B) {
290  const __v16qi __zero = {0};
291  __v16qi __C = (__v16qi)(__v2du){__A, __A};
292  __v16qi __D = (__v16qi)(__v2du){__B, __B};
293  __vector __bool char __select = vec_cmplt((__v16qi)__D, __zero);
294  __C = vec_perm((__v16qi)__C, (__v16qi)__C, (__v16qu)__D);
295  __C = vec_sel(__C, __zero, __select);
296  return (__m64)((__v2du)(__C))[0];
297 }
298 
299 #ifdef _ARCH_PWR8
300 extern __inline __m128i
301  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
302  _mm_sign_epi8(__m128i __A, __m128i __B) {
303  const __v16qi __zero = {0};
304  __v16qi __selectneg = (__v16qi)vec_cmplt((__v16qi)__B, __zero);
305  __v16qi __selectpos =
306  (__v16qi)vec_neg((__v16qi)vec_cmpgt((__v16qi)__B, __zero));
307  __v16qi __conv = vec_add(__selectneg, __selectpos);
308  return (__m128i)vec_mul((__v16qi)__A, (__v16qi)__conv);
309 }
310 #endif
311 
312 #ifdef _ARCH_PWR8
313 extern __inline __m128i
314  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
315  _mm_sign_epi16(__m128i __A, __m128i __B) {
316  const __v8hi __zero = {0};
317  __v8hi __selectneg = (__v8hi)vec_cmplt((__v8hi)__B, __zero);
318  __v8hi __selectpos = (__v8hi)vec_neg((__v8hi)vec_cmpgt((__v8hi)__B, __zero));
319  __v8hi __conv = vec_add(__selectneg, __selectpos);
320  return (__m128i)vec_mul((__v8hi)__A, (__v8hi)__conv);
321 }
322 #endif
323 
324 #ifdef _ARCH_PWR8
325 extern __inline __m128i
326  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
327  _mm_sign_epi32(__m128i __A, __m128i __B) {
328  const __v4si __zero = {0};
329  __v4si __selectneg = (__v4si)vec_cmplt((__v4si)__B, __zero);
330  __v4si __selectpos = (__v4si)vec_neg((__v4si)vec_cmpgt((__v4si)__B, __zero));
331  __v4si __conv = vec_add(__selectneg, __selectpos);
332  return (__m128i)vec_mul((__v4si)__A, (__v4si)__conv);
333 }
334 #endif
335 
336 #ifdef _ARCH_PWR8
337 extern __inline __m64
338  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
339  _mm_sign_pi8(__m64 __A, __m64 __B) {
340  const __v16qi __zero = {0};
341  __v16qi __C = (__v16qi)(__v2du){__A, __A};
342  __v16qi __D = (__v16qi)(__v2du){__B, __B};
343  __C = (__v16qi)_mm_sign_epi8((__m128i)__C, (__m128i)__D);
344  return (__m64)((__v2du)(__C))[0];
345 }
346 #endif
347 
348 #ifdef _ARCH_PWR8
349 extern __inline __m64
350  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
351  _mm_sign_pi16(__m64 __A, __m64 __B) {
352  const __v8hi __zero = {0};
353  __v8hi __C = (__v8hi)(__v2du){__A, __A};
354  __v8hi __D = (__v8hi)(__v2du){__B, __B};
355  __C = (__v8hi)_mm_sign_epi16((__m128i)__C, (__m128i)__D);
356  return (__m64)((__v2du)(__C))[0];
357 }
358 #endif
359 
360 #ifdef _ARCH_PWR8
361 extern __inline __m64
362  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
363  _mm_sign_pi32(__m64 __A, __m64 __B) {
364  const __v4si __zero = {0};
365  __v4si __C = (__v4si)(__v2du){__A, __A};
366  __v4si __D = (__v4si)(__v2du){__B, __B};
367  __C = (__v4si)_mm_sign_epi32((__m128i)__C, (__m128i)__D);
368  return (__m64)((__v2du)(__C))[0];
369 }
370 #endif
371 
372 extern __inline __m128i
373  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
374  _mm_maddubs_epi16(__m128i __A, __m128i __B) {
375  __v8hi __unsigned = vec_splats((signed short)0x00ff);
376  __v8hi __C = vec_and(vec_unpackh((__v16qi)__A), __unsigned);
377  __v8hi __D = vec_and(vec_unpackl((__v16qi)__A), __unsigned);
378  __v8hi __E = vec_unpackh((__v16qi)__B);
379  __v8hi __F = vec_unpackl((__v16qi)__B);
380  __C = vec_mul(__C, __E);
381  __D = vec_mul(__D, __F);
382  const __v16qu __odds = {0, 1, 4, 5, 8, 9, 12, 13,
383  16, 17, 20, 21, 24, 25, 28, 29};
384  const __v16qu __evens = {2, 3, 6, 7, 10, 11, 14, 15,
385  18, 19, 22, 23, 26, 27, 30, 31};
386  __E = vec_perm(__C, __D, __odds);
387  __F = vec_perm(__C, __D, __evens);
388  return (__m128i)vec_adds(__E, __F);
389 }
390 
391 extern __inline __m64
392  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
393  _mm_maddubs_pi16(__m64 __A, __m64 __B) {
394  __v8hi __C = (__v8hi)(__v2du){__A, __A};
395  __C = vec_unpackl((__v16qi)__C);
396  const __v8hi __unsigned = vec_splats((signed short)0x00ff);
397  __C = vec_and(__C, __unsigned);
398  __v8hi __D = (__v8hi)(__v2du){__B, __B};
399  __D = vec_unpackl((__v16qi)__D);
400  __D = vec_mul(__C, __D);
401  const __v16qu __odds = {0, 1, 4, 5, 8, 9, 12, 13,
402  16, 17, 20, 21, 24, 25, 28, 29};
403  const __v16qu __evens = {2, 3, 6, 7, 10, 11, 14, 15,
404  18, 19, 22, 23, 26, 27, 30, 31};
405  __C = vec_perm(__D, __D, __odds);
406  __D = vec_perm(__D, __D, __evens);
407  __C = vec_adds(__C, __D);
408  return (__m64)((__v2du)(__C))[0];
409 }
410 
411 extern __inline __m128i
412  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
413  _mm_mulhrs_epi16(__m128i __A, __m128i __B) {
414  __v4si __C = vec_unpackh((__v8hi)__A);
415  __v4si __D = vec_unpackh((__v8hi)__B);
416  __C = vec_mul(__C, __D);
417  __D = vec_unpackl((__v8hi)__A);
418  __v4si __E = vec_unpackl((__v8hi)__B);
419  __D = vec_mul(__D, __E);
420  const __v4su __shift = vec_splats((unsigned int)14);
421  __C = vec_sr(__C, __shift);
422  __D = vec_sr(__D, __shift);
423  const __v4si __ones = vec_splats((signed int)1);
424  __C = vec_add(__C, __ones);
425  __C = vec_sr(__C, (__v4su)__ones);
426  __D = vec_add(__D, __ones);
427  __D = vec_sr(__D, (__v4su)__ones);
428  return (__m128i)vec_pack(__C, __D);
429 }
430 
431 extern __inline __m64
432  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
433  _mm_mulhrs_pi16(__m64 __A, __m64 __B) {
434  __v4si __C = (__v4si)(__v2du){__A, __A};
435  __C = vec_unpackh((__v8hi)__C);
436  __v4si __D = (__v4si)(__v2du){__B, __B};
437  __D = vec_unpackh((__v8hi)__D);
438  __C = vec_mul(__C, __D);
439  const __v4su __shift = vec_splats((unsigned int)14);
440  __C = vec_sr(__C, __shift);
441  const __v4si __ones = vec_splats((signed int)1);
442  __C = vec_add(__C, __ones);
443  __C = vec_sr(__C, (__v4su)__ones);
444  __v8hi __E = vec_pack(__C, __D);
445  return (__m64)((__v2du)(__E))[0];
446 }
447 
448 #else
449 #include_next <tmmintrin.h>
450 #endif /* defined(__ppc64__) &&
451  * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */
452 
453 #endif /* TMMINTRIN_H_ */
_mm_hadd_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16].
Definition: tmmintrin.h:195
vec_unpackl
static __inline__ vector short __ATTRS_o_ai vec_unpackl(vector signed char __a)
Definition: altivec.h:12769
_mm_abs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:90
vec_and
static __inline__ vector signed char __ATTRS_o_ai vec_and(vector signed char __a, vector signed char __b)
Definition: altivec.h:882
__conv
#define __conv
Definition: opencl-c.h:36
_mm_sign_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi16(__m64 __a, __m64 __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:741
vec_sro
static __inline__ vector signed char __ATTRS_o_ai vec_sro(vector signed char __a, vector signed char __b)
Definition: altivec.h:10967
_mm_maddubs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:490
_mm_abs_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:126
_mm_mulhrs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:540
vec_add
static __inline__ vector signed char __ATTRS_o_ai vec_add(vector signed char __a, vector signed char __b)
Definition: altivec.h:200
vec_adds
static __inline__ vector signed char __ATTRS_o_ai vec_adds(vector signed char __a, vector signed char __b)
Definition: altivec.h:626
vec_mul
static __inline__ vector signed char __ATTRS_o_ai vec_mul(vector signed char __a, vector signed char __b)
Definition: altivec.h:6193
_mm_abs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi16(__m64 __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:72
__P
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25
_mm_hsubs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsubs_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:456
_mm_sign_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi32(__m128i __a, __m128i __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:689
_mm_sign_epi8
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi8(__m128i __a, __m128i __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition: tmmintrin.h:637
_mm_hadd_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi32(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32].
Definition: tmmintrin.h:218
vec_sld
static __inline__ vector signed int __ATTRS_o_ai vec_sld(vector signed int, vector signed int, unsigned const int __c)
Definition: altivec.h:9137
vec_or
static __inline__ vector signed char __ATTRS_o_ai vec_or(vector signed char __a, vector signed char __b)
Definition: altivec.h:6853
vec_cmplt
static __inline__ vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a, vector signed char __b)
Definition: altivec.h:2435
_mm_hsub_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:383
_mm_alignr_epi8
#define _mm_alignr_epi8(a, b, n)
Concatenates the two 128-bit integer vector operands, and right-shifts the result by the number of by...
Definition: tmmintrin.h:151
vec_slo
static __inline__ vector signed char __ATTRS_o_ai vec_slo(vector signed char __a, vector signed char __b)
Definition: altivec.h:9872
_mm_hadds_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadds_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16].
Definition: tmmintrin.h:314
_mm_abs_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi32(__m64 __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:108
_mm_mulhrs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhrs_pi16(__m64 __a, __m64 __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:560
_mm_hadds_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadds_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16].
Definition: tmmintrin.h:289
altivec.h
__D
static __inline__ void short __D
Definition: immintrin.h:382
vec_splats
static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a)
Definition: altivec.h:14706
vec_subs
static __inline__ vector signed char __ATTRS_o_ai vec_subs(vector signed char __a, vector signed char __b)
Definition: altivec.h:12137
_mm_abs_pi8
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi8(__m64 __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:36
_mm_hsub_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition: tmmintrin.h:337
vec_perm
static __inline__ vector signed char __ATTRS_o_ai vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c)
Definition: altivec.h:7950
vec_pack
static __inline__ vector signed char __ATTRS_o_ai vec_pack(vector signed short __a, vector signed short __b)
Definition: altivec.h:7377
_mm_alignr_pi8
#define _mm_alignr_pi8(a, b, n)
Concatenates the two 64-bit integer vector operands, and right-shifts the result by the number of byt...
Definition: tmmintrin.h:174
vec_sub
static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector signed char __a, vector signed char __b)
Definition: altivec.h:11857
vec_neg
static vector float __ATTRS_o_ai vec_neg(vector float __a)
Definition: altivec.h:18204
_mm_hsub_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi32(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32]...
Definition: tmmintrin.h:406
vec_unpackh
static __inline__ vector short __ATTRS_o_ai vec_unpackh(vector signed char __a)
Definition: altivec.h:12630
vec_abs
static __inline__ vector signed char __ATTRS_o_ai vec_abs(vector signed char __a)
Definition: altivec.h:117
_mm_sign_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi32(__m64 __a, __m64 __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:767
__attribute__
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
Definition: __clang_hip_libdevice_declares.h:311
_mm_hadd_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16].
Definition: tmmintrin.h:241
_mm_hadd_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi32(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32].
Definition: tmmintrin.h:264
_mm_hsubs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubs_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition: tmmintrin.h:431
vec_cmpgt
static __inline__ vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a, vector signed char __b)
Definition: altivec.h:2131
_mm_shuffle_epi8
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
Definition: tmmintrin.h:586
_mm_sign_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi16(__m128i __a, __m128i __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:663
vec_sum4s
static __inline__ vector int __ATTRS_o_ai vec_sum4s(vector signed char __a, vector int __b)
Definition: altivec.h:12475
_mm_maddubs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_maddubs_pi16(__m64 __a, __m64 __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:520
_mm_abs_epi8
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:54
_mm_sign_pi8
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi8(__m64 __a, __m64 __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition: tmmintrin.h:715
vec_sel
static __inline__ vector signed char __ATTRS_o_ai vec_sel(vector signed char __a, vector signed char __b, vector unsigned char __c)
Definition: altivec.h:8576
vec_reve
static __ATTRS_o_ai vector bool char vec_reve(vector bool char __a)
Definition: altivec.h:17497
vec_packs
static __inline__ vector signed char __ATTRS_o_ai vec_packs(vector short __a, vector short __b)
Definition: altivec.h:7703
_mm_hsub_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi32(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32...
Definition: tmmintrin.h:360
vec_sr
static __inline__ vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, vector unsigned char __b)
Definition: altivec.h:10381
_mm_shuffle_pi8
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_shuffle_pi8(__m64 __a, __m64 __b)
Copies the 8-bit integers from a 64-bit integer vector to the destination or clears 8-bit values in t...
Definition: tmmintrin.h:611