clang  14.0.0git
tmmintrin.h
Go to the documentation of this file.
1 /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __TMMINTRIN_H
11 #define __TMMINTRIN_H
12 
13 #if !defined(__i386__) && !defined(__x86_64__)
14 #error "This header is only meant to be used on x86 and x64 architecture"
15 #endif
16 
17 #include <pmmintrin.h>
18 
19 /* Define the default attributes for the functions in this file. Both sets request __always_inline__, __nodebug__, a minimum vector width of 64 and the "ssse3" target; the _MMX set additionally names the "mmx" target and is the one applied to the functions in this file that take and return __m64. */
20 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
21 #define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
22 
23 /// Computes the absolute value of each of the packed 8-bit signed
24 /// integers in the source operand and stores the 8-bit unsigned integer
25 /// results in the destination.
26 ///
27 /// \headerfile <x86intrin.h>
28 ///
29 /// This intrinsic corresponds to the \c PABSB instruction.
30 ///
31 /// \param __a
32 /// A 64-bit vector of [8 x i8].
33 /// \returns A 64-bit integer vector containing the absolute values of the
34 /// elements in the operand.
35 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
36 _mm_abs_pi8(__m64 __a)
37 {
38  return (__m64)__builtin_ia32_pabsb((__v8qi)__a); /* __a is passed as __v8qi; the builtin's result is cast back to __m64 */
39 }
40 
41 /// Computes the absolute value of each of the packed 8-bit signed
42 /// integers in the source operand and stores the 8-bit unsigned integer
43 /// results in the destination.
44 ///
45 /// \headerfile <x86intrin.h>
46 ///
47 /// This intrinsic corresponds to the \c VPABSB instruction.
48 ///
49 /// \param __a
50 /// A 128-bit vector of [16 x i8].
51 /// \returns A 128-bit integer vector containing the absolute values of the
52 /// elements in the operand.
53 static __inline__ __m128i __DEFAULT_FN_ATTRS
54 _mm_abs_epi8(__m128i __a)
55 {
56  return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); /* __a is passed as __v16qi; the builtin's result is cast back to __m128i */
57 }
58 
59 /// Computes the absolute value of each of the packed 16-bit signed
60 /// integers in the source operand and stores the 16-bit unsigned integer
61 /// results in the destination.
62 ///
63 /// \headerfile <x86intrin.h>
64 ///
65 /// This intrinsic corresponds to the \c PABSW instruction.
66 ///
67 /// \param __a
68 /// A 64-bit vector of [4 x i16].
69 /// \returns A 64-bit integer vector containing the absolute values of the
70 /// elements in the operand.
71 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
72 _mm_abs_pi16(__m64 __a)
73 {
74  return (__m64)__builtin_ia32_pabsw((__v4hi)__a); /* __a is passed as __v4hi; the builtin's result is cast back to __m64 */
75 }
76 
77 /// Computes the absolute value of each of the packed 16-bit signed
78 /// integers in the source operand and stores the 16-bit unsigned integer
79 /// results in the destination.
80 ///
81 /// \headerfile <x86intrin.h>
82 ///
83 /// This intrinsic corresponds to the \c VPABSW instruction.
84 ///
85 /// \param __a
86 /// A 128-bit vector of [8 x i16].
87 /// \returns A 128-bit integer vector containing the absolute values of the
88 /// elements in the operand.
89 static __inline__ __m128i __DEFAULT_FN_ATTRS
90 _mm_abs_epi16(__m128i __a)
91 {
92  return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); /* __a is passed as __v8hi; the builtin's result is cast back to __m128i */
93 }
94 
95 /// Computes the absolute value of each of the packed 32-bit signed
96 /// integers in the source operand and stores the 32-bit unsigned integer
97 /// results in the destination.
98 ///
99 /// \headerfile <x86intrin.h>
100 ///
101 /// This intrinsic corresponds to the \c PABSD instruction.
102 ///
103 /// \param __a
104 /// A 64-bit vector of [2 x i32].
105 /// \returns A 64-bit integer vector containing the absolute values of the
106 /// elements in the operand.
107 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
108 _mm_abs_pi32(__m64 __a)
109 {
110  return (__m64)__builtin_ia32_pabsd((__v2si)__a); /* __a is passed as __v2si; the builtin's result is cast back to __m64 */
111 }
112 
113 /// Computes the absolute value of each of the packed 32-bit signed
114 /// integers in the source operand and stores the 32-bit unsigned integer
115 /// results in the destination.
116 ///
117 /// \headerfile <x86intrin.h>
118 ///
119 /// This intrinsic corresponds to the \c VPABSD instruction.
120 ///
121 /// \param __a
122 /// A 128-bit vector of [4 x i32].
123 /// \returns A 128-bit integer vector containing the absolute values of the
124 /// elements in the operand.
125 static __inline__ __m128i __DEFAULT_FN_ATTRS
126 _mm_abs_epi32(__m128i __a)
127 {
128  return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); /* __a is passed as __v4si; the builtin's result is cast back to __m128i */
129 }
130 
131 /// Concatenates the two 128-bit integer vector operands, and
132 /// right-shifts the result by the number of bytes specified in the immediate
133 /// operand.
134 ///
135 /// \headerfile <x86intrin.h>
136 ///
137 /// \code
138 /// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
139 /// \endcode
140 ///
141 /// This intrinsic corresponds to the \c PALIGNR instruction.
142 ///
143 /// \param a
144 /// A 128-bit vector of [16 x i8] containing one of the source operands.
145 /// \param b
146 /// A 128-bit vector of [16 x i8] containing one of the source operands.
147 /// \param n
148 /// An immediate operand specifying how many bytes to right-shift the result.
149 /// \returns A 128-bit integer vector containing the concatenated right-shifted
150 /// value.
151 #define _mm_alignr_epi8(a, b, n) \
152  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
153  (__v16qi)(__m128i)(b), (n))) /* a and b are each cast to __m128i and then to __v16qi; n is forwarded unchanged as the builtin's third argument (presumably a macro so that n stays an immediate - confirm) */
154 
155 /// Concatenates the two 64-bit integer vector operands, and right-shifts
156 /// the result by the number of bytes specified in the immediate operand.
157 ///
158 /// \headerfile <x86intrin.h>
159 ///
160 /// \code
161 /// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
162 /// \endcode
163 ///
164 /// This intrinsic corresponds to the \c PALIGNR instruction.
165 ///
166 /// \param a
167 /// A 64-bit vector of [8 x i8] containing one of the source operands.
168 /// \param b
169 /// A 64-bit vector of [8 x i8] containing one of the source operands.
170 /// \param n
171 /// An immediate operand specifying how many bytes to right-shift the result.
172 /// \returns A 64-bit integer vector containing the concatenated right-shifted
173 /// value.
174 #define _mm_alignr_pi8(a, b, n) \
175  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))) /* a and b are each cast to __m64 and then to __v8qi; n is forwarded unchanged as the builtin's third argument (presumably a macro so that n stays an immediate - confirm) */
176 
177 /// Horizontally adds the adjacent pairs of values contained in 2 packed
178 /// 128-bit vectors of [8 x i16].
179 ///
180 /// \headerfile <x86intrin.h>
181 ///
182 /// This intrinsic corresponds to the \c VPHADDW instruction.
183 ///
184 /// \param __a
185 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
186 /// horizontal sums of the values are stored in the lower bits of the
187 /// destination.
188 /// \param __b
189 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
190 /// horizontal sums of the values are stored in the upper bits of the
191 /// destination.
192 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
193 /// both operands.
194 static __inline__ __m128i __DEFAULT_FN_ATTRS
195 _mm_hadd_epi16(__m128i __a, __m128i __b)
196 {
197  return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); /* both operands are passed as __v8hi; the result is cast back to __m128i */
198 }
199 
200 /// Horizontally adds the adjacent pairs of values contained in 2 packed
201 /// 128-bit vectors of [4 x i32].
202 ///
203 /// \headerfile <x86intrin.h>
204 ///
205 /// This intrinsic corresponds to the \c VPHADDD instruction.
206 ///
207 /// \param __a
208 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
209 /// horizontal sums of the values are stored in the lower bits of the
210 /// destination.
211 /// \param __b
212 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
213 /// horizontal sums of the values are stored in the upper bits of the
214 /// destination.
215 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
216 /// both operands.
217 static __inline__ __m128i __DEFAULT_FN_ATTRS
218 _mm_hadd_epi32(__m128i __a, __m128i __b)
219 {
220  return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); /* both operands are passed as __v4si; the result is cast back to __m128i */
221 }
222 
223 /// Horizontally adds the adjacent pairs of values contained in 2 packed
224 /// 64-bit vectors of [4 x i16].
225 ///
226 /// \headerfile <x86intrin.h>
227 ///
228 /// This intrinsic corresponds to the \c PHADDW instruction.
229 ///
230 /// \param __a
231 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
232 /// horizontal sums of the values are stored in the lower bits of the
233 /// destination.
234 /// \param __b
235 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
236 /// horizontal sums of the values are stored in the upper bits of the
237 /// destination.
238 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
239 /// operands.
240 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
241 _mm_hadd_pi16(__m64 __a, __m64 __b)
242 {
243  return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); /* both operands are passed as __v4hi; the result is cast back to __m64 */
244 }
245 
246 /// Horizontally adds the adjacent pairs of values contained in 2 packed
247 /// 64-bit vectors of [2 x i32].
248 ///
249 /// \headerfile <x86intrin.h>
250 ///
251 /// This intrinsic corresponds to the \c PHADDD instruction.
252 ///
253 /// \param __a
254 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
255 /// horizontal sums of the values are stored in the lower bits of the
256 /// destination.
257 /// \param __b
258 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
259 /// horizontal sums of the values are stored in the upper bits of the
260 /// destination.
261 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
262 /// operands.
263 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
264 _mm_hadd_pi32(__m64 __a, __m64 __b)
265 {
266  return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); /* both operands are passed as __v2si; the result is cast back to __m64 */
267 }
268 
269 /// Horizontally adds the adjacent pairs of values contained in 2 packed
270 /// 128-bit vectors of [8 x i16]. Sums greater than 0x7FFF (32767) are
271 /// saturated to 0x7FFF. Sums less than 0x8000 (-32768 as a signed 16-bit
272 /// value) are saturated to 0x8000.
273 ///
274 /// \headerfile <x86intrin.h>
275 ///
276 /// This intrinsic corresponds to the \c VPHADDSW instruction.
277 ///
278 /// \param __a
279 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
280 /// horizontal sums of the values are stored in the lower bits of the
281 /// destination.
282 /// \param __b
283 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
284 /// horizontal sums of the values are stored in the upper bits of the
285 /// destination.
286 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
287 /// sums of both operands.
288 static __inline__ __m128i __DEFAULT_FN_ATTRS
289 _mm_hadds_epi16(__m128i __a, __m128i __b)
290 {
291  return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); /* both operands are passed as __v8hi; the result is cast back to __m128i */
292 }
293 
294 /// Horizontally adds the adjacent pairs of values contained in 2 packed
295 /// 64-bit vectors of [4 x i16]. Sums greater than 0x7FFF (32767) are
296 /// saturated to 0x7FFF. Sums less than 0x8000 (-32768 as a signed 16-bit
297 /// value) are saturated to 0x8000.
298 ///
299 /// \headerfile <x86intrin.h>
300 ///
301 /// This intrinsic corresponds to the \c PHADDSW instruction.
302 ///
303 /// \param __a
304 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
305 /// horizontal sums of the values are stored in the lower bits of the
306 /// destination.
307 /// \param __b
308 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
309 /// horizontal sums of the values are stored in the upper bits of the
310 /// destination.
311 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
312 /// sums of both operands.
313 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
314 _mm_hadds_pi16(__m64 __a, __m64 __b)
315 {
316  return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); /* both operands are passed as __v4hi; the result is cast back to __m64 */
317 }
318 
319 /// Horizontally subtracts the adjacent pairs of values contained in 2
320 /// packed 128-bit vectors of [8 x i16].
321 ///
322 /// \headerfile <x86intrin.h>
323 ///
324 /// This intrinsic corresponds to the \c VPHSUBW instruction.
325 ///
326 /// \param __a
327 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
328 /// horizontal differences between the values are stored in the lower bits of
329 /// the destination.
330 /// \param __b
331 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
332 /// horizontal differences between the values are stored in the upper bits of
333 /// the destination.
334 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
335 /// of both operands.
336 static __inline__ __m128i __DEFAULT_FN_ATTRS
337 _mm_hsub_epi16(__m128i __a, __m128i __b)
338 {
339  return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); /* both operands are passed as __v8hi; the result is cast back to __m128i */
340 }
341 
342 /// Horizontally subtracts the adjacent pairs of values contained in 2
343 /// packed 128-bit vectors of [4 x i32].
344 ///
345 /// \headerfile <x86intrin.h>
346 ///
347 /// This intrinsic corresponds to the \c VPHSUBD instruction.
348 ///
349 /// \param __a
350 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
351 /// horizontal differences between the values are stored in the lower bits of
352 /// the destination.
353 /// \param __b
354 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
355 /// horizontal differences between the values are stored in the upper bits of
356 /// the destination.
357 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
358 /// of both operands.
359 static __inline__ __m128i __DEFAULT_FN_ATTRS
360 _mm_hsub_epi32(__m128i __a, __m128i __b)
361 {
362  return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); /* both operands are passed as __v4si; the result is cast back to __m128i */
363 }
364 
365 /// Horizontally subtracts the adjacent pairs of values contained in 2
366 /// packed 64-bit vectors of [4 x i16].
367 ///
368 /// \headerfile <x86intrin.h>
369 ///
370 /// This intrinsic corresponds to the \c PHSUBW instruction.
371 ///
372 /// \param __a
373 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
374 /// horizontal differences between the values are stored in the lower bits of
375 /// the destination.
376 /// \param __b
377 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
378 /// horizontal differences between the values are stored in the upper bits of
379 /// the destination.
380 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
381 /// of both operands.
382 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
383 _mm_hsub_pi16(__m64 __a, __m64 __b)
384 {
385  return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); /* both operands are passed as __v4hi; the result is cast back to __m64 */
386 }
387 
388 /// Horizontally subtracts the adjacent pairs of values contained in 2
389 /// packed 64-bit vectors of [2 x i32].
390 ///
391 /// \headerfile <x86intrin.h>
392 ///
393 /// This intrinsic corresponds to the \c PHSUBD instruction.
394 ///
395 /// \param __a
396 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
397 /// horizontal differences between the values are stored in the lower bits of
398 /// the destination.
399 /// \param __b
400 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
401 /// horizontal differences between the values are stored in the upper bits of
402 /// the destination.
403 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
404 /// of both operands.
405 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
406 _mm_hsub_pi32(__m64 __a, __m64 __b)
407 {
408  return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); /* both operands are passed as __v2si; the result is cast back to __m64 */
409 }
410 
411 /// Horizontally subtracts the adjacent pairs of values contained in 2
412 /// packed 128-bit vectors of [8 x i16]. Differences greater than 0x7FFF
413 /// (32767) are saturated to 0x7FFF. Differences less than 0x8000 (-32768 as
414 /// a signed 16-bit value) are saturated to 0x8000.
415 ///
416 /// \headerfile <x86intrin.h>
417 ///
418 /// This intrinsic corresponds to the \c VPHSUBSW instruction.
419 ///
420 /// \param __a
421 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
422 /// horizontal differences between the values are stored in the lower bits of
423 /// the destination.
424 /// \param __b
425 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
426 /// horizontal differences between the values are stored in the upper bits of
427 /// the destination.
428 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
429 /// differences of both operands.
430 static __inline__ __m128i __DEFAULT_FN_ATTRS
431 _mm_hsubs_epi16(__m128i __a, __m128i __b)
432 {
433  return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); /* both operands are passed as __v8hi; the result is cast back to __m128i */
434 }
435 
436 /// Horizontally subtracts the adjacent pairs of values contained in 2
437 /// packed 64-bit vectors of [4 x i16]. Differences greater than 0x7FFF
438 /// (32767) are saturated to 0x7FFF. Differences less than 0x8000 (-32768 as
439 /// a signed 16-bit value) are saturated to 0x8000.
440 ///
441 /// \headerfile <x86intrin.h>
442 ///
443 /// This intrinsic corresponds to the \c PHSUBSW instruction.
444 ///
445 /// \param __a
446 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
447 /// horizontal differences between the values are stored in the lower bits of
448 /// the destination.
449 /// \param __b
450 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
451 /// horizontal differences between the values are stored in the upper bits of
452 /// the destination.
453 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
454 /// differences of both operands.
455 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
456 _mm_hsubs_pi16(__m64 __a, __m64 __b)
457 {
458  return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); /* both operands are passed as __v4hi; the result is cast back to __m64 */
459 }
460 
461 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
462 /// values contained in the first source operand and packed 8-bit signed
463 /// integer values contained in the second source operand, adds pairs of
464 /// contiguous products with signed saturation, and writes the 16-bit sums to
465 /// the corresponding bits in the destination.
466 ///
467 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
468 /// both operands are multiplied, and the sum of both results is written to
469 /// bits [15:0] of the destination.
470 ///
471 /// \headerfile <x86intrin.h>
472 ///
473 /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
474 ///
475 /// \param __a
476 /// A 128-bit integer vector containing the first source operand.
477 /// \param __b
478 /// A 128-bit integer vector containing the second source operand.
479 /// \returns A 128-bit integer vector containing the sums of products of both
480 /// operands: \n
481 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
482 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
483 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
484 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
485 /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
486 /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
487 /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
488 /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
489 static __inline__ __m128i __DEFAULT_FN_ATTRS
490 _mm_maddubs_epi16(__m128i __a, __m128i __b)
491 {
492  return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); /* both operands get the same __v16qi cast, so any unsigned-vs-signed treatment of __a and __b is presumably applied inside the builtin, not by these casts */
493 }
494 
495 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
496 /// values contained in the first source operand and packed 8-bit signed
497 /// integer values contained in the second source operand, adds pairs of
498 /// contiguous products with signed saturation, and writes the 16-bit sums to
499 /// the corresponding bits in the destination.
500 ///
501 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
502 /// both operands are multiplied, and the sum of both results is written to
503 /// bits [15:0] of the destination.
504 ///
505 /// \headerfile <x86intrin.h>
506 ///
507 /// This intrinsic corresponds to the \c PMADDUBSW instruction.
508 ///
509 /// \param __a
510 /// A 64-bit integer vector containing the first source operand.
511 /// \param __b
512 /// A 64-bit integer vector containing the second source operand.
513 /// \returns A 64-bit integer vector containing the sums of products of both
514 /// operands: \n
515 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
516 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
517 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
518 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
519 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
520 _mm_maddubs_pi16(__m64 __a, __m64 __b)
521 {
522  return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); /* both operands get the same __v8qi cast, so any unsigned-vs-signed treatment of __a and __b is presumably applied inside the builtin, not by these casts */
523 }
524 
525 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
526 /// products to the 18 most significant bits by right-shifting, rounds the
527 /// truncated value by adding 1, and writes bits [16:1] to the destination.
528 ///
529 /// \headerfile <x86intrin.h>
530 ///
531 /// This intrinsic corresponds to the \c VPMULHRSW instruction.
532 ///
533 /// \param __a
534 /// A 128-bit vector of [8 x i16] containing one of the source operands.
535 /// \param __b
536 /// A 128-bit vector of [8 x i16] containing one of the source operands.
537 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
538 /// products of both operands.
539 static __inline__ __m128i __DEFAULT_FN_ATTRS
540 _mm_mulhrs_epi16(__m128i __a, __m128i __b)
541 {
542  return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); /* both operands are passed as __v8hi; the result is cast back to __m128i */
543 }
544 
545 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
546 /// products to the 18 most significant bits by right-shifting, rounds the
547 /// truncated value by adding 1, and writes bits [16:1] to the destination.
548 ///
549 /// \headerfile <x86intrin.h>
550 ///
551 /// This intrinsic corresponds to the \c PMULHRSW instruction.
552 ///
553 /// \param __a
554 /// A 64-bit vector of [4 x i16] containing one of the source operands.
555 /// \param __b
556 /// A 64-bit vector of [4 x i16] containing one of the source operands.
557 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
558 /// products of both operands.
559 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
560 _mm_mulhrs_pi16(__m64 __a, __m64 __b)
561 {
562  return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); /* both operands are passed as __v4hi; the result is cast back to __m64 */
563 }
564 
565 /// Copies the 8-bit integers from a 128-bit integer vector to the
566 /// destination or clears 8-bit values in the destination, as specified by
567 /// the second source operand.
568 ///
569 /// \headerfile <x86intrin.h>
570 ///
571 /// This intrinsic corresponds to the \c VPSHUFB instruction.
572 ///
573 /// \param __a
574 /// A 128-bit integer vector containing the values to be copied.
575 /// \param __b
576 /// A 128-bit integer vector containing control bytes corresponding to
577 /// positions in the destination:
578 /// Bit 7: \n
579 /// 1: Clear the corresponding byte in the destination. \n
580 /// 0: Copy the selected source byte to the corresponding byte in the
581 /// destination. \n
582 /// Bits [6:4] Reserved. \n
583 /// Bits [3:0] select the source byte to be copied.
584 /// \returns A 128-bit integer vector containing the copied or cleared values.
585 static __inline__ __m128i __DEFAULT_FN_ATTRS
586 _mm_shuffle_epi8(__m128i __a, __m128i __b)
587 {
588  return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); /* data (__a) and control bytes (__b) are both passed as __v16qi; the result is cast back to __m128i */
589 }
590 
591 /// Copies the 8-bit integers from a 64-bit integer vector to the
592 /// destination or clears 8-bit values in the destination, as specified by
593 /// the second source operand.
594 ///
595 /// \headerfile <x86intrin.h>
596 ///
597 /// This intrinsic corresponds to the \c PSHUFB instruction.
598 ///
599 /// \param __a
600 /// A 64-bit integer vector containing the values to be copied.
601 /// \param __b
602 /// A 64-bit integer vector containing control bytes corresponding to
603 /// positions in the destination:
604 /// Bit 7: \n
605 /// 1: Clear the corresponding byte in the destination. \n
606 /// 0: Copy the selected source byte to the corresponding byte in the
607 /// destination. \n
608 /// Bits [2:0] select the source byte to be copied.
609 /// \returns A 64-bit integer vector containing the copied or cleared values.
610 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
611 _mm_shuffle_pi8(__m64 __a, __m64 __b)
612 {
613  return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); /* data (__a) and control bytes (__b) are both passed as __v8qi; the result is cast back to __m64 */
614 }
615 
616 /// For each 8-bit integer in the first source operand, perform one of
617 /// the following actions as specified by the second source operand.
618 ///
619 /// If the byte in the second source is negative, calculate the two's
620 /// complement of the corresponding byte in the first source, and write that
621 /// value to the destination. If the byte in the second source is positive,
622 /// copy the corresponding byte from the first source to the destination. If
623 /// the byte in the second source is zero, clear the corresponding byte in
624 /// the destination.
625 ///
626 /// \headerfile <x86intrin.h>
627 ///
628 /// This intrinsic corresponds to the \c VPSIGNB instruction.
629 ///
630 /// \param __a
631 /// A 128-bit integer vector containing the values to be copied.
632 /// \param __b
633 /// A 128-bit integer vector containing control bytes corresponding to
634 /// positions in the destination.
635 /// \returns A 128-bit integer vector containing the resultant values.
636 static __inline__ __m128i __DEFAULT_FN_ATTRS
637 _mm_sign_epi8(__m128i __a, __m128i __b)
638 {
639  return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); /* values (__a) and controls (__b) are both passed as __v16qi; the result is cast back to __m128i */
640 }
641 
642 /// For each 16-bit integer in the first source operand, perform one of
643 /// the following actions as specified by the second source operand.
644 ///
645 /// If the word in the second source is negative, calculate the two's
646 /// complement of the corresponding word in the first source, and write that
647 /// value to the destination. If the word in the second source is positive,
648 /// copy the corresponding word from the first source to the destination. If
649 /// the word in the second source is zero, clear the corresponding word in
650 /// the destination.
651 ///
652 /// \headerfile <x86intrin.h>
653 ///
654 /// This intrinsic corresponds to the \c VPSIGNW instruction.
655 ///
656 /// \param __a
657 /// A 128-bit integer vector containing the values to be copied.
658 /// \param __b
659 /// A 128-bit integer vector containing control words corresponding to
660 /// positions in the destination.
661 /// \returns A 128-bit integer vector containing the resultant values.
662 static __inline__ __m128i __DEFAULT_FN_ATTRS
663 _mm_sign_epi16(__m128i __a, __m128i __b)
664 {
665  return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); /* values (__a) and controls (__b) are both passed as __v8hi; the result is cast back to __m128i */
666 }
667 
668 /// For each 32-bit integer in the first source operand, perform one of
669 /// the following actions as specified by the second source operand.
670 ///
671 /// If the doubleword in the second source is negative, calculate the two's
672 /// complement of the corresponding doubleword in the first source, and write
673 /// that value to the destination. If the doubleword in the second source is
674 /// positive, copy the corresponding doubleword from the first source to the
675 /// destination. If the doubleword in the second source is zero, clear the
676 /// corresponding doubleword in the destination.
677 ///
678 /// \headerfile <x86intrin.h>
679 ///
680 /// This intrinsic corresponds to the \c VPSIGND instruction.
681 ///
682 /// \param __a
683 /// A 128-bit integer vector containing the values to be copied.
684 /// \param __b
685 /// A 128-bit integer vector containing control doublewords corresponding to
686 /// positions in the destination.
687 /// \returns A 128-bit integer vector containing the resultant values.
688 static __inline__ __m128i __DEFAULT_FN_ATTRS
689 _mm_sign_epi32(__m128i __a, __m128i __b)
690 {
691  return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); /* values (__a) and controls (__b) are both passed as __v4si; the result is cast back to __m128i */
692 }
693 
694 /// For each 8-bit integer in the first source operand, perform one of
695 /// the following actions as specified by the second source operand.
696 ///
697 /// If the byte in the second source is negative, calculate the two's
698 /// complement of the corresponding byte in the first source, and write that
699 /// value to the destination. If the byte in the second source is positive,
700 /// copy the corresponding byte from the first source to the destination. If
701 /// the byte in the second source is zero, clear the corresponding byte in
702 /// the destination.
703 ///
704 /// \headerfile <x86intrin.h>
705 ///
706 /// This intrinsic corresponds to the \c PSIGNB instruction.
707 ///
708 /// \param __a
709 /// A 64-bit integer vector containing the values to be copied.
710 /// \param __b
711 /// A 64-bit integer vector containing control bytes corresponding to
712 /// positions in the destination.
713 /// \returns A 64-bit integer vector containing the resultant values.
714 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
715 _mm_sign_pi8(__m64 __a, __m64 __b)
716 {
717  return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); /* values (__a) and controls (__b) are both passed as __v8qi; the result is cast back to __m64 */
718 }
719 
720 /// For each 16-bit integer in the first source operand, perform one of
721 /// the following actions as specified by the second source operand.
722 ///
723 /// If the word in the second source is negative, calculate the two's
724 /// complement of the corresponding word in the first source, and write that
725 /// value to the destination. If the word in the second source is positive,
726 /// copy the corresponding word from the first source to the destination. If
727 /// the word in the second source is zero, clear the corresponding word in
728 /// the destination.
729 ///
730 /// \headerfile <x86intrin.h>
731 ///
732 /// This intrinsic corresponds to the \c PSIGNW instruction.
733 ///
734 /// \param __a
735 /// A 64-bit integer vector containing the values to be copied.
736 /// \param __b
737 /// A 64-bit integer vector containing control words corresponding to
738 /// positions in the destination.
739 /// \returns A 64-bit integer vector containing the resultant values.
740 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
741 _mm_sign_pi16(__m64 __a, __m64 __b)
742 {
743  return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); /* values (__a) and controls (__b) are both passed as __v4hi; the result is cast back to __m64 */
744 }
745 
746 /// For each 32-bit integer in the first source operand, perform one of
747 /// the following actions as specified by the second source operand.
748 ///
749 /// If the doubleword in the second source is negative, calculate the two's
750 /// complement of the corresponding doubleword in the first source, and
751 /// write that value to the destination. If the doubleword in the second
752 /// source is positive, copy the corresponding doubleword from the first
753 /// source to the destination. If the doubleword in the second source is
754 /// zero, clear the corresponding doubleword in the destination.
755 ///
756 /// \headerfile <x86intrin.h>
757 ///
758 /// This intrinsic corresponds to the \c PSIGND instruction.
759 ///
760 /// \param __a
761 /// A 64-bit integer vector containing the values to be copied.
762 /// \param __b
763 /// A 64-bit integer vector containing two control doublewords corresponding
764 /// to positions in the destination.
765 /// \returns A 64-bit integer vector containing the resultant values.
766 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
767 _mm_sign_pi32(__m64 __a, __m64 __b)
768 {
769  return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); /* values (__a) and controls (__b) are both passed as __v2si; the result is cast back to __m64 */
770 }
771 
772 #undef __DEFAULT_FN_ATTRS
773 #undef __DEFAULT_FN_ATTRS_MMX
774 
775 #endif /* __TMMINTRIN_H */
_mm_hadd_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16].
Definition: tmmintrin.h:195
_mm_abs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:90
_mm_sign_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi16(__m64 __a, __m64 __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:741
_mm_maddubs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:490
_mm_abs_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:126
__a
static __inline__ void int __a
Definition: emmintrin.h:4189
_mm_mulhrs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:540
_mm_abs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi16(__m64 __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:72
_mm_hsubs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsubs_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:456
_mm_sign_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi32(__m128i __a, __m128i __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:689
_mm_sign_epi8
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi8(__m128i __a, __m128i __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition: tmmintrin.h:637
_mm_hadd_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi32(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32].
Definition: tmmintrin.h:218
_mm_hsub_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:383
_mm_hadds_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadds_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16].
Definition: tmmintrin.h:314
_mm_abs_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi32(__m64 __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:108
_mm_mulhrs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhrs_pi16(__m64 __a, __m64 __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:560
_mm_hadds_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadds_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16].
Definition: tmmintrin.h:289
__DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS
Definition: tmmintrin.h:20
_mm_abs_pi8
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi8(__m64 __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:36
_mm_hsub_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition: tmmintrin.h:337
pmmintrin.h
_mm_hsub_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi32(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32]...
Definition: tmmintrin.h:406
_mm_sign_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi32(__m64 __a, __m64 __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:767
__DEFAULT_FN_ATTRS_MMX
#define __DEFAULT_FN_ATTRS_MMX
Definition: tmmintrin.h:21
_mm_hadd_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16].
Definition: tmmintrin.h:241
_mm_hadd_pi32
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi32(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32].
Definition: tmmintrin.h:264
_mm_hsubs_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubs_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition: tmmintrin.h:431
__b
static __inline__ vector float vector float __b
Definition: altivec.h:566
_mm_shuffle_epi8
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
Definition: tmmintrin.h:586
_mm_sign_epi16
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi16(__m128i __a, __m128i __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:663
_mm_maddubs_pi16
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_maddubs_pi16(__m64 __a, __m64 __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:520
_mm_abs_epi8
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:54
_mm_sign_pi8
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi8(__m64 __a, __m64 __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition: tmmintrin.h:715
_mm_hsub_epi32
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi32(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32...
Definition: tmmintrin.h:360
_mm_shuffle_pi8
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_shuffle_pi8(__m64 __a, __m64 __b)
Copies the 8-bit integers from a 64-bit integer vector to the destination or clears 8-bit values in t...
Definition: tmmintrin.h:611