clang  6.0.0svn
tmmintrin.h
Go to the documentation of this file.
1 /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23 
24 #ifndef __TMMINTRIN_H
25 #define __TMMINTRIN_H
26 
27 #include <pmmintrin.h>
28 
29 /* Define the default attributes for the functions in this file: each function
 * declared with this macro is __always_inline__, __nodebug__, and carries
 * __target__("ssse3"). The macro is #undef'd again at the end of the header. */
30 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
31 
32 /// \brief Computes the absolute value of each of the packed 8-bit signed
33 /// integers in the source operand and stores the 8-bit unsigned integer
34 /// results in the destination.
35 ///
36 /// \headerfile <x86intrin.h>
37 ///
38 /// This intrinsic corresponds to the \c PABSB instruction.
39 ///
40 /// \param __a
41 /// A 64-bit vector of [8 x i8].
42 /// \returns A 64-bit integer vector containing the absolute values of the
43 /// elements in the operand.
44 static __inline__ __m64 __DEFAULT_FN_ATTRS
45 _mm_abs_pi8(__m64 __a)
46 {
    /* __a is passed to the builtin as __v8qi; the result is cast back to __m64. */
47  return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
48 }
49 
50 /// \brief Computes the absolute value of each of the packed 8-bit signed
51 /// integers in the source operand and stores the 8-bit unsigned integer
52 /// results in the destination.
53 ///
54 /// \headerfile <x86intrin.h>
55 ///
56 /// This intrinsic corresponds to the \c VPABSB instruction.
57 ///
58 /// \param __a
59 /// A 128-bit vector of [16 x i8].
60 /// \returns A 128-bit integer vector containing the absolute values of the
61 /// elements in the operand.
62 static __inline__ __m128i __DEFAULT_FN_ATTRS
63 _mm_abs_epi8(__m128i __a)
64 {
    /* __a is passed to the builtin as __v16qi; the result is cast back to
     * __m128i. */
65  return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
66 }
67 
68 /// \brief Computes the absolute value of each of the packed 16-bit signed
69 /// integers in the source operand and stores the 16-bit unsigned integer
70 /// results in the destination.
71 ///
72 /// \headerfile <x86intrin.h>
73 ///
74 /// This intrinsic corresponds to the \c PABSW instruction.
75 ///
76 /// \param __a
77 /// A 64-bit vector of [4 x i16].
78 /// \returns A 64-bit integer vector containing the absolute values of the
79 /// elements in the operand.
80 static __inline__ __m64 __DEFAULT_FN_ATTRS
81 _mm_abs_pi16(__m64 __a)
82 {
    /* __a is passed to the builtin as __v4hi; the result is cast back to __m64. */
83  return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
84 }
85 
86 /// \brief Computes the absolute value of each of the packed 16-bit signed
87 /// integers in the source operand and stores the 16-bit unsigned integer
88 /// results in the destination.
89 ///
90 /// \headerfile <x86intrin.h>
91 ///
92 /// This intrinsic corresponds to the \c VPABSW instruction.
93 ///
94 /// \param __a
95 /// A 128-bit vector of [8 x i16].
96 /// \returns A 128-bit integer vector containing the absolute values of the
97 /// elements in the operand.
98 static __inline__ __m128i __DEFAULT_FN_ATTRS
99 _mm_abs_epi16(__m128i __a)
100 {
    /* __a is passed to the builtin as __v8hi; the result is cast back to
     * __m128i. */
101  return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
102 }
103 
104 /// \brief Computes the absolute value of each of the packed 32-bit signed
105 /// integers in the source operand and stores the 32-bit unsigned integer
106 /// results in the destination.
107 ///
108 /// \headerfile <x86intrin.h>
109 ///
110 /// This intrinsic corresponds to the \c PABSD instruction.
111 ///
112 /// \param __a
113 /// A 64-bit vector of [2 x i32].
114 /// \returns A 64-bit integer vector containing the absolute values of the
115 /// elements in the operand.
116 static __inline__ __m64 __DEFAULT_FN_ATTRS
117 _mm_abs_pi32(__m64 __a)
118 {
    /* __a is passed to the builtin as __v2si; the result is cast back to __m64. */
119  return (__m64)__builtin_ia32_pabsd((__v2si)__a);
120 }
121 
122 /// \brief Computes the absolute value of each of the packed 32-bit signed
123 /// integers in the source operand and stores the 32-bit unsigned integer
124 /// results in the destination.
125 ///
126 /// \headerfile <x86intrin.h>
127 ///
128 /// This intrinsic corresponds to the \c VPABSD instruction.
129 ///
130 /// \param __a
131 /// A 128-bit vector of [4 x i32].
132 /// \returns A 128-bit integer vector containing the absolute values of the
133 /// elements in the operand.
134 static __inline__ __m128i __DEFAULT_FN_ATTRS
135 _mm_abs_epi32(__m128i __a)
136 {
    /* __a is passed to the builtin as __v4si; the result is cast back to
     * __m128i. */
137  return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
138 }
139 
140 /// \brief Concatenates the two 128-bit integer vector operands, and
141 /// right-shifts the result by the number of bytes specified in the immediate
142 /// operand.
143 ///
144 /// \headerfile <x86intrin.h>
145 ///
146 /// \code
147 /// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
148 /// \endcode
149 ///
150 /// This intrinsic corresponds to the \c PALIGNR instruction.
151 ///
152 /// \param a
153 /// A 128-bit vector of [16 x i8] containing one of the source operands.
154 /// \param b
155 /// A 128-bit vector of [16 x i8] containing one of the source operands.
156 /// \param n
157 /// An immediate operand specifying how many bytes to right-shift the result.
158 /// \returns A 128-bit integer vector containing the concatenated right-shifted
159 /// value.
/// \note Implemented as a macro that expands to a statement expression. Each
///    argument appears exactly once in the expansion: \a a and \a b are cast
///    to __m128i and then to __v16qi, and \a n is forwarded as written to
///    __builtin_ia32_palignr128.
160 #define _mm_alignr_epi8(a, b, n) __extension__ ({ \
161  (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
162  (__v16qi)(__m128i)(b), (n)); })
163 
164 /// \brief Concatenates the two 64-bit integer vector operands, and right-shifts
165 /// the result by the number of bytes specified in the immediate operand.
166 ///
167 /// \headerfile <x86intrin.h>
168 ///
169 /// \code
170 /// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
171 /// \endcode
172 ///
173 /// This intrinsic corresponds to the \c PALIGNR instruction.
174 ///
175 /// \param a
176 /// A 64-bit vector of [8 x i8] containing one of the source operands.
177 /// \param b
178 /// A 64-bit vector of [8 x i8] containing one of the source operands.
179 /// \param n
180 /// An immediate operand specifying how many bytes to right-shift the result.
181 /// \returns A 64-bit integer vector containing the concatenated right-shifted
182 /// value.
/// \note Implemented as a macro that expands to a statement expression. Each
///    argument appears exactly once in the expansion: \a a and \a b are cast
///    to __m64 and then to __v8qi, and \a n is forwarded as written to
///    __builtin_ia32_palignr.
183 #define _mm_alignr_pi8(a, b, n) __extension__ ({ \
184  (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); })
185 
186 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
187 /// 128-bit vectors of [8 x i16].
188 ///
189 /// \headerfile <x86intrin.h>
190 ///
191 /// This intrinsic corresponds to the \c VPHADDW instruction.
192 ///
193 /// \param __a
194 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
195 /// horizontal sums of the values are stored in the lower bits of the
196 /// destination.
197 /// \param __b
198 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
199 /// horizontal sums of the values are stored in the upper bits of the
200 /// destination.
201 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
202 /// both operands.
203 static __inline__ __m128i __DEFAULT_FN_ATTRS
204 _mm_hadd_epi16(__m128i __a, __m128i __b)
205 {
    /* Both operands are passed as __v8hi in (__a, __b) order; the result is
     * cast back to __m128i. */
206  return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
207 }
208 
209 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
210 /// 128-bit vectors of [4 x i32].
211 ///
212 /// \headerfile <x86intrin.h>
213 ///
214 /// This intrinsic corresponds to the \c VPHADDD instruction.
215 ///
216 /// \param __a
217 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
218 /// horizontal sums of the values are stored in the lower bits of the
219 /// destination.
220 /// \param __b
221 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
222 /// horizontal sums of the values are stored in the upper bits of the
223 /// destination.
224 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
225 /// both operands.
226 static __inline__ __m128i __DEFAULT_FN_ATTRS
227 _mm_hadd_epi32(__m128i __a, __m128i __b)
228 {
    /* Both operands are passed as __v4si in (__a, __b) order; the result is
     * cast back to __m128i. */
229  return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
230 }
231 
232 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
233 /// 64-bit vectors of [4 x i16].
234 ///
235 /// \headerfile <x86intrin.h>
236 ///
237 /// This intrinsic corresponds to the \c PHADDW instruction.
238 ///
239 /// \param __a
240 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
241 /// horizontal sums of the values are stored in the lower bits of the
242 /// destination.
243 /// \param __b
244 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
245 /// horizontal sums of the values are stored in the upper bits of the
246 /// destination.
247 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
248 /// operands.
249 static __inline__ __m64 __DEFAULT_FN_ATTRS
250 _mm_hadd_pi16(__m64 __a, __m64 __b)
251 {
    /* Both operands are passed as __v4hi in (__a, __b) order; the result is
     * cast back to __m64. */
252  return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
253 }
254 
255 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
256 /// 64-bit vectors of [2 x i32].
257 ///
258 /// \headerfile <x86intrin.h>
259 ///
260 /// This intrinsic corresponds to the \c PHADDD instruction.
261 ///
262 /// \param __a
263 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
264 /// horizontal sums of the values are stored in the lower bits of the
265 /// destination.
266 /// \param __b
267 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
268 /// horizontal sums of the values are stored in the upper bits of the
269 /// destination.
270 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
271 /// operands.
272 static __inline__ __m64 __DEFAULT_FN_ATTRS
273 _mm_hadd_pi32(__m64 __a, __m64 __b)
274 {
    /* Both operands are passed as __v2si in (__a, __b) order; the result is
     * cast back to __m64. */
275  return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
276 }
277 
278 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
279 /// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
280 /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
281 ///
282 /// \headerfile <x86intrin.h>
283 ///
284 /// This intrinsic corresponds to the \c VPHADDSW instruction.
285 ///
286 /// \param __a
287 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
288 /// horizontal sums of the values are stored in the lower bits of the
289 /// destination.
290 /// \param __b
291 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
292 /// horizontal sums of the values are stored in the upper bits of the
293 /// destination.
294 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
295 /// sums of both operands.
296 static __inline__ __m128i __DEFAULT_FN_ATTRS
297 _mm_hadds_epi16(__m128i __a, __m128i __b)
298 {
    /* Both operands are passed as __v8hi in (__a, __b) order; the result is
     * cast back to __m128i. */
299  return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
300 }
301 
302 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
303 /// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
304 /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
305 ///
306 /// \headerfile <x86intrin.h>
307 ///
308 /// This intrinsic corresponds to the \c PHADDSW instruction.
309 ///
310 /// \param __a
311 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
312 /// horizontal sums of the values are stored in the lower bits of the
313 /// destination.
314 /// \param __b
315 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
316 /// horizontal sums of the values are stored in the upper bits of the
317 /// destination.
318 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
319 /// sums of both operands.
320 static __inline__ __m64 __DEFAULT_FN_ATTRS
321 _mm_hadds_pi16(__m64 __a, __m64 __b)
322 {
    /* Both operands are passed as __v4hi in (__a, __b) order; the result is
     * cast back to __m64. */
323  return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
324 }
325 
326 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
327 /// packed 128-bit vectors of [8 x i16].
328 ///
329 /// \headerfile <x86intrin.h>
330 ///
331 /// This intrinsic corresponds to the \c VPHSUBW instruction.
332 ///
333 /// \param __a
334 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
335 /// horizontal differences between the values are stored in the lower bits of
336 /// the destination.
337 /// \param __b
338 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
339 /// horizontal differences between the values are stored in the upper bits of
340 /// the destination.
341 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
342 /// of both operands.
343 static __inline__ __m128i __DEFAULT_FN_ATTRS
344 _mm_hsub_epi16(__m128i __a, __m128i __b)
345 {
    /* Operand order is preserved: __a is the builtin's first argument and __b
     * its second, both as __v8hi. The result is cast back to __m128i. */
346  return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
347 }
348 
349 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
350 /// packed 128-bit vectors of [4 x i32].
351 ///
352 /// \headerfile <x86intrin.h>
353 ///
354 /// This intrinsic corresponds to the \c VPHSUBD instruction.
355 ///
356 /// \param __a
357 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
358 /// horizontal differences between the values are stored in the lower bits of
359 /// the destination.
360 /// \param __b
361 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
362 /// horizontal differences between the values are stored in the upper bits of
363 /// the destination.
364 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
365 /// of both operands.
366 static __inline__ __m128i __DEFAULT_FN_ATTRS
367 _mm_hsub_epi32(__m128i __a, __m128i __b)
368 {
    /* Operand order is preserved: __a is the builtin's first argument and __b
     * its second, both as __v4si. The result is cast back to __m128i. */
369  return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
370 }
371 
372 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
373 /// packed 64-bit vectors of [4 x i16].
374 ///
375 /// \headerfile <x86intrin.h>
376 ///
377 /// This intrinsic corresponds to the \c PHSUBW instruction.
378 ///
379 /// \param __a
380 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
381 /// horizontal differences between the values are stored in the lower bits of
382 /// the destination.
383 /// \param __b
384 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
385 /// horizontal differences between the values are stored in the upper bits of
386 /// the destination.
387 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
388 /// of both operands.
389 static __inline__ __m64 __DEFAULT_FN_ATTRS
390 _mm_hsub_pi16(__m64 __a, __m64 __b)
391 {
    /* Operand order is preserved: __a is the builtin's first argument and __b
     * its second, both as __v4hi. The result is cast back to __m64. */
392  return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
393 }
394 
395 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
396 /// packed 64-bit vectors of [2 x i32].
397 ///
398 /// \headerfile <x86intrin.h>
399 ///
400 /// This intrinsic corresponds to the \c PHSUBD instruction.
401 ///
402 /// \param __a
403 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
404 /// horizontal differences between the values are stored in the lower bits of
405 /// the destination.
406 /// \param __b
407 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
408 /// horizontal differences between the values are stored in the upper bits of
409 /// the destination.
410 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
411 /// of both operands.
412 static __inline__ __m64 __DEFAULT_FN_ATTRS
413 _mm_hsub_pi32(__m64 __a, __m64 __b)
414 {
    /* Operand order is preserved: __a is the builtin's first argument and __b
     * its second, both as __v2si. The result is cast back to __m64. */
415  return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
416 }
417 
418 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
419 /// packed 128-bit vectors of [8 x i16]. Positive differences greater than
420 /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
421 /// saturated to 8000h.
422 ///
423 /// \headerfile <x86intrin.h>
424 ///
425 /// This intrinsic corresponds to the \c VPHSUBSW instruction.
426 ///
427 /// \param __a
428 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
429 /// horizontal differences between the values are stored in the lower bits of
430 /// the destination.
431 /// \param __b
432 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
433 /// horizontal differences between the values are stored in the upper bits of
434 /// the destination.
435 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
436 /// differences of both operands.
437 static __inline__ __m128i __DEFAULT_FN_ATTRS
438 _mm_hsubs_epi16(__m128i __a, __m128i __b)
439 {
    /* Operand order is preserved: __a is the builtin's first argument and __b
     * its second, both as __v8hi. The result is cast back to __m128i. */
440  return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
441 }
442 
443 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
444 /// packed 64-bit vectors of [4 x i16]. Positive differences greater than
445 /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
446 /// saturated to 8000h.
447 ///
448 /// \headerfile <x86intrin.h>
449 ///
450 /// This intrinsic corresponds to the \c PHSUBSW instruction.
451 ///
452 /// \param __a
453 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
454 /// horizontal differences between the values are stored in the lower bits of
455 /// the destination.
456 /// \param __b
457 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
458 /// horizontal differences between the values are stored in the upper bits of
459 /// the destination.
460 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
461 /// differences of both operands.
462 static __inline__ __m64 __DEFAULT_FN_ATTRS
463 _mm_hsubs_pi16(__m64 __a, __m64 __b)
464 {
    /* Operand order is preserved: __a is the builtin's first argument and __b
     * its second, both as __v4hi. The result is cast back to __m64. */
465  return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
466 }
467 
468 /// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
469 /// values contained in the first source operand and packed 8-bit signed
470 /// integer values contained in the second source operand, adds pairs of
471 /// contiguous products with signed saturation, and writes the 16-bit sums to
472 /// the corresponding bits in the destination.
473 ///
474 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
475 /// both operands are multiplied, and the sum of both results is written to
476 /// bits [15:0] of the destination.
477 ///
478 /// \headerfile <x86intrin.h>
479 ///
480 /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
481 ///
482 /// \param __a
483 /// A 128-bit integer vector containing the first source operand.
484 /// \param __b
485 /// A 128-bit integer vector containing the second source operand.
486 /// \returns A 128-bit integer vector containing the sums of products of both
487 /// operands: \n
488 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
489 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
490 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
491 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
492 /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
493 /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
494 /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
495 /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
496 static __inline__ __m128i __DEFAULT_FN_ATTRS
497 _mm_maddubs_epi16(__m128i __a, __m128i __b)
498 {
    /* Both operands go to the builtin as __v16qi. Argument order matters: the
     * doc comment for this intrinsic describes the first operand as the
     * unsigned bytes and the second as the signed bytes. */
499  return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
500 }
501 
502 /// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
503 /// values contained in the first source operand and packed 8-bit signed
504 /// integer values contained in the second source operand, adds pairs of
505 /// contiguous products with signed saturation, and writes the 16-bit sums to
506 /// the corresponding bits in the destination.
507 ///
508 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
509 /// both operands are multiplied, and the sum of both results is written to
510 /// bits [15:0] of the destination.
511 ///
512 /// \headerfile <x86intrin.h>
513 ///
514 /// This intrinsic corresponds to the \c PMADDUBSW instruction.
515 ///
516 /// \param __a
517 /// A 64-bit integer vector containing the first source operand.
518 /// \param __b
519 /// A 64-bit integer vector containing the second source operand.
520 /// \returns A 64-bit integer vector containing the sums of products of both
521 /// operands: \n
522 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
523 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
524 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
525 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
526 static __inline__ __m64 __DEFAULT_FN_ATTRS
527 _mm_maddubs_pi16(__m64 __a, __m64 __b)
528 {
    /* Both operands go to the builtin as __v8qi. Argument order matters: the
     * doc comment for this intrinsic describes the first operand as the
     * unsigned bytes and the second as the signed bytes. */
529  return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
530 }
531 
532 /// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
533 /// products to the 18 most significant bits by right-shifting, rounds the
534 /// truncated value by adding 1, and writes bits [16:1] to the destination.
535 ///
536 /// \headerfile <x86intrin.h>
537 ///
538 /// This intrinsic corresponds to the \c VPMULHRSW instruction.
539 ///
540 /// \param __a
541 /// A 128-bit vector of [8 x i16] containing one of the source operands.
542 /// \param __b
543 /// A 128-bit vector of [8 x i16] containing one of the source operands.
544 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
545 /// products of both operands.
546 static __inline__ __m128i __DEFAULT_FN_ATTRS
547 _mm_mulhrs_epi16(__m128i __a, __m128i __b)
548 {
    /* Both operands are passed as __v8hi in (__a, __b) order; the result is
     * cast back to __m128i. */
549  return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
550 }
551 
552 /// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
553 /// products to the 18 most significant bits by right-shifting, rounds the
554 /// truncated value by adding 1, and writes bits [16:1] to the destination.
555 ///
556 /// \headerfile <x86intrin.h>
557 ///
558 /// This intrinsic corresponds to the \c PMULHRSW instruction.
559 ///
560 /// \param __a
561 /// A 64-bit vector of [4 x i16] containing one of the source operands.
562 /// \param __b
563 /// A 64-bit vector of [4 x i16] containing one of the source operands.
564 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
565 /// products of both operands.
566 static __inline__ __m64 __DEFAULT_FN_ATTRS
567 _mm_mulhrs_pi16(__m64 __a, __m64 __b)
568 {
    /* Both operands are passed as __v4hi in (__a, __b) order; the result is
     * cast back to __m64. */
569  return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
570 }
571 
572 /// \brief Copies the 8-bit integers from a 128-bit integer vector to the
573 /// destination or clears 8-bit values in the destination, as specified by
574 /// the second source operand.
575 ///
576 /// \headerfile <x86intrin.h>
577 ///
578 /// This intrinsic corresponds to the \c VPSHUFB instruction.
579 ///
580 /// \param __a
581 /// A 128-bit integer vector containing the values to be copied.
582 /// \param __b
583 /// A 128-bit integer vector containing control bytes corresponding to
584 /// positions in the destination:
585 /// Bit 7: \n
586 /// 1: Clear the corresponding byte in the destination. \n
587 /// 0: Copy the selected source byte to the corresponding byte in the
588 /// destination. \n
589 /// Bits [6:4] Reserved. \n
590 /// Bits [3:0] select the source byte to be copied.
591 /// \returns A 128-bit integer vector containing the copied or cleared values.
592 static __inline__ __m128i __DEFAULT_FN_ATTRS
593 _mm_shuffle_epi8(__m128i __a, __m128i __b)
594 {
    /* __a (values to copy) is the builtin's first argument and __b (control
     * bytes) its second, both as __v16qi. The result is cast back to
     * __m128i. */
595  return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
596 }
597 
598 /// \brief Copies the 8-bit integers from a 64-bit integer vector to the
599 /// destination or clears 8-bit values in the destination, as specified by
600 /// the second source operand.
601 ///
602 /// \headerfile <x86intrin.h>
603 ///
604 /// This intrinsic corresponds to the \c PSHUFB instruction.
605 ///
606 /// \param __a
607 /// A 64-bit integer vector containing the values to be copied.
608 /// \param __b
609 /// A 64-bit integer vector containing control bytes corresponding to
610 /// positions in the destination:
611 /// Bit 7: \n
612 /// 1: Clear the corresponding byte in the destination. \n
613 /// 0: Copy the selected source byte to the corresponding byte in the
614 /// destination. \n
615 /// Bits [2:0] select the source byte to be copied.
616 /// \returns A 64-bit integer vector containing the copied or cleared values.
617 static __inline__ __m64 __DEFAULT_FN_ATTRS
618 _mm_shuffle_pi8(__m64 __a, __m64 __b)
619 {
    /* __a (values to copy) is the builtin's first argument and __b (control
     * bytes) its second, both as __v8qi. The result is cast back to __m64. */
620  return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
621 }
622 
623 /// \brief For each 8-bit integer in the first source operand, perform one of
624 /// the following actions as specified by the second source operand.
625 ///
626 /// If the byte in the second source is negative, calculate the two's
627 /// complement of the corresponding byte in the first source, and write that
628 /// value to the destination. If the byte in the second source is positive,
629 /// copy the corresponding byte from the first source to the destination. If
630 /// the byte in the second source is zero, clear the corresponding byte in
631 /// the destination.
632 ///
633 /// \headerfile <x86intrin.h>
634 ///
635 /// This intrinsic corresponds to the \c VPSIGNB instruction.
636 ///
637 /// \param __a
638 /// A 128-bit integer vector containing the values to be copied.
639 /// \param __b
640 /// A 128-bit integer vector containing control bytes corresponding to
641 /// positions in the destination.
642 /// \returns A 128-bit integer vector containing the resultant values.
643 static __inline__ __m128i __DEFAULT_FN_ATTRS
644 _mm_sign_epi8(__m128i __a, __m128i __b)
645 {
    /* __a (values) is the builtin's first argument and __b (control bytes) its
     * second, both as __v16qi. The result is cast back to __m128i. */
646  return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
647 }
648 
649 /// \brief For each 16-bit integer in the first source operand, perform one of
650 /// the following actions as specified by the second source operand.
651 ///
652 /// If the word in the second source is negative, calculate the two's
653 /// complement of the corresponding word in the first source, and write that
654 /// value to the destination. If the word in the second source is positive,
655 /// copy the corresponding word from the first source to the destination. If
656 /// the word in the second source is zero, clear the corresponding word in
657 /// the destination.
658 ///
659 /// \headerfile <x86intrin.h>
660 ///
661 /// This intrinsic corresponds to the \c VPSIGNW instruction.
662 ///
663 /// \param __a
664 /// A 128-bit integer vector containing the values to be copied.
665 /// \param __b
666 /// A 128-bit integer vector containing control words corresponding to
667 /// positions in the destination.
668 /// \returns A 128-bit integer vector containing the resultant values.
669 static __inline__ __m128i __DEFAULT_FN_ATTRS
670 _mm_sign_epi16(__m128i __a, __m128i __b)
671 {
    /* __a (values) is the builtin's first argument and __b (control words) its
     * second, both as __v8hi. The result is cast back to __m128i. */
672  return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
673 }
674 
675 /// \brief For each 32-bit integer in the first source operand, perform one of
676 /// the following actions as specified by the second source operand.
677 ///
678 /// If the doubleword in the second source is negative, calculate the two's
679 /// complement of the corresponding doubleword in the first source, and
680 /// write that value to the destination. If the doubleword in the second
681 /// source is positive, copy the corresponding doubleword from the first
682 /// source to the destination. If the doubleword in the second source is
683 /// zero, clear the corresponding doubleword in the destination.
684 ///
685 /// \headerfile <x86intrin.h>
686 ///
687 /// This intrinsic corresponds to the \c VPSIGND instruction.
688 ///
689 /// \param __a
690 /// A 128-bit integer vector containing the values to be copied.
691 /// \param __b
692 /// A 128-bit integer vector containing control doublewords corresponding to
693 /// positions in the destination.
694 /// \returns A 128-bit integer vector containing the resultant values.
695 static __inline__ __m128i __DEFAULT_FN_ATTRS
696 _mm_sign_epi32(__m128i __a, __m128i __b)
697 {
    /* __a (values) is the builtin's first argument and __b (control
     * doublewords) its second, both as __v4si. The result is cast back to
     * __m128i. */
698  return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
699 }
700 
701 /// \brief For each 8-bit integer in the first source operand, perform one of
702 /// the following actions as specified by the second source operand.
703 ///
704 /// If the byte in the second source is negative, calculate the two's
705 /// complement of the corresponding byte in the first source, and write that
706 /// value to the destination. If the byte in the second source is positive,
707 /// copy the corresponding byte from the first source to the destination. If
708 /// the byte in the second source is zero, clear the corresponding byte in
709 /// the destination.
710 ///
711 /// \headerfile <x86intrin.h>
712 ///
713 /// This intrinsic corresponds to the \c PSIGNB instruction.
714 ///
715 /// \param __a
716 /// A 64-bit integer vector containing the values to be copied.
717 /// \param __b
718 /// A 64-bit integer vector containing control bytes corresponding to
719 /// positions in the destination.
720 /// \returns A 64-bit integer vector containing the resultant values.
721 static __inline__ __m64 __DEFAULT_FN_ATTRS
722 _mm_sign_pi8(__m64 __a, __m64 __b)
723 {
    /* __a (values) is the builtin's first argument and __b (control bytes) its
     * second, both as __v8qi. The result is cast back to __m64. */
724  return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
725 }
726 
727 /// \brief For each 16-bit integer in the first source operand, perform one of
728 /// the following actions as specified by the second source operand.
729 ///
730 /// If the word in the second source is negative, calculate the two's
731 /// complement of the corresponding word in the first source, and write that
732 /// value to the destination. If the word in the second source is positive,
733 /// copy the corresponding word from the first source to the destination. If
734 /// the word in the second source is zero, clear the corresponding word in
735 /// the destination.
736 ///
737 /// \headerfile <x86intrin.h>
738 ///
739 /// This intrinsic corresponds to the \c PSIGNW instruction.
740 ///
741 /// \param __a
742 /// A 64-bit integer vector containing the values to be copied.
743 /// \param __b
744 /// A 64-bit integer vector containing control words corresponding to
745 /// positions in the destination.
746 /// \returns A 64-bit integer vector containing the resultant values.
747 static __inline__ __m64 __DEFAULT_FN_ATTRS
748 _mm_sign_pi16(__m64 __a, __m64 __b)
749 {
    /* __a (values) is the builtin's first argument and __b (control words) its
     * second, both as __v4hi. The result is cast back to __m64. */
750  return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
751 }
752 
753 /// \brief For each 32-bit integer in the first source operand, perform one of
754 /// the following actions as specified by the second source operand.
755 ///
756 /// If the doubleword in the second source is negative, calculate the two's
757 /// complement of the corresponding doubleword in the first source, and
758 /// write that value to the destination. If the doubleword in the second
759 /// source is positive, copy the corresponding doubleword from the first
760 /// source to the destination. If the doubleword in the second source is
761 /// zero, clear the corresponding doubleword in the destination.
762 ///
763 /// \headerfile <x86intrin.h>
764 ///
765 /// This intrinsic corresponds to the \c PSIGND instruction.
766 ///
767 /// \param __a
768 /// A 64-bit integer vector containing the values to be copied.
769 /// \param __b
770 /// A 64-bit integer vector containing two control doublewords corresponding
771 /// to positions in the destination.
772 /// \returns A 64-bit integer vector containing the resultant values.
773 static __inline__ __m64 __DEFAULT_FN_ATTRS
774 _mm_sign_pi32(__m64 __a, __m64 __b)
775 {
    /* __a (values) is the builtin's first argument and __b (control
     * doublewords) its second, both as __v2si. The result is cast back to
     * __m64. */
776  return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
777 }
778 
779 #undef __DEFAULT_FN_ATTRS
780 
781 #endif /* __TMMINTRIN_H */
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsub_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:390
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi16(__m64 __a, __m64 __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specified by the second source operand.
Definition: tmmintrin.h:748
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadd_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:250
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadd_pi32(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32]...
Definition: tmmintrin.h:273
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi8(__m128i __a, __m128i __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified by the second source operand.
Definition: tmmintrin.h:644
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi32(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32...
Definition: tmmintrin.h:367
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:135
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:497
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi32(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32]...
Definition: tmmintrin.h:227
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadds_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16]...
Definition: tmmintrin.h:297
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi8(__m64 __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:45
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_shuffle_pi8(__m64 __a, __m64 __b)
Copies the 8-bit integers from a 64-bit integer vector to the destination or clears 8-bit values in t...
Definition: tmmintrin.h:618
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16]...
Definition: tmmintrin.h:204
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsubs_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:463
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsub_pi32(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32]...
Definition: tmmintrin.h:413
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi32(__m128i __a, __m128i __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specified by the second source operand.
Definition: tmmintrin.h:696
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:547
static __inline__ vector float vector float __b
Definition: altivec.h:534
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:63
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi8(__m64 __a, __m64 __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified by the second source operand.
Definition: tmmintrin.h:722
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition: tmmintrin.h:344
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubs_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition: tmmintrin.h:438
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi16(__m128i __a, __m128i __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specified by the second source operand.
Definition: tmmintrin.h:670
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi32(__m64 __a, __m64 __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specified by the second source operand.
Definition: tmmintrin.h:774
#define __DEFAULT_FN_ATTRS
Definition: tmmintrin.h:30
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
Definition: tmmintrin.h:593
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_maddubs_pi16(__m64 __a, __m64 __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:527
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi16(__m64 __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:81
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhrs_pi16(__m64 __a, __m64 __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:567
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadds_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:321
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:99
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi32(__m64 __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:117