clang 22.0.0git
tmmintrin.h
Go to the documentation of this file.
1/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __TMMINTRIN_H
11#define __TMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <pmmintrin.h>
18
19/* Default attributes for every function in this file: always inlined, no
   debug info, compiled with the "ssse3" target feature enabled, and a minimum
   vector width of 128 bits. */
20#define __DEFAULT_FN_ATTRS \
21 __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), \
22 __min_vector_width__(128)))
23
/* __trunc64(x): reinterpret x as __v2di and keep only element 0, yielding the
   low 64 bits as an __m64. */
24#define __trunc64(x) \
25 (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
/* __zext128(x): reinterpret x as __v2si and append two elements taken from a
   zero-initialized __v2si (indices 2 and 3), so the upper 64 bits of the
   resulting __m128i are zero. */
26#define __zext128(x) \
27 (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
28 1, 2, 3)
/* __anyext128(x): like __zext128, but the two upper elements use shuffle
   index -1, which __builtin_shufflevector treats as "don't care"; callers
   must not rely on the upper 64 bits of the result. */
29#define __anyext128(x) \
30 (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
31 1, -1, -1)
32
/* When compiled as C++11 or later, the CONSTEXPR variant adds `constexpr` to
   the default attributes; otherwise it is identical to __DEFAULT_FN_ATTRS. */
33#if defined(__cplusplus) && (__cplusplus >= 201103L)
34#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
35#else
36#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
37#endif
38
39/// Computes the absolute value of each of the packed 8-bit signed
40/// integers in the source operand and stores the 8-bit unsigned integer
41/// results in the destination.
42///
43/// \headerfile <x86intrin.h>
44///
45/// This intrinsic corresponds to the \c PABSB instruction.
46///
47/// \param __a
48/// A 64-bit vector of [8 x i8].
49/// \returns A 64-bit integer vector containing the absolute values of the
50/// elements in the operand.
51static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi8(__m64 __a) {
  /* Reinterpret __a as __v8qs, take the element-wise absolute value, and cast
     the result back to __m64. */
52 return (__m64)__builtin_elementwise_abs((__v8qs)__a);
53}
54
55/// Computes the absolute value of each of the packed 8-bit signed
56/// integers in the source operand and stores the 8-bit unsigned integer
57/// results in the destination.
58///
59/// \headerfile <x86intrin.h>
60///
61/// This intrinsic corresponds to the \c VPABSB instruction.
62///
63/// \param __a
64/// A 128-bit vector of [16 x i8].
65/// \returns A 128-bit integer vector containing the absolute values of the
66/// elements in the operand.
67static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
68_mm_abs_epi8(__m128i __a) {
  /* Reinterpret __a as __v16qs, take the element-wise absolute value, and cast
     the result back to __m128i. */
69 return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
70}
71
72/// Computes the absolute value of each of the packed 16-bit signed
73/// integers in the source operand and stores the 16-bit unsigned integer
74/// results in the destination.
75///
76/// \headerfile <x86intrin.h>
77///
78/// This intrinsic corresponds to the \c PABSW instruction.
79///
80/// \param __a
81/// A 64-bit vector of [4 x i16].
82/// \returns A 64-bit integer vector containing the absolute values of the
83/// elements in the operand.
84static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi16(__m64 __a) {
  /* Reinterpret __a as __v4hi, take the element-wise absolute value, and cast
     the result back to __m64. */
85 return (__m64)__builtin_elementwise_abs((__v4hi)__a);
86}
87
88/// Computes the absolute value of each of the packed 16-bit signed
89/// integers in the source operand and stores the 16-bit unsigned integer
90/// results in the destination.
91///
92/// \headerfile <x86intrin.h>
93///
94/// This intrinsic corresponds to the \c VPABSW instruction.
95///
96/// \param __a
97/// A 128-bit vector of [8 x i16].
98/// \returns A 128-bit integer vector containing the absolute values of the
99/// elements in the operand.
100static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
102 return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
103}
104
105/// Computes the absolute value of each of the packed 32-bit signed
106/// integers in the source operand and stores the 32-bit unsigned integer
107/// results in the destination.
108///
109/// \headerfile <x86intrin.h>
110///
111/// This intrinsic corresponds to the \c PABSD instruction.
112///
113/// \param __a
114/// A 64-bit vector of [2 x i32].
115/// \returns A 64-bit integer vector containing the absolute values of the
116/// elements in the operand.
117static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi32(__m64 __a) {
  /* Reinterpret __a as __v2si, take the element-wise absolute value, and cast
     the result back to __m64. */
118 return (__m64)__builtin_elementwise_abs((__v2si)__a);
119}
120
121/// Computes the absolute value of each of the packed 32-bit signed
122/// integers in the source operand and stores the 32-bit unsigned integer
123/// results in the destination.
124///
125/// \headerfile <x86intrin.h>
126///
127/// This intrinsic corresponds to the \c VPABSD instruction.
128///
129/// \param __a
130/// A 128-bit vector of [4 x i32].
131/// \returns A 128-bit integer vector containing the absolute values of the
132/// elements in the operand.
133static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
135 return (__m128i)__builtin_elementwise_abs((__v4si)__a);
136}
137
138/// Concatenates the two 128-bit integer vector operands, and
139/// right-shifts the result by the number of bytes specified in the immediate
140/// operand.
141///
142/// \headerfile <x86intrin.h>
143///
144/// \code
145/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
146/// \endcode
147///
148/// This intrinsic corresponds to the \c PALIGNR instruction.
149///
150/// \param a
151/// A 128-bit vector of [16 x i8] containing one of the source operands.
152/// \param b
153/// A 128-bit vector of [16 x i8] containing one of the source operands.
154/// \param n
155/// An immediate operand specifying how many bytes to right-shift the result.
156/// \returns A 128-bit integer vector containing the concatenated right-shifted
157/// value.
/* Defined as a macro, not a function: (n) is forwarded unchanged to
   __builtin_ia32_palignr128 -- presumably so that it remains the immediate
   operand the documentation above requires. Both vector arguments are first
   converted to __m128i and then viewed as __v16qi. */
158#define _mm_alignr_epi8(a, b, n) \
159 ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
160 (__v16qi)(__m128i)(b), (n)))
161
162/// Concatenates the two 64-bit integer vector operands, and right-shifts
163/// the result by the number of bytes specified in the immediate operand.
164///
165/// \headerfile <x86intrin.h>
166///
167/// \code
168/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
169/// \endcode
170///
171/// This intrinsic corresponds to the \c PALIGNR instruction.
172///
173/// \param a
174/// A 64-bit vector of [8 x i8] containing one of the source operands.
175/// \param b
176/// A 64-bit vector of [8 x i8] containing one of the source operands.
177/// \param n
178/// An immediate operand specifying how many bytes to right-shift the result.
179/// \returns A 64-bit integer vector containing the concatenated right-shifted
180/// value.
/* The inner shuffle (indices 1, 0) builds a 128-bit value with b in element 0
   and a in element 1. That value is byte-shifted by (n) with
   __builtin_ia32_psrldqi128_byteshift, and the outer shuffle keeps element 0
   of the __v2di result as the returned __m64. */
181#define _mm_alignr_pi8(a, b, n) \
182 ((__m64)__builtin_shufflevector( \
183 (__v2di)__builtin_ia32_psrldqi128_byteshift( \
184 (__v16qi)__builtin_shufflevector((__v1di)(a), (__v1di)(b), 1, 0), \
185 (n)), \
186 __extension__(__v2di){}, 0))
187
188/// Horizontally adds the adjacent pairs of values contained in 2 packed
189/// 128-bit vectors of [8 x i16].
190///
191/// \headerfile <x86intrin.h>
192///
193/// This intrinsic corresponds to the \c VPHADDW instruction.
194///
195/// \param __a
196/// A 128-bit vector of [8 x i16] containing one of the source operands. The
197/// horizontal sums of the values are stored in the lower bits of the
198/// destination.
199/// \param __b
200/// A 128-bit vector of [8 x i16] containing one of the source operands. The
201/// horizontal sums of the values are stored in the upper bits of the
202/// destination.
203/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
204/// both operands.
205static __inline__ __m128i __DEFAULT_FN_ATTRS
206_mm_hadd_epi16(__m128i __a, __m128i __b)
207{
  /* Thin wrapper: both operands are cast to __v8hi for
     __builtin_ia32_phaddw128 and the result is cast back to __m128i. */
208 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
209}
210
211/// Horizontally adds the adjacent pairs of values contained in 2 packed
212/// 128-bit vectors of [4 x i32].
213///
214/// \headerfile <x86intrin.h>
215///
216/// This intrinsic corresponds to the \c VPHADDD instruction.
217///
218/// \param __a
219/// A 128-bit vector of [4 x i32] containing one of the source operands. The
220/// horizontal sums of the values are stored in the lower bits of the
221/// destination.
222/// \param __b
223/// A 128-bit vector of [4 x i32] containing one of the source operands. The
224/// horizontal sums of the values are stored in the upper bits of the
225/// destination.
226/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
227/// both operands.
228static __inline__ __m128i __DEFAULT_FN_ATTRS
229_mm_hadd_epi32(__m128i __a, __m128i __b)
230{
  /* Thin wrapper: both operands are cast to __v4si for
     __builtin_ia32_phaddd128 and the result is cast back to __m128i. */
231 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
232}
233
234/// Horizontally adds the adjacent pairs of values contained in 2 packed
235/// 64-bit vectors of [4 x i16].
236///
237/// \headerfile <x86intrin.h>
238///
239/// This intrinsic corresponds to the \c PHADDW instruction.
240///
241/// \param __a
242/// A 64-bit vector of [4 x i16] containing one of the source operands. The
243/// horizontal sums of the values are stored in the lower bits of the
244/// destination.
245/// \param __b
246/// A 64-bit vector of [4 x i16] containing one of the source operands. The
247/// horizontal sums of the values are stored in the upper bits of the
248/// destination.
249/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
250/// operands.
251static __inline__ __m64 __DEFAULT_FN_ATTRS
252_mm_hadd_pi16(__m64 __a, __m64 __b)
253{
  /* Reuses the 128-bit builtin: the shuffle (indices 0, 1) packs __a and __b
     into a single 128-bit first operand (presumably one 64-bit element each),
     and the second operand is all zeros. The sums derived from the first
     operand land in the low half, which __trunc64 returns. */
254 return __trunc64(__builtin_ia32_phaddw128(
255 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
256}
257
258/// Horizontally adds the adjacent pairs of values contained in 2 packed
259/// 64-bit vectors of [2 x i32].
260///
261/// \headerfile <x86intrin.h>
262///
263/// This intrinsic corresponds to the \c PHADDD instruction.
264///
265/// \param __a
266/// A 64-bit vector of [2 x i32] containing one of the source operands. The
267/// horizontal sums of the values are stored in the lower bits of the
268/// destination.
269/// \param __b
270/// A 64-bit vector of [2 x i32] containing one of the source operands. The
271/// horizontal sums of the values are stored in the upper bits of the
272/// destination.
273/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
274/// operands.
275static __inline__ __m64 __DEFAULT_FN_ATTRS
276_mm_hadd_pi32(__m64 __a, __m64 __b)
277{
  /* Reuses the 128-bit builtin: the shuffle (indices 0, 1) packs __a and __b
     into a single 128-bit first operand (presumably one 64-bit element each),
     and the second operand is all zeros. The sums derived from the first
     operand land in the low half, which __trunc64 returns. */
278 return __trunc64(__builtin_ia32_phaddd128(
279 (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
280}
281
282/// Horizontally adds, with saturation, the adjacent pairs of values contained
283/// in two packed 128-bit vectors of [8 x i16].
284///
285/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
286/// less than 0x8000 (-32768) are saturated to 0x8000.
287///
288/// \headerfile <x86intrin.h>
289///
290/// This intrinsic corresponds to the \c VPHADDSW instruction.
291///
292/// \param __a
293/// A 128-bit vector of [8 x i16] containing one of the source operands. The
294/// horizontal sums of the values are stored in the lower bits of the
295/// destination.
296/// \param __b
297/// A 128-bit vector of [8 x i16] containing one of the source operands. The
298/// horizontal sums of the values are stored in the upper bits of the
299/// destination.
300/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
301/// sums of both operands.
302static __inline__ __m128i __DEFAULT_FN_ATTRS
303_mm_hadds_epi16(__m128i __a, __m128i __b)
304{
  /* Thin wrapper: both operands are cast to __v8hi for
     __builtin_ia32_phaddsw128 and the result is cast back to __m128i. */
305 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
306}
307
308/// Horizontally adds, with saturation, the adjacent pairs of values contained
309/// in two packed 64-bit vectors of [4 x i16].
310///
311/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
312/// less than 0x8000 (-32768) are saturated to 0x8000.
313///
314/// \headerfile <x86intrin.h>
315///
316/// This intrinsic corresponds to the \c PHADDSW instruction.
317///
318/// \param __a
319/// A 64-bit vector of [4 x i16] containing one of the source operands. The
320/// horizontal sums of the values are stored in the lower bits of the
321/// destination.
322/// \param __b
323/// A 64-bit vector of [4 x i16] containing one of the source operands. The
324/// horizontal sums of the values are stored in the upper bits of the
325/// destination.
326/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
327/// sums of both operands.
328static __inline__ __m64 __DEFAULT_FN_ATTRS
329_mm_hadds_pi16(__m64 __a, __m64 __b)
330{
  /* Reuses the 128-bit builtin: the shuffle (indices 0, 1) packs __a and __b
     into a single 128-bit first operand (presumably one 64-bit element each),
     and the second operand is all zeros. The saturated sums derived from the
     first operand land in the low half, which __trunc64 returns. */
331 return __trunc64(__builtin_ia32_phaddsw128(
332 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
333}
334
335/// Horizontally subtracts the adjacent pairs of values contained in 2
336/// packed 128-bit vectors of [8 x i16].
337///
338/// \headerfile <x86intrin.h>
339///
340/// This intrinsic corresponds to the \c VPHSUBW instruction.
341///
342/// \param __a
343/// A 128-bit vector of [8 x i16] containing one of the source operands. The
344/// horizontal differences between the values are stored in the lower bits of
345/// the destination.
346/// \param __b
347/// A 128-bit vector of [8 x i16] containing one of the source operands. The
348/// horizontal differences between the values are stored in the upper bits of
349/// the destination.
350/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
351/// of both operands.
352static __inline__ __m128i __DEFAULT_FN_ATTRS
353_mm_hsub_epi16(__m128i __a, __m128i __b)
354{
  /* Thin wrapper: both operands are cast to __v8hi for
     __builtin_ia32_phsubw128 and the result is cast back to __m128i. */
355 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
356}
357
358/// Horizontally subtracts the adjacent pairs of values contained in 2
359/// packed 128-bit vectors of [4 x i32].
360///
361/// \headerfile <x86intrin.h>
362///
363/// This intrinsic corresponds to the \c VPHSUBD instruction.
364///
365/// \param __a
366/// A 128-bit vector of [4 x i32] containing one of the source operands. The
367/// horizontal differences between the values are stored in the lower bits of
368/// the destination.
369/// \param __b
370/// A 128-bit vector of [4 x i32] containing one of the source operands. The
371/// horizontal differences between the values are stored in the upper bits of
372/// the destination.
373/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
374/// of both operands.
375static __inline__ __m128i __DEFAULT_FN_ATTRS
376_mm_hsub_epi32(__m128i __a, __m128i __b)
377{
  /* Thin wrapper: both operands are cast to __v4si for
     __builtin_ia32_phsubd128 and the result is cast back to __m128i. */
378 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
379}
380
381/// Horizontally subtracts the adjacent pairs of values contained in 2
382/// packed 64-bit vectors of [4 x i16].
383///
384/// \headerfile <x86intrin.h>
385///
386/// This intrinsic corresponds to the \c PHSUBW instruction.
387///
388/// \param __a
389/// A 64-bit vector of [4 x i16] containing one of the source operands. The
390/// horizontal differences between the values are stored in the lower bits of
391/// the destination.
392/// \param __b
393/// A 64-bit vector of [4 x i16] containing one of the source operands. The
394/// horizontal differences between the values are stored in the upper bits of
395/// the destination.
396/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
397/// of both operands.
398static __inline__ __m64 __DEFAULT_FN_ATTRS
399_mm_hsub_pi16(__m64 __a, __m64 __b)
400{
  /* Reuses the 128-bit builtin: the shuffle (indices 0, 1) packs __a and __b
     into a single 128-bit first operand (presumably one 64-bit element each),
     and the second operand is all zeros. The differences derived from the
     first operand land in the low half, which __trunc64 returns. */
401 return __trunc64(__builtin_ia32_phsubw128(
402 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
403}
404
405/// Horizontally subtracts the adjacent pairs of values contained in 2
406/// packed 64-bit vectors of [2 x i32].
407///
408/// \headerfile <x86intrin.h>
409///
410/// This intrinsic corresponds to the \c PHSUBD instruction.
411///
412/// \param __a
413/// A 64-bit vector of [2 x i32] containing one of the source operands. The
414/// horizontal differences between the values are stored in the lower bits of
415/// the destination.
416/// \param __b
417/// A 64-bit vector of [2 x i32] containing one of the source operands. The
418/// horizontal differences between the values are stored in the upper bits of
419/// the destination.
420/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
421/// of both operands.
422static __inline__ __m64 __DEFAULT_FN_ATTRS
423_mm_hsub_pi32(__m64 __a, __m64 __b)
424{
  /* Reuses the 128-bit builtin: the shuffle (indices 0, 1) packs __a and __b
     into a single 128-bit first operand (presumably one 64-bit element each),
     and the second operand is all zeros. The differences derived from the
     first operand land in the low half, which __trunc64 returns. */
425 return __trunc64(__builtin_ia32_phsubd128(
426 (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
427}
428
429/// Horizontally subtracts, with saturation, the adjacent pairs of values
430/// contained in two packed 128-bit vectors of [8 x i16].
431///
432/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
433/// Negative differences less than 0x8000 (-32768) are saturated to 0x8000.
434///
435/// \headerfile <x86intrin.h>
436///
437/// This intrinsic corresponds to the \c VPHSUBSW instruction.
438///
439/// \param __a
440/// A 128-bit vector of [8 x i16] containing one of the source operands. The
441/// horizontal differences between the values are stored in the lower bits of
442/// the destination.
443/// \param __b
444/// A 128-bit vector of [8 x i16] containing one of the source operands. The
445/// horizontal differences between the values are stored in the upper bits of
446/// the destination.
447/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
448/// differences of both operands.
449static __inline__ __m128i __DEFAULT_FN_ATTRS
450_mm_hsubs_epi16(__m128i __a, __m128i __b)
451{
  /* Thin wrapper: both operands are cast to __v8hi for
     __builtin_ia32_phsubsw128 and the result is cast back to __m128i. */
452 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
453}
454
455/// Horizontally subtracts, with saturation, the adjacent pairs of values
456/// contained in two packed 64-bit vectors of [4 x i16].
457///
458/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
459/// Negative differences less than 0x8000 (-32768) are saturated to 0x8000.
460///
461/// \headerfile <x86intrin.h>
462///
463/// This intrinsic corresponds to the \c PHSUBSW instruction.
464///
465/// \param __a
466/// A 64-bit vector of [4 x i16] containing one of the source operands. The
467/// horizontal differences between the values are stored in the lower bits of
468/// the destination.
469/// \param __b
470/// A 64-bit vector of [4 x i16] containing one of the source operands. The
471/// horizontal differences between the values are stored in the upper bits of
472/// the destination.
473/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
474/// differences of both operands.
475static __inline__ __m64 __DEFAULT_FN_ATTRS
476_mm_hsubs_pi16(__m64 __a, __m64 __b)
477{
  /* Reuses the 128-bit builtin: the shuffle (indices 0, 1) packs __a and __b
     into a single 128-bit first operand (presumably one 64-bit element each),
     and the second operand is all zeros. The saturated differences derived
     from the first operand land in the low half, which __trunc64 returns. */
478 return __trunc64(__builtin_ia32_phsubsw128(
479 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
480}
481
482/// Multiplies corresponding pairs of packed 8-bit unsigned integer
483/// values contained in the first source operand and packed 8-bit signed
484/// integer values contained in the second source operand, adds pairs of
485/// contiguous products with signed saturation, and writes the 16-bit sums to
486/// the corresponding bits in the destination.
487///
488/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
489/// both operands are multiplied, and the sum of both results is written to
490/// bits [15:0] of the destination.
491///
492/// \headerfile <x86intrin.h>
493///
494/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
495///
496/// \param __a
497/// A 128-bit integer vector containing the first source operand.
498/// \param __b
499/// A 128-bit integer vector containing the second source operand.
500/// \returns A 128-bit integer vector containing the sums of products of both
501/// operands: \n
502/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
503/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
504/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
505/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
506/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
507/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
508/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
509/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
510static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
511_mm_maddubs_epi16(__m128i __a, __m128i __b) {
  /* Thin wrapper: both operands are cast to __v16qi for
     __builtin_ia32_pmaddubsw128 and the result is cast back to __m128i. */
512 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
513}
514
515/// Multiplies corresponding pairs of packed 8-bit unsigned integer
516/// values contained in the first source operand and packed 8-bit signed
517/// integer values contained in the second source operand, adds pairs of
518/// contiguous products with signed saturation, and writes the 16-bit sums to
519/// the corresponding bits in the destination.
520///
521/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
522/// both operands are multiplied, and the sum of both results is written to
523/// bits [15:0] of the destination.
524///
525/// \headerfile <x86intrin.h>
526///
527/// This intrinsic corresponds to the \c PMADDUBSW instruction.
528///
529/// \param __a
530/// A 64-bit integer vector containing the first source operand.
531/// \param __b
532/// A 64-bit integer vector containing the second source operand.
533/// \returns A 64-bit integer vector containing the sums of products of both
534/// operands: \n
535/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
536/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
537/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
538/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
539static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
540_mm_maddubs_pi16(__m64 __a, __m64 __b) {
  /* Reuses the 128-bit builtin: both inputs are widened with __zext128, so
     the upper 64 bits passed to the builtin are zero, and __trunc64 returns
     only the low 64 bits of its result. */
541 return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__zext128(__a),
542 (__v16qi)__zext128(__b)));
543}
544
545/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
546/// products to the 18 most significant bits by right-shifting, rounds the
547/// truncated value by adding 1, and writes bits [16:1] to the destination.
548///
549/// \headerfile <x86intrin.h>
550///
551/// This intrinsic corresponds to the \c VPMULHRSW instruction.
552///
553/// \param __a
554/// A 128-bit vector of [8 x i16] containing one of the source operands.
555/// \param __b
556/// A 128-bit vector of [8 x i16] containing one of the source operands.
557/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
558/// products of both operands.
559static __inline__ __m128i __DEFAULT_FN_ATTRS
560_mm_mulhrs_epi16(__m128i __a, __m128i __b)
561{
  /* Thin wrapper: both operands are cast to __v8hi for
     __builtin_ia32_pmulhrsw128 and the result is cast back to __m128i. */
562 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
563}
564
565/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
566/// products to the 18 most significant bits by right-shifting, rounds the
567/// truncated value by adding 1, and writes bits [16:1] to the destination.
568///
569/// \headerfile <x86intrin.h>
570///
571/// This intrinsic corresponds to the \c PMULHRSW instruction.
572///
573/// \param __a
574/// A 64-bit vector of [4 x i16] containing one of the source operands.
575/// \param __b
576/// A 64-bit vector of [4 x i16] containing one of the source operands.
577/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
578/// products of both operands.
579static __inline__ __m64 __DEFAULT_FN_ATTRS
581{
582 return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a),
583 (__v8hi)__anyext128(__b)));
584}
585
586/// Copies the 8-bit integers from a 128-bit integer vector to the
587/// destination or clears 8-bit values in the destination, as specified by
588/// the second source operand.
589///
590/// \headerfile <x86intrin.h>
591///
592/// This intrinsic corresponds to the \c VPSHUFB instruction.
593///
594/// \param __a
595/// A 128-bit integer vector containing the values to be copied.
596/// \param __b
597/// A 128-bit integer vector containing control bytes corresponding to
598/// positions in the destination:
599/// Bit 7: \n
600/// 1: Clear the corresponding byte in the destination. \n
601/// 0: Copy the selected source byte to the corresponding byte in the
602/// destination. \n
603/// Bits [6:4] Reserved. \n
604/// Bits [3:0] select the source byte to be copied.
605/// \returns A 128-bit integer vector containing the copied or cleared values.
606static __inline__ __m128i __DEFAULT_FN_ATTRS
607_mm_shuffle_epi8(__m128i __a, __m128i __b)
608{
  /* Thin wrapper: the data (__a) and control (__b) vectors are cast to
     __v16qi for __builtin_ia32_pshufb128; the result is cast back to
     __m128i. */
609 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
610}
611
612/// Copies the 8-bit integers from a 64-bit integer vector to the
613/// destination or clears 8-bit values in the destination, as specified by
614/// the second source operand.
615///
616/// \headerfile <x86intrin.h>
617///
618/// This intrinsic corresponds to the \c PSHUFB instruction.
619///
620/// \param __a
621/// A 64-bit integer vector containing the values to be copied.
622/// \param __b
623/// A 64-bit integer vector containing control bytes corresponding to
624/// positions in the destination:
625/// Bit 7: \n
626/// 1: Clear the corresponding byte in the destination. \n
627/// 0: Copy the selected source byte to the corresponding byte in the
628/// destination. \n
629/// Bits [2:0] select the source byte to be copied.
630/// \returns A 64-bit integer vector containing the copied or cleared values.
631static __inline__ __m64 __DEFAULT_FN_ATTRS
633{
634 return __trunc64(__builtin_ia32_pshufb128(
635 (__v16qi)__builtin_shufflevector(
636 (__v2si)(__a), __extension__ (__v2si){}, 0, 1, 0, 1),
637 (__v16qi)__anyext128(__b)));
638}
639
640/// For each 8-bit integer in the first source operand, perform one of
641/// the following actions as specified by the second source operand.
642///
643/// If the byte in the second source is negative, calculate the two's
644/// complement of the corresponding byte in the first source, and write that
645/// value to the destination. If the byte in the second source is positive,
646/// copy the corresponding byte from the first source to the destination. If
647/// the byte in the second source is zero, clear the corresponding byte in
648/// the destination.
649///
650/// \headerfile <x86intrin.h>
651///
652/// This intrinsic corresponds to the \c VPSIGNB instruction.
653///
654/// \param __a
655/// A 128-bit integer vector containing the values to be copied.
656/// \param __b
657/// A 128-bit integer vector containing control bytes corresponding to
658/// positions in the destination.
659/// \returns A 128-bit integer vector containing the resultant values.
660static __inline__ __m128i __DEFAULT_FN_ATTRS
661_mm_sign_epi8(__m128i __a, __m128i __b)
662{
  /* Thin wrapper: both operands are cast to __v16qi for
     __builtin_ia32_psignb128 and the result is cast back to __m128i. */
663 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
664}
665
666/// For each 16-bit integer in the first source operand, perform one of
667/// the following actions as specified by the second source operand.
668///
669/// If the word in the second source is negative, calculate the two's
670/// complement of the corresponding word in the first source, and write that
671/// value to the destination. If the word in the second source is positive,
672/// copy the corresponding word from the first source to the destination. If
673/// the word in the second source is zero, clear the corresponding word in
674/// the destination.
675///
676/// \headerfile <x86intrin.h>
677///
678/// This intrinsic corresponds to the \c VPSIGNW instruction.
679///
680/// \param __a
681/// A 128-bit integer vector containing the values to be copied.
682/// \param __b
683/// A 128-bit integer vector containing control words corresponding to
684/// positions in the destination.
685/// \returns A 128-bit integer vector containing the resultant values.
686static __inline__ __m128i __DEFAULT_FN_ATTRS
687_mm_sign_epi16(__m128i __a, __m128i __b)
688{
  /* Thin wrapper: both operands are cast to __v8hi for
     __builtin_ia32_psignw128 and the result is cast back to __m128i. */
689 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
690}
691
692/// For each 32-bit integer in the first source operand, perform one of
693/// the following actions as specified by the second source operand.
694///
695/// If the doubleword in the second source is negative, calculate the two's
696/// complement of the corresponding doubleword in the first source, and
697/// write that value to the destination. If the doubleword in the second
698/// source is positive, copy the corresponding doubleword from the first
699/// source to the destination. If the doubleword in the second source is
700/// zero, clear the corresponding doubleword in the destination.
701///
702/// \headerfile <x86intrin.h>
703///
704/// This intrinsic corresponds to the \c VPSIGND instruction.
705///
706/// \param __a
707/// A 128-bit integer vector containing the values to be copied.
708/// \param __b
709/// A 128-bit integer vector containing control doublewords corresponding to
710/// positions in the destination.
711/// \returns A 128-bit integer vector containing the resultant values.
712static __inline__ __m128i __DEFAULT_FN_ATTRS
713_mm_sign_epi32(__m128i __a, __m128i __b)
714{
  /* Thin wrapper: both operands are cast to __v4si for
     __builtin_ia32_psignd128 and the result is cast back to __m128i. */
715 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
716}
717
718/// For each 8-bit integer in the first source operand, perform one of
719/// the following actions as specified by the second source operand.
720///
721/// If the byte in the second source is negative, calculate the two's
722/// complement of the corresponding byte in the first source, and write that
723/// value to the destination. If the byte in the second source is positive,
724/// copy the corresponding byte from the first source to the destination. If
725/// the byte in the second source is zero, clear the corresponding byte in
726/// the destination.
727///
728/// \headerfile <x86intrin.h>
729///
730/// This intrinsic corresponds to the \c PSIGNB instruction.
731///
732/// \param __a
733/// A 64-bit integer vector containing the values to be copied.
734/// \param __b
735/// A 64-bit integer vector containing control bytes corresponding to
736/// positions in the destination.
737/// \returns A 64-bit integer vector containing the resultant values.
738static __inline__ __m64 __DEFAULT_FN_ATTRS
739_mm_sign_pi8(__m64 __a, __m64 __b)
740{
  /* Reuses the 128-bit builtin: both inputs are widened with __anyext128,
     leaving the upper 64 bits unspecified. The operation is per element, so
     those lanes cannot influence the low 64 bits that __trunc64 returns. */
741 return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a),
742 (__v16qi)__anyext128(__b)));
743}
744
745/// For each 16-bit integer in the first source operand, perform one of
746/// the following actions as specified by the second source operand.
747///
748/// If the word in the second source is negative, calculate the two's
749/// complement of the corresponding word in the first source, and write that
750/// value to the destination. If the word in the second source is positive,
751/// copy the corresponding word from the first source to the destination. If
752/// the word in the second source is zero, clear the corresponding word in
753/// the destination.
754///
755/// \headerfile <x86intrin.h>
756///
757/// This intrinsic corresponds to the \c PSIGNW instruction.
758///
759/// \param __a
760/// A 64-bit integer vector containing the values to be copied.
761/// \param __b
762/// A 64-bit integer vector containing control words corresponding to
763/// positions in the destination.
764/// \returns A 64-bit integer vector containing the resultant values.
765static __inline__ __m64 __DEFAULT_FN_ATTRS
766_mm_sign_pi16(__m64 __a, __m64 __b)
767{
  /* Reuses the 128-bit builtin: both inputs are widened with __anyext128,
     leaving the upper 64 bits unspecified. The operation is per element, so
     those lanes cannot influence the low 64 bits that __trunc64 returns. */
768 return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a),
769 (__v8hi)__anyext128(__b)));
770}
771
772/// For each 32-bit integer in the first source operand, perform one of
773/// the following actions as specified by the second source operand.
774///
775/// If the doubleword in the second source is negative, calculate the two's
776/// complement of the corresponding doubleword in the first source, and
777/// write that value to the destination. If the doubleword in the second
778/// source is positive, copy the corresponding doubleword from the first
779/// source to the destination. If the doubleword in the second source is
780/// zero, clear the corresponding doubleword in the destination.
781///
782/// \headerfile <x86intrin.h>
783///
784/// This intrinsic corresponds to the \c PSIGND instruction.
785///
786/// \param __a
787/// A 64-bit integer vector containing the values to be copied.
788/// \param __b
789/// A 64-bit integer vector containing two control doublewords corresponding
790/// to positions in the destination.
791/// \returns A 64-bit integer vector containing the resultant values.
792static __inline__ __m64 __DEFAULT_FN_ATTRS
793_mm_sign_pi32(__m64 __a, __m64 __b)
794{
  /* Reuses the 128-bit builtin: both inputs are widened with __anyext128,
     leaving the upper 64 bits unspecified. The operation is per element, so
     those lanes cannot influence the low 64 bits that __trunc64 returns. */
795 return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a),
796 (__v4si)__anyext128(__b)));
797}
798
/* The helper and attribute macros defined at the top of this header are
   undefined again here, so they are not left defined for code that includes
   it. */
799#undef __anyext128
800#undef __zext128
801#undef __trunc64
802#undef __DEFAULT_FN_ATTRS
803#undef __DEFAULT_FN_ATTRS_CONSTEXPR
804
805#endif /* __TMMINTRIN_H */
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
Definition altivec.h:578
#define __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi16(__m128i __a, __m128i __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:687
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhrs_pi16(__m64 __a, __m64 __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition tmmintrin.h:580
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi8(__m64 __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition tmmintrin.h:51
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi32(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32].
Definition tmmintrin.h:229
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition tmmintrin.h:101
#define __anyext128(x)
Definition tmmintrin.h:29
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition tmmintrin.h:511
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsub_pi32(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32]...
Definition tmmintrin.h:423
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsubs_pi16(__m64 __a, __m64 __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 64-bit ...
Definition tmmintrin.h:476
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi32(__m64 __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition tmmintrin.h:117
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi32(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32...
Definition tmmintrin.h:376
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadd_pi32(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32].
Definition tmmintrin.h:276
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition tmmintrin.h:134
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi16(__m64 __a, __m64 __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:766
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition tmmintrin.h:560
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi16(__m64 __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition tmmintrin.h:84
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi8(__m64 __a, __m64 __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition tmmintrin.h:739
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_maddubs_pi16(__m64 __a, __m64 __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition tmmintrin.h:540
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi32(__m64 __a, __m64 __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:793
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi8(__m128i __a, __m128i __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition tmmintrin.h:661
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition tmmintrin.h:353
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubs_epi16(__m128i __a, __m128i __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 128-bit...
Definition tmmintrin.h:450
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadd_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16].
Definition tmmintrin.h:252
#define __trunc64(x)
Definition tmmintrin.h:24
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsub_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition tmmintrin.h:399
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadds_epi16(__m128i __a, __m128i __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 128-bit vect...
Definition tmmintrin.h:303
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi32(__m128i __a, __m128i __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:713
#define __zext128(x)
Definition tmmintrin.h:26
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16].
Definition tmmintrin.h:206
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
Definition tmmintrin.h:607
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadds_pi16(__m64 __a, __m64 __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 64-bit vecto...
Definition tmmintrin.h:329
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_shuffle_pi8(__m64 __a, __m64 __b)
Copies the 8-bit integers from a 64-bit integer vector to the destination or clears 8-bit values in t...
Definition tmmintrin.h:632
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition tmmintrin.h:68