clang 22.0.0git
tmmintrin.h
Go to the documentation of this file.
1/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __TMMINTRIN_H
11#define __TMMINTRIN_H
12
/* Stop with an error when neither __i386__ nor __x86_64__ is defined. */
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <pmmintrin.h>
18
19/* Define the default attributes for the functions in this file: forced
 * inlining (__always_inline__), __nodebug__, the "ssse3" target feature, and
 * a minimum vector width of 128. */
20#define __DEFAULT_FN_ATTRS \
21 __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), \
22 __min_vector_width__(128)))
23
/* __trunc64(x): casts x to __v2di and selects element 0 only, so the result
 * is the low 64 bits of a 128-bit value, returned as __m64. */
24#define __trunc64(x) \
25 (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
/* __zext128(x): casts x to __v2si and appends the two elements of an
 * empty-initialized __v2si (indices 2 and 3), giving a 128-bit __m128i whose
 * low half is x and whose high half is zero. */
26#define __zext128(x) \
27 (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
28 1, 2, 3)
29
/* Same attributes as __DEFAULT_FN_ATTRS, plus `constexpr` when compiled as
 * C++11 or later; in C and older C++ the two macros expand identically. */
30#if defined(__cplusplus) && (__cplusplus >= 201103L)
31#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
32#else
33#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
34#endif
35
36/// Computes the absolute value of each of the packed 8-bit signed
37/// integers in the source operand and stores the 8-bit unsigned integer
38/// results in the destination.
39///
40/// \headerfile <x86intrin.h>
41///
42/// This intrinsic corresponds to the \c PABSB instruction.
43///
44/// \param __a
45/// A 64-bit vector of [8 x i8].
46/// \returns A 64-bit integer vector containing the absolute values of the
47/// elements in the operand.
48static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi8(__m64 __a) {
  // Cast to __v8qs so the element-wise abs works on the 8-bit lanes, then
  // cast the result back to the opaque __m64 type.
49 return (__m64)__builtin_elementwise_abs((__v8qs)__a);
50}
51
52/// Computes the absolute value of each of the packed 8-bit signed
53/// integers in the source operand and stores the 8-bit unsigned integer
54/// results in the destination.
55///
56/// \headerfile <x86intrin.h>
57///
58/// This intrinsic corresponds to the \c VPABSB instruction.
59///
60/// \param __a
61/// A 128-bit vector of [16 x i8].
62/// \returns A 128-bit integer vector containing the absolute values of the
63/// elements in the operand.
64static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
65_mm_abs_epi8(__m128i __a) {
  // Same pattern as the 64-bit form, but on the 16-lane __v16qs view.
66 return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
67}
68
69/// Computes the absolute value of each of the packed 16-bit signed
70/// integers in the source operand and stores the 16-bit unsigned integer
71/// results in the destination.
72///
73/// \headerfile <x86intrin.h>
74///
75/// This intrinsic corresponds to the \c PABSW instruction.
76///
77/// \param __a
78/// A 64-bit vector of [4 x i16].
79/// \returns A 64-bit integer vector containing the absolute values of the
80/// elements in the operand.
81static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi16(__m64 __a) {
  // The __v4hi cast selects 16-bit lanes for the element-wise abs.
82 return (__m64)__builtin_elementwise_abs((__v4hi)__a);
83}
84
85/// Computes the absolute value of each of the packed 16-bit signed
86/// integers in the source operand and stores the 16-bit unsigned integer
87/// results in the destination.
88///
89/// \headerfile <x86intrin.h>
90///
91/// This intrinsic corresponds to the \c VPABSW instruction.
92///
93/// \param __a
94/// A 128-bit vector of [8 x i16].
95/// \returns A 128-bit integer vector containing the absolute values of the
96/// elements in the operand.
97static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
99 return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
100}
101
102/// Computes the absolute value of each of the packed 32-bit signed
103/// integers in the source operand and stores the 32-bit unsigned integer
104/// results in the destination.
105///
106/// \headerfile <x86intrin.h>
107///
108/// This intrinsic corresponds to the \c PABSD instruction.
109///
110/// \param __a
111/// A 64-bit vector of [2 x i32].
112/// \returns A 64-bit integer vector containing the absolute values of the
113/// elements in the operand.
114static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi32(__m64 __a) {
  // The __v2si cast selects 32-bit lanes for the element-wise abs.
115 return (__m64)__builtin_elementwise_abs((__v2si)__a);
116}
117
118/// Computes the absolute value of each of the packed 32-bit signed
119/// integers in the source operand and stores the 32-bit unsigned integer
120/// results in the destination.
121///
122/// \headerfile <x86intrin.h>
123///
124/// This intrinsic corresponds to the \c VPABSD instruction.
125///
126/// \param __a
127/// A 128-bit vector of [4 x i32].
128/// \returns A 128-bit integer vector containing the absolute values of the
129/// elements in the operand.
130static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
132 return (__m128i)__builtin_elementwise_abs((__v4si)__a);
133}
134
135/// Concatenates the two 128-bit integer vector operands, and
136/// right-shifts the result by the number of bytes specified in the immediate
137/// operand.
138///
139/// \headerfile <x86intrin.h>
140///
141/// \code
142/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
143/// \endcode
144///
145/// This intrinsic corresponds to the \c PALIGNR instruction.
146///
147/// \param a
148/// A 128-bit vector of [16 x i8] containing one of the source operands.
149/// \param b
150/// A 128-bit vector of [16 x i8] containing one of the source operands.
151/// \param n
152/// An immediate operand specifying how many bytes to right-shift the result.
153/// \returns A 128-bit integer vector containing the concatenated right-shifted
154/// value.
// Each vector argument is converted to __m128i and then to __v16qi before it
// reaches the builtin; n is forwarded unchanged.
// NOTE(review): this is a macro rather than a function, presumably so that n
// stays a compile-time constant for the builtin - confirm.
155#define _mm_alignr_epi8(a, b, n) \
156 ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
157 (__v16qi)(__m128i)(b), (n)))
158
159/// Concatenates the two 64-bit integer vector operands, and right-shifts
160/// the result by the number of bytes specified in the immediate operand.
161///
162/// \headerfile <x86intrin.h>
163///
164/// \code
165/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
166/// \endcode
167///
168/// This intrinsic corresponds to the \c PALIGNR instruction.
169///
170/// \param a
171/// A 64-bit vector of [8 x i8] containing one of the source operands.
172/// \param b
173/// A 64-bit vector of [8 x i8] containing one of the source operands.
174/// \param n
175/// An immediate operand specifying how many bytes to right-shift the result.
176/// \returns A 64-bit integer vector containing the concatenated right-shifted
177/// value.
// Three steps, innermost first:
//  1. shufflevector(a, b, 1, 0) builds a 128-bit value with b in the low
//     64 bits and a in the high 64 bits.
//  2. the psrldqi128_byteshift builtin is applied to that value with n
//     (the byte-wise right shift described in the doc comment above).
//  3. the outer shufflevector keeps element 0 only, the low 64 bits.
178#define _mm_alignr_pi8(a, b, n) \
179 ((__m64)__builtin_shufflevector( \
180 (__v2di)__builtin_ia32_psrldqi128_byteshift( \
181 (__v16qi)__builtin_shufflevector((__v1di)(a), (__v1di)(b), 1, 0), \
182 (n)), \
183 __extension__(__v2di){}, 0))
184
185/// Horizontally adds the adjacent pairs of values contained in 2 packed
186/// 128-bit vectors of [8 x i16].
187///
188/// \headerfile <x86intrin.h>
189///
190/// This intrinsic corresponds to the \c VPHADDW instruction.
191///
192/// \param __a
193/// A 128-bit vector of [8 x i16] containing one of the source operands. The
194/// horizontal sums of the values are stored in the lower bits of the
195/// destination.
196/// \param __b
197/// A 128-bit vector of [8 x i16] containing one of the source operands. The
198/// horizontal sums of the values are stored in the upper bits of the
199/// destination.
200/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
201/// both operands.
202static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
203_mm_hadd_epi16(__m128i __a, __m128i __b) {
  // Direct forward to the phaddw128 builtin on the __v8hi view of both inputs.
204 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
205}
206
207/// Horizontally adds the adjacent pairs of values contained in 2 packed
208/// 128-bit vectors of [4 x i32].
209///
210/// \headerfile <x86intrin.h>
211///
212/// This intrinsic corresponds to the \c VPHADDD instruction.
213///
214/// \param __a
215/// A 128-bit vector of [4 x i32] containing one of the source operands. The
216/// horizontal sums of the values are stored in the lower bits of the
217/// destination.
218/// \param __b
219/// A 128-bit vector of [4 x i32] containing one of the source operands. The
220/// horizontal sums of the values are stored in the upper bits of the
221/// destination.
222/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
223/// both operands.
224static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
225_mm_hadd_epi32(__m128i __a, __m128i __b) {
  // Direct forward to the phaddd128 builtin on the __v4si view of both inputs.
226 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
227}
228
229/// Horizontally adds the adjacent pairs of values contained in 2 packed
230/// 64-bit vectors of [4 x i16].
231///
232/// \headerfile <x86intrin.h>
233///
234/// This intrinsic corresponds to the \c PHADDW instruction.
235///
236/// \param __a
237/// A 64-bit vector of [4 x i16] containing one of the source operands. The
238/// horizontal sums of the values are stored in the lower bits of the
239/// destination.
240/// \param __b
241/// A 64-bit vector of [4 x i16] containing one of the source operands. The
242/// horizontal sums of the values are stored in the upper bits of the
243/// destination.
244/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
245/// operands.
246static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a,
247 __m64 __b) {
  // The 64-bit form reuses the 128-bit builtin. shufflevector(__a, __b, 0, 1)
  // packs __a into the low half and __b into the high half of one 128-bit
  // vector, and the second builtin operand is all zeros. __trunc64 then keeps
  // only the low 64 bits of the result.
248 return __trunc64(__builtin_ia32_phaddw128(
249 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
250}
251
252/// Horizontally adds the adjacent pairs of values contained in 2 packed
253/// 64-bit vectors of [2 x i32].
254///
255/// \headerfile <x86intrin.h>
256///
257/// This intrinsic corresponds to the \c PHADDD instruction.
258///
259/// \param __a
260/// A 64-bit vector of [2 x i32] containing one of the source operands. The
261/// horizontal sums of the values are stored in the lower bits of the
262/// destination.
263/// \param __b
264/// A 64-bit vector of [2 x i32] containing one of the source operands. The
265/// horizontal sums of the values are stored in the upper bits of the
266/// destination.
267/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
268/// operands.
269static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a,
270 __m64 __b) {
  // __a and __b are joined into one 128-bit first operand (low half __a,
  // high half __b). The second operand is a zero vector, and __trunc64
  // returns the low 64 bits of the result.
271 return __trunc64(__builtin_ia32_phaddd128(
272 (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
273}
274
275/// Horizontally adds, with saturation, the adjacent pairs of values contained
276/// in two packed 128-bit vectors of [8 x i16].
277///
278/// Sums greater than 0x7FFF (32767) are saturated to 0x7FFF. Sums less than
279/// -32768 are saturated to 0x8000 (-32768 as a signed 16-bit value).
280///
281/// \headerfile <x86intrin.h>
282///
283/// This intrinsic corresponds to the \c VPHADDSW instruction.
284///
285/// \param __a
286/// A 128-bit vector of [8 x i16] containing one of the source operands. The
287/// horizontal sums of the values are stored in the lower bits of the
288/// destination.
289/// \param __b
290/// A 128-bit vector of [8 x i16] containing one of the source operands. The
291/// horizontal sums of the values are stored in the upper bits of the
292/// destination.
293/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
294/// sums of both operands.
295static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
296_mm_hadds_epi16(__m128i __a, __m128i __b) {
  // Direct forward to the phaddsw128 builtin on the __v8hi view of both inputs.
297 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
298}
299
300/// Horizontally adds, with saturation, the adjacent pairs of values contained
301/// in two packed 64-bit vectors of [4 x i16].
302///
303/// Sums greater than 0x7FFF (32767) are saturated to 0x7FFF. Sums less than
304/// -32768 are saturated to 0x8000 (-32768 as a signed 16-bit value).
305///
306/// \headerfile <x86intrin.h>
307///
308/// This intrinsic corresponds to the \c PHADDSW instruction.
309///
310/// \param __a
311/// A 64-bit vector of [4 x i16] containing one of the source operands. The
312/// horizontal sums of the values are stored in the lower bits of the
313/// destination.
314/// \param __b
315/// A 64-bit vector of [4 x i16] containing one of the source operands. The
316/// horizontal sums of the values are stored in the upper bits of the
317/// destination.
318/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
319/// sums of both operands.
320static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a,
321 __m64 __b) {
  // First builtin operand is the 128-bit concatenation of __a (low half) and
  // __b (high half). The second operand is zero, and only the low 64 bits of
  // the 128-bit result are returned.
322 return __trunc64(__builtin_ia32_phaddsw128(
323 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
324}
325
326/// Horizontally subtracts the adjacent pairs of values contained in 2
327/// packed 128-bit vectors of [8 x i16].
328///
329/// \headerfile <x86intrin.h>
330///
331/// This intrinsic corresponds to the \c VPHSUBW instruction.
332///
333/// \param __a
334/// A 128-bit vector of [8 x i16] containing one of the source operands. The
335/// horizontal differences between the values are stored in the lower bits of
336/// the destination.
337/// \param __b
338/// A 128-bit vector of [8 x i16] containing one of the source operands. The
339/// horizontal differences between the values are stored in the upper bits of
340/// the destination.
341/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
342/// of both operands.
343static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
344_mm_hsub_epi16(__m128i __a, __m128i __b) {
  // Direct forward to the phsubw128 builtin on the __v8hi view of both inputs.
345 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
346}
347
348/// Horizontally subtracts the adjacent pairs of values contained in 2
349/// packed 128-bit vectors of [4 x i32].
350///
351/// \headerfile <x86intrin.h>
352///
353/// This intrinsic corresponds to the \c VPHSUBD instruction.
354///
355/// \param __a
356/// A 128-bit vector of [4 x i32] containing one of the source operands. The
357/// horizontal differences between the values are stored in the lower bits of
358/// the destination.
359/// \param __b
360/// A 128-bit vector of [4 x i32] containing one of the source operands. The
361/// horizontal differences between the values are stored in the upper bits of
362/// the destination.
363/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
364/// of both operands.
365static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
366_mm_hsub_epi32(__m128i __a, __m128i __b) {
  // Direct forward to the phsubd128 builtin on the __v4si view of both inputs.
367 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
368}
369
370/// Horizontally subtracts the adjacent pairs of values contained in 2
371/// packed 64-bit vectors of [4 x i16].
372///
373/// \headerfile <x86intrin.h>
374///
375/// This intrinsic corresponds to the \c PHSUBW instruction.
376///
377/// \param __a
378/// A 64-bit vector of [4 x i16] containing one of the source operands. The
379/// horizontal differences between the values are stored in the lower bits of
380/// the destination.
381/// \param __b
382/// A 64-bit vector of [4 x i16] containing one of the source operands. The
383/// horizontal differences between the values are stored in the upper bits of
384/// the destination.
385/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
386/// of both operands.
387static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a,
388 __m64 __b) {
  // __a (low half) and __b (high half) form the first 128-bit operand. The
  // second operand is a zero vector, and __trunc64 keeps the low 64 bits.
389 return __trunc64(__builtin_ia32_phsubw128(
390 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
391}
392
393/// Horizontally subtracts the adjacent pairs of values contained in 2
394/// packed 64-bit vectors of [2 x i32].
395///
396/// \headerfile <x86intrin.h>
397///
398/// This intrinsic corresponds to the \c PHSUBD instruction.
399///
400/// \param __a
401/// A 64-bit vector of [2 x i32] containing one of the source operands. The
402/// horizontal differences between the values are stored in the lower bits of
403/// the destination.
404/// \param __b
405/// A 64-bit vector of [2 x i32] containing one of the source operands. The
406/// horizontal differences between the values are stored in the upper bits of
407/// the destination.
408/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
409/// of both operands.
410static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a,
411 __m64 __b) {
  // __a (low half) and __b (high half) form the first 128-bit operand. The
  // second operand is a zero vector, and __trunc64 keeps the low 64 bits.
412 return __trunc64(__builtin_ia32_phsubd128(
413 (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
414}
415
416/// Horizontally subtracts, with saturation, the adjacent pairs of values
417/// contained in two packed 128-bit vectors of [8 x i16].
418///
419/// Differences greater than 0x7FFF (32767) are saturated to 0x7FFF.
420/// Differences less than -32768 are saturated to 0x8000 (-32768).
421///
422/// \headerfile <x86intrin.h>
423///
424/// This intrinsic corresponds to the \c VPHSUBSW instruction.
425///
426/// \param __a
427/// A 128-bit vector of [8 x i16] containing one of the source operands. The
428/// horizontal differences between the values are stored in the lower bits of
429/// the destination.
430/// \param __b
431/// A 128-bit vector of [8 x i16] containing one of the source operands. The
432/// horizontal differences between the values are stored in the upper bits of
433/// the destination.
434/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
435/// differences of both operands.
436static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
437_mm_hsubs_epi16(__m128i __a, __m128i __b) {
  // Direct forward to the phsubsw128 builtin on the __v8hi view of both inputs.
438 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
439}
440
441/// Horizontally subtracts, with saturation, the adjacent pairs of values
442/// contained in two packed 64-bit vectors of [4 x i16].
443///
444/// Differences greater than 0x7FFF (32767) are saturated to 0x7FFF.
445/// Differences less than -32768 are saturated to 0x8000 (-32768).
446///
447/// \headerfile <x86intrin.h>
448///
449/// This intrinsic corresponds to the \c PHSUBSW instruction.
450///
451/// \param __a
452/// A 64-bit vector of [4 x i16] containing one of the source operands. The
453/// horizontal differences between the values are stored in the lower bits of
454/// the destination.
455/// \param __b
456/// A 64-bit vector of [4 x i16] containing one of the source operands. The
457/// horizontal differences between the values are stored in the upper bits of
458/// the destination.
459/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
460/// differences of both operands.
461static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a,
462 __m64 __b) {
  // __a (low half) and __b (high half) form the first 128-bit operand. The
  // second operand is a zero vector, and __trunc64 keeps the low 64 bits.
463 return __trunc64(__builtin_ia32_phsubsw128(
464 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
465}
466
467/// Multiplies corresponding pairs of packed 8-bit unsigned integer
468/// values contained in the first source operand and packed 8-bit signed
469/// integer values contained in the second source operand, adds pairs of
470/// contiguous products with signed saturation, and writes the 16-bit sums to
471/// the corresponding bits in the destination.
472///
473/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
474/// both operands are multiplied, and the sum of both results is written to
475/// bits [15:0] of the destination.
476///
477/// \headerfile <x86intrin.h>
478///
479/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
480///
481/// \param __a
482/// A 128-bit integer vector containing the first source operand.
483/// \param __b
484/// A 128-bit integer vector containing the second source operand.
485/// \returns A 128-bit integer vector containing the sums of products of both
486/// operands: \n
487/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
488/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
489/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
490/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
491/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
492/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
493/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
494/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
495static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
496_mm_maddubs_epi16(__m128i __a, __m128i __b) {
  // Both inputs are passed as __v16qi; operand order matters because the
  // doc comment above treats __a as unsigned bytes and __b as signed bytes.
497 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
498}
499
500/// Multiplies corresponding pairs of packed 8-bit unsigned integer
501/// values contained in the first source operand and packed 8-bit signed
502/// integer values contained in the second source operand, adds pairs of
503/// contiguous products with signed saturation, and writes the 16-bit sums to
504/// the corresponding bits in the destination.
505///
506/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
507/// both operands are multiplied, and the sum of both results is written to
508/// bits [15:0] of the destination.
509///
510/// \headerfile <x86intrin.h>
511///
512/// This intrinsic corresponds to the \c PMADDUBSW instruction.
513///
514/// \param __a
515/// A 64-bit integer vector containing the first source operand.
516/// \param __b
517/// A 64-bit integer vector containing the second source operand.
518/// \returns A 64-bit integer vector containing the sums of products of both
519/// operands: \n
520/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
521/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
522/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
523/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
524static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
525_mm_maddubs_pi16(__m64 __a, __m64 __b) {
  // Widen each 64-bit input to 128 bits with a zero upper half (__zext128),
  // run the 128-bit builtin, and keep the low 64 bits (__trunc64).
526 return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__zext128(__a),
527 (__v16qi)__zext128(__b)));
528}
529
530/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
531/// products to the 18 most significant bits by right-shifting, rounds the
532/// truncated value by adding 1, and writes bits [16:1] to the destination.
533///
534/// \headerfile <x86intrin.h>
535///
536/// This intrinsic corresponds to the \c VPMULHRSW instruction.
537///
538/// \param __a
539/// A 128-bit vector of [8 x i16] containing one of the source operands.
540/// \param __b
541/// A 128-bit vector of [8 x i16] containing one of the source operands.
542/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
543/// products of both operands.
544static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
545_mm_mulhrs_epi16(__m128i __a, __m128i __b) {
  // Direct forward to the pmulhrsw128 builtin on the __v8hi view of both inputs.
546 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
547}
548
549/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
550/// products to the 18 most significant bits by right-shifting, rounds the
551/// truncated value by adding 1, and writes bits [16:1] to the destination.
552///
553/// \headerfile <x86intrin.h>
554///
555/// This intrinsic corresponds to the \c PMULHRSW instruction.
556///
557/// \param __a
558/// A 64-bit vector of [4 x i16] containing one of the source operands.
559/// \param __b
560/// A 64-bit vector of [4 x i16] containing one of the source operands.
561/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
562/// products of both operands.
563static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
564_mm_mulhrs_pi16(__m64 __a, __m64 __b) {
  // Widen each 64-bit input to 128 bits with a zero upper half (__zext128),
  // run the 128-bit builtin, and keep the low 64 bits (__trunc64).
565 return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a),
566 (__v8hi)__zext128(__b)));
567}
568
569/// Copies the 8-bit integers from a 128-bit integer vector to the
570/// destination or clears 8-bit values in the destination, as specified by
571/// the second source operand.
572///
573/// \headerfile <x86intrin.h>
574///
575/// This intrinsic corresponds to the \c VPSHUFB instruction.
576///
577/// \param __a
578/// A 128-bit integer vector containing the values to be copied.
579/// \param __b
580/// A 128-bit integer vector containing control bytes corresponding to
581/// positions in the destination:
582/// Bit 7: \n
583/// 1: Clear the corresponding byte in the destination. \n
584/// 0: Copy the selected source byte to the corresponding byte in the
585/// destination. \n
586/// Bits [6:4] Reserved. \n
587/// Bits [3:0] select the source byte to be copied.
588/// \returns A 128-bit integer vector containing the copied or cleared values.
589static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
590_mm_shuffle_epi8(__m128i __a, __m128i __b) {
  // __a is the data operand and __b the per-byte control operand; both are
  // passed to the pshufb128 builtin as __v16qi.
591 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
592}
593
594/// Copies the 8-bit integers from a 64-bit integer vector to the
595/// destination or clears 8-bit values in the destination, as specified by
596/// the second source operand.
597///
598/// \headerfile <x86intrin.h>
599///
600/// This intrinsic corresponds to the \c PSHUFB instruction.
601///
602/// \param __a
603/// A 64-bit integer vector containing the values to be copied.
604/// \param __b
605/// A 64-bit integer vector containing control bytes corresponding to
606/// positions in the destination:
607/// Bit 7: \n
608/// 1: Clear the corresponding byte in the destination. \n
609/// 0: Copy the selected source byte to the corresponding byte in the
610/// destination. \n
611/// Bits [2:0] select the source byte to be copied.
612/// \returns A 64-bit integer vector containing the copied or cleared values.
613static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
614_mm_shuffle_pi8(__m64 __a, __m64 __b) {
  // Data operand: indices 0, 1, 0, 1 all select from __a (the zero vector is
  // never picked), so __a is duplicated into both 64-bit halves.
  // Control operand: __b zero-extended to 128 bits. Only the low 64 bits of
  // the result are returned.
  // NOTE(review): the duplication presumably makes selector bit 3 irrelevant,
  // matching the "Bits [2:0]" contract documented above - confirm.
615 return __trunc64(__builtin_ia32_pshufb128(
616 (__v16qi)__builtin_shufflevector((__v2si)(__a), __extension__(__v2si){},
617 0, 1, 0, 1),
618 (__v16qi)__zext128(__b)));
619}
620
621/// For each 8-bit integer in the first source operand, perform one of
622/// the following actions as specified by the second source operand.
623///
624/// If the byte in the second source is negative, calculate the two's
625/// complement of the corresponding byte in the first source, and write that
626/// value to the destination. If the byte in the second source is positive,
627/// copy the corresponding byte from the first source to the destination. If
628/// the byte in the second source is zero, clear the corresponding byte in
629/// the destination.
630///
631/// \headerfile <x86intrin.h>
632///
633/// This intrinsic corresponds to the \c VPSIGNB instruction.
634///
635/// \param __a
636/// A 128-bit integer vector containing the values to be copied.
637/// \param __b
638/// A 128-bit integer vector containing control bytes corresponding to
639/// positions in the destination.
640/// \returns A 128-bit integer vector containing the resultant values.
641static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
642_mm_sign_epi8(__m128i __a, __m128i __b) {
  // __a carries the values and __b the per-byte sign controls; both are
  // passed to the psignb128 builtin as __v16qi.
643 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
644}
645
646/// For each 16-bit integer in the first source operand, perform one of
647/// the following actions as specified by the second source operand.
648///
649/// If the word in the second source is negative, calculate the two's
650/// complement of the corresponding word in the first source, and write that
651/// value to the destination. If the word in the second source is positive,
652/// copy the corresponding word from the first source to the destination. If
653/// the word in the second source is zero, clear the corresponding word in
654/// the destination.
655///
656/// \headerfile <x86intrin.h>
657///
658/// This intrinsic corresponds to the \c VPSIGNW instruction.
659///
660/// \param __a
661/// A 128-bit integer vector containing the values to be copied.
662/// \param __b
663/// A 128-bit integer vector containing control words corresponding to
664/// positions in the destination.
665/// \returns A 128-bit integer vector containing the resultant values.
666static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
667_mm_sign_epi16(__m128i __a, __m128i __b) {
  // __a carries the values and __b the per-word sign controls; both are
  // passed to the psignw128 builtin as __v8hi.
668 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
669}
670
671/// For each 32-bit integer in the first source operand, perform one of
672/// the following actions as specified by the second source operand.
673///
674/// If the doubleword in the second source is negative, calculate the two's
675/// complement of the corresponding doubleword in the first source, and
676/// write that value to the destination. If the doubleword in the second
677/// source is positive, copy the corresponding doubleword from the first
678/// source to the destination. If the doubleword in the second source is
679/// zero, clear the corresponding doubleword in the destination.
680///
681/// \headerfile <x86intrin.h>
682///
683/// This intrinsic corresponds to the \c VPSIGND instruction.
684///
685/// \param __a
686/// A 128-bit integer vector containing the values to be copied.
687/// \param __b
688/// A 128-bit integer vector containing control doublewords corresponding to
689/// positions in the destination.
690/// \returns A 128-bit integer vector containing the resultant values.
691static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
692_mm_sign_epi32(__m128i __a, __m128i __b) {
  // __a carries the values and __b the per-doubleword sign controls; both
  // are passed to the psignd128 builtin as __v4si.
693 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
694}
695
696/// For each 8-bit integer in the first source operand, perform one of
697/// the following actions as specified by the second source operand.
698///
699/// If the byte in the second source is negative, calculate the two's
700/// complement of the corresponding byte in the first source, and write that
701/// value to the destination. If the byte in the second source is positive,
702/// copy the corresponding byte from the first source to the destination. If
703/// the byte in the second source is zero, clear the corresponding byte in
704/// the destination.
705///
706/// \headerfile <x86intrin.h>
707///
708/// This intrinsic corresponds to the \c PSIGNB instruction.
709///
710/// \param __a
711/// A 64-bit integer vector containing the values to be copied.
712/// \param __b
713/// A 64-bit integer vector containing control bytes corresponding to
714/// positions in the destination.
715/// \returns A 64-bit integer vector containing the resultant values.
716static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a,
717 __m64 __b) {
  // Widen each 64-bit input to 128 bits with a zero upper half (__zext128),
  // run the 128-bit builtin, and keep the low 64 bits (__trunc64).
718 return __trunc64(__builtin_ia32_psignb128((__v16qi)__zext128(__a),
719 (__v16qi)__zext128(__b)));
720}
721
722/// For each 16-bit integer in the first source operand, perform one of
723/// the following actions as specified by the second source operand.
724///
725/// If the word in the second source is negative, calculate the two's
726/// complement of the corresponding word in the first source, and write that
727/// value to the destination. If the word in the second source is positive,
728/// copy the corresponding word from the first source to the destination. If
729/// the word in the second source is zero, clear the corresponding word in
730/// the destination.
731///
732/// \headerfile <x86intrin.h>
733///
734/// This intrinsic corresponds to the \c PSIGNW instruction.
735///
736/// \param __a
737/// A 64-bit integer vector containing the values to be copied.
738/// \param __b
739/// A 64-bit integer vector containing control words corresponding to
740/// positions in the destination.
741/// \returns A 64-bit integer vector containing the resultant values.
742static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a,
743 __m64 __b) {
  // Widen each 64-bit input to 128 bits with a zero upper half (__zext128),
  // run the 128-bit builtin, and keep the low 64 bits (__trunc64).
744 return __trunc64(
745 __builtin_ia32_psignw128((__v8hi)__zext128(__a), (__v8hi)__zext128(__b)));
746}
747
748/// For each 32-bit integer in the first source operand, perform one of
749/// the following actions as specified by the second source operand.
750///
751/// If the doubleword in the second source is negative, calculate the two's
752/// complement of the corresponding doubleword in the first source, and
753/// write that value to the destination. If the doubleword in the second
754/// source is positive, copy the corresponding doubleword from the first
755/// source to the destination. If the doubleword in the second source is
756/// zero, clear the corresponding doubleword in the destination.
757///
758/// \headerfile <x86intrin.h>
759///
760/// This intrinsic corresponds to the \c PSIGND instruction.
761///
762/// \param __a
763/// A 64-bit integer vector containing the values to be copied.
764/// \param __b
765/// A 64-bit integer vector containing two control doublewords corresponding
766/// to positions in the destination.
767/// \returns A 64-bit integer vector containing the resultant values.
768static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a,
769 __m64 __b) {
  // Widen each 64-bit input to 128 bits with a zero upper half (__zext128),
  // run the 128-bit builtin, and keep the low 64 bits (__trunc64).
770 return __trunc64(
771 __builtin_ia32_psignd128((__v4si)__zext128(__a), (__v4si)__zext128(__b)));
772}
773
/* Remove the helper macros defined earlier in this header so they are no
 * longer defined for code that follows it. */
774#undef __zext128
775#undef __trunc64
776#undef __DEFAULT_FN_ATTRS
777#undef __DEFAULT_FN_ATTRS_CONSTEXPR
778
779#endif /* __TMMINTRIN_H */
static __inline__ vector float vector float __b
Definition altivec.h:578
#define __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32]...
Definition tmmintrin.h:410
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi8(__m64 __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition tmmintrin.h:48
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_epi8(__m128i __a, __m128i __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition tmmintrin.h:642
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition tmmintrin.h:98
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition tmmintrin.h:496
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_epi32(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32...
Definition tmmintrin.h:366
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a, __m64 __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 64-bit ...
Definition tmmintrin.h:461
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a, __m64 __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:742
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi32(__m64 __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition tmmintrin.h:114
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16].
Definition tmmintrin.h:246
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_epi32(__m128i __a, __m128i __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:692
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition tmmintrin.h:131
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi16(__m64 __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition tmmintrin.h:81
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a, __m64 __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:768
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_maddubs_pi16(__m64 __a, __m64 __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition tmmintrin.h:525
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16].
Definition tmmintrin.h:203
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_epi16(__m128i __a, __m128i __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition tmmintrin.h:667
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_epi16(__m128i __a, __m128i __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 128-bit vect...
Definition tmmintrin.h:296
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a, __m64 __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 64-bit vecto...
Definition tmmintrin.h:320
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_epi32(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32].
Definition tmmintrin.h:225
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition tmmintrin.h:545
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition tmmintrin.h:387
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_shuffle_pi8(__m64 __a, __m64 __b)
Copies the 8-bit integers from a 64-bit integer vector to the destination or clears 8-bit values in t...
Definition tmmintrin.h:614
#define __trunc64(x)
Definition tmmintrin.h:24
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32].
Definition tmmintrin.h:269
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_pi16(__m64 __a, __m64 __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition tmmintrin.h:564
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
Definition tmmintrin.h:590
#define __zext128(x)
Definition tmmintrin.h:26
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition tmmintrin.h:344
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_epi16(__m128i __a, __m128i __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 128-bit...
Definition tmmintrin.h:437
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a, __m64 __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition tmmintrin.h:716
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition tmmintrin.h:65