clang 19.0.0git
tmmintrin.h
Go to the documentation of this file.
1/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __TMMINTRIN_H
11#define __TMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <pmmintrin.h>
18
/* Default attributes for the functions in this file.
 *
 * __DEFAULT_FN_ATTRS is applied to the 128-bit (__m128i) intrinsics below.
 * __DEFAULT_FN_ATTRS_MMX is applied to the 64-bit (__m64) intrinsics; it is the
 * same attribute list with "mmx" added to the target string.
 */
#define __DEFAULT_FN_ATTRS \
    __attribute__((__always_inline__, \
                   __nodebug__, \
                   __target__("ssse3,no-evex512"), \
                   __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX \
    __attribute__((__always_inline__, \
                   __nodebug__, \
                   __target__("mmx,ssse3,no-evex512"), \
                   __min_vector_width__(64)))
27
28/// Computes the absolute value of each of the packed 8-bit signed
29/// integers in the source operand and stores the 8-bit unsigned integer
30/// results in the destination.
31///
32/// \headerfile <x86intrin.h>
33///
34/// This intrinsic corresponds to the \c PABSB instruction.
35///
36/// \param __a
37/// A 64-bit vector of [8 x i8].
38/// \returns A 64-bit integer vector containing the absolute values of the
39/// elements in the operand.
40static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
42{
43 return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
44}
45
46/// Computes the absolute value of each of the packed 8-bit signed
47/// integers in the source operand and stores the 8-bit unsigned integer
48/// results in the destination.
49///
50/// \headerfile <x86intrin.h>
51///
52/// This intrinsic corresponds to the \c VPABSB instruction.
53///
54/// \param __a
55/// A 128-bit vector of [16 x i8].
56/// \returns A 128-bit integer vector containing the absolute values of the
57/// elements in the operand.
58static __inline__ __m128i __DEFAULT_FN_ATTRS
60{
61 return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
62}
63
64/// Computes the absolute value of each of the packed 16-bit signed
65/// integers in the source operand and stores the 16-bit unsigned integer
66/// results in the destination.
67///
68/// \headerfile <x86intrin.h>
69///
70/// This intrinsic corresponds to the \c PABSW instruction.
71///
72/// \param __a
73/// A 64-bit vector of [4 x i16].
74/// \returns A 64-bit integer vector containing the absolute values of the
75/// elements in the operand.
76static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
78{
79 return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
80}
81
82/// Computes the absolute value of each of the packed 16-bit signed
83/// integers in the source operand and stores the 16-bit unsigned integer
84/// results in the destination.
85///
86/// \headerfile <x86intrin.h>
87///
88/// This intrinsic corresponds to the \c VPABSW instruction.
89///
90/// \param __a
91/// A 128-bit vector of [8 x i16].
92/// \returns A 128-bit integer vector containing the absolute values of the
93/// elements in the operand.
94static __inline__ __m128i __DEFAULT_FN_ATTRS
96{
97 return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
98}
99
100/// Computes the absolute value of each of the packed 32-bit signed
101/// integers in the source operand and stores the 32-bit unsigned integer
102/// results in the destination.
103///
104/// \headerfile <x86intrin.h>
105///
106/// This intrinsic corresponds to the \c PABSD instruction.
107///
108/// \param __a
109/// A 64-bit vector of [2 x i32].
110/// \returns A 64-bit integer vector containing the absolute values of the
111/// elements in the operand.
112static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
114{
115 return (__m64)__builtin_ia32_pabsd((__v2si)__a);
116}
117
118/// Computes the absolute value of each of the packed 32-bit signed
119/// integers in the source operand and stores the 32-bit unsigned integer
120/// results in the destination.
121///
122/// \headerfile <x86intrin.h>
123///
124/// This intrinsic corresponds to the \c VPABSD instruction.
125///
126/// \param __a
127/// A 128-bit vector of [4 x i32].
128/// \returns A 128-bit integer vector containing the absolute values of the
129/// elements in the operand.
130static __inline__ __m128i __DEFAULT_FN_ATTRS
132{
133 return (__m128i)__builtin_elementwise_abs((__v4si)__a);
134}
135
136/// Concatenates the two 128-bit integer vector operands, and
137/// right-shifts the result by the number of bytes specified in the immediate
138/// operand.
139///
140/// \headerfile <x86intrin.h>
141///
142/// \code
143/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
144/// \endcode
145///
146/// This intrinsic corresponds to the \c PALIGNR instruction.
147///
148/// \param a
149/// A 128-bit vector of [16 x i8] containing one of the source operands.
150/// \param b
151/// A 128-bit vector of [16 x i8] containing one of the source operands.
152/// \param n
153/// An immediate operand specifying how many bytes to right-shift the result.
154/// \returns A 128-bit integer vector containing the concatenated right-shifted
155/// value.
/* Both vector arguments are first cast to __m128i and then to [16 x i8];
 * n is documented above as an immediate operand and is forwarded unchanged.
 * NOTE(review): presumably a macro rather than a function so that n reaches
 * the builtin as a constant expression - confirm. */
#define _mm_alignr_epi8(a, b, n) \
    ((__m128i)__builtin_ia32_palignr128( \
        (__v16qi)(__m128i)(a), \
        (__v16qi)(__m128i)(b), \
        (n)))
159
160/// Concatenates the two 64-bit integer vector operands, and right-shifts
161/// the result by the number of bytes specified in the immediate operand.
162///
163/// \headerfile <x86intrin.h>
164///
165/// \code
166/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
167/// \endcode
168///
169/// This intrinsic corresponds to the \c PALIGNR instruction.
170///
171/// \param a
172/// A 64-bit vector of [8 x i8] containing one of the source operands.
173/// \param b
174/// A 64-bit vector of [8 x i8] containing one of the source operands.
175/// \param n
176/// An immediate operand specifying how many bytes to right-shift the result.
177/// \returns A 64-bit integer vector containing the concatenated right-shifted
178/// value.
/* Both vector arguments are first cast to __m64 and then to [8 x i8];
 * n is documented above as an immediate operand and is forwarded unchanged. */
#define _mm_alignr_pi8(a, b, n) \
    ((__m64)__builtin_ia32_palignr( \
        (__v8qi)(__m64)(a), \
        (__v8qi)(__m64)(b), \
        (n)))
181
182/// Horizontally adds the adjacent pairs of values contained in 2 packed
183/// 128-bit vectors of [8 x i16].
184///
185/// \headerfile <x86intrin.h>
186///
187/// This intrinsic corresponds to the \c VPHADDW instruction.
188///
189/// \param __a
190/// A 128-bit vector of [8 x i16] containing one of the source operands. The
191/// horizontal sums of the values are stored in the lower bits of the
192/// destination.
193/// \param __b
194/// A 128-bit vector of [8 x i16] containing one of the source operands. The
195/// horizontal sums of the values are stored in the upper bits of the
196/// destination.
197/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
198/// both operands.
199static __inline__ __m128i __DEFAULT_FN_ATTRS
200_mm_hadd_epi16(__m128i __a, __m128i __b)
201{
202 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
203}
204
205/// Horizontally adds the adjacent pairs of values contained in 2 packed
206/// 128-bit vectors of [4 x i32].
207///
208/// \headerfile <x86intrin.h>
209///
210/// This intrinsic corresponds to the \c VPHADDD instruction.
211///
212/// \param __a
213/// A 128-bit vector of [4 x i32] containing one of the source operands. The
214/// horizontal sums of the values are stored in the lower bits of the
215/// destination.
216/// \param __b
217/// A 128-bit vector of [4 x i32] containing one of the source operands. The
218/// horizontal sums of the values are stored in the upper bits of the
219/// destination.
220/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
221/// both operands.
222static __inline__ __m128i __DEFAULT_FN_ATTRS
223_mm_hadd_epi32(__m128i __a, __m128i __b)
224{
225 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
226}
227
228/// Horizontally adds the adjacent pairs of values contained in 2 packed
229/// 64-bit vectors of [4 x i16].
230///
231/// \headerfile <x86intrin.h>
232///
233/// This intrinsic corresponds to the \c PHADDW instruction.
234///
235/// \param __a
236/// A 64-bit vector of [4 x i16] containing one of the source operands. The
237/// horizontal sums of the values are stored in the lower bits of the
238/// destination.
239/// \param __b
240/// A 64-bit vector of [4 x i16] containing one of the source operands. The
241/// horizontal sums of the values are stored in the upper bits of the
242/// destination.
243/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
244/// operands.
245static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
246_mm_hadd_pi16(__m64 __a, __m64 __b)
247{
248 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
249}
250
251/// Horizontally adds the adjacent pairs of values contained in 2 packed
252/// 64-bit vectors of [2 x i32].
253///
254/// \headerfile <x86intrin.h>
255///
256/// This intrinsic corresponds to the \c PHADDD instruction.
257///
258/// \param __a
259/// A 64-bit vector of [2 x i32] containing one of the source operands. The
260/// horizontal sums of the values are stored in the lower bits of the
261/// destination.
262/// \param __b
263/// A 64-bit vector of [2 x i32] containing one of the source operands. The
264/// horizontal sums of the values are stored in the upper bits of the
265/// destination.
266/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
267/// operands.
268static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
269_mm_hadd_pi32(__m64 __a, __m64 __b)
270{
271 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
272}
273
274/// Horizontally adds, with saturation, the adjacent pairs of values contained
275/// in two packed 128-bit vectors of [8 x i16].
276///
277/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
278/// less than 0x8000 are saturated to 0x8000.
279///
280/// \headerfile <x86intrin.h>
281///
282/// This intrinsic corresponds to the \c VPHADDSW instruction.
283///
284/// \param __a
285/// A 128-bit vector of [8 x i16] containing one of the source operands. The
286/// horizontal sums of the values are stored in the lower bits of the
287/// destination.
288/// \param __b
289/// A 128-bit vector of [8 x i16] containing one of the source operands. The
290/// horizontal sums of the values are stored in the upper bits of the
291/// destination.
292/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
293/// sums of both operands.
294static __inline__ __m128i __DEFAULT_FN_ATTRS
295_mm_hadds_epi16(__m128i __a, __m128i __b)
296{
297 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
298}
299
300/// Horizontally adds, with saturation, the adjacent pairs of values contained
301/// in two packed 64-bit vectors of [4 x i16].
302///
303/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
304/// less than 0x8000 are saturated to 0x8000.
305///
306/// \headerfile <x86intrin.h>
307///
308/// This intrinsic corresponds to the \c PHADDSW instruction.
309///
310/// \param __a
311/// A 64-bit vector of [4 x i16] containing one of the source operands. The
312/// horizontal sums of the values are stored in the lower bits of the
313/// destination.
314/// \param __b
315/// A 64-bit vector of [4 x i16] containing one of the source operands. The
316/// horizontal sums of the values are stored in the upper bits of the
317/// destination.
318/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
319/// sums of both operands.
320static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
321_mm_hadds_pi16(__m64 __a, __m64 __b)
322{
323 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
324}
325
326/// Horizontally subtracts the adjacent pairs of values contained in 2
327/// packed 128-bit vectors of [8 x i16].
328///
329/// \headerfile <x86intrin.h>
330///
331/// This intrinsic corresponds to the \c VPHSUBW instruction.
332///
333/// \param __a
334/// A 128-bit vector of [8 x i16] containing one of the source operands. The
335/// horizontal differences between the values are stored in the lower bits of
336/// the destination.
337/// \param __b
338/// A 128-bit vector of [8 x i16] containing one of the source operands. The
339/// horizontal differences between the values are stored in the upper bits of
340/// the destination.
341/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
342/// of both operands.
343static __inline__ __m128i __DEFAULT_FN_ATTRS
344_mm_hsub_epi16(__m128i __a, __m128i __b)
345{
346 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
347}
348
349/// Horizontally subtracts the adjacent pairs of values contained in 2
350/// packed 128-bit vectors of [4 x i32].
351///
352/// \headerfile <x86intrin.h>
353///
354/// This intrinsic corresponds to the \c VPHSUBD instruction.
355///
356/// \param __a
357/// A 128-bit vector of [4 x i32] containing one of the source operands. The
358/// horizontal differences between the values are stored in the lower bits of
359/// the destination.
360/// \param __b
361/// A 128-bit vector of [4 x i32] containing one of the source operands. The
362/// horizontal differences between the values are stored in the upper bits of
363/// the destination.
364/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
365/// of both operands.
366static __inline__ __m128i __DEFAULT_FN_ATTRS
367_mm_hsub_epi32(__m128i __a, __m128i __b)
368{
369 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
370}
371
372/// Horizontally subtracts the adjacent pairs of values contained in 2
373/// packed 64-bit vectors of [4 x i16].
374///
375/// \headerfile <x86intrin.h>
376///
377/// This intrinsic corresponds to the \c PHSUBW instruction.
378///
379/// \param __a
380/// A 64-bit vector of [4 x i16] containing one of the source operands. The
381/// horizontal differences between the values are stored in the lower bits of
382/// the destination.
383/// \param __b
384/// A 64-bit vector of [4 x i16] containing one of the source operands. The
385/// horizontal differences between the values are stored in the upper bits of
386/// the destination.
387/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
388/// of both operands.
389static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
390_mm_hsub_pi16(__m64 __a, __m64 __b)
391{
392 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
393}
394
395/// Horizontally subtracts the adjacent pairs of values contained in 2
396/// packed 64-bit vectors of [2 x i32].
397///
398/// \headerfile <x86intrin.h>
399///
400/// This intrinsic corresponds to the \c PHSUBD instruction.
401///
402/// \param __a
403/// A 64-bit vector of [2 x i32] containing one of the source operands. The
404/// horizontal differences between the values are stored in the lower bits of
405/// the destination.
406/// \param __b
407/// A 64-bit vector of [2 x i32] containing one of the source operands. The
408/// horizontal differences between the values are stored in the upper bits of
409/// the destination.
410/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
411/// of both operands.
412static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
413_mm_hsub_pi32(__m64 __a, __m64 __b)
414{
415 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
416}
417
418/// Horizontally subtracts, with saturation, the adjacent pairs of values
419/// contained in two packed 128-bit vectors of [8 x i16].
420///
421/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
422/// Negative differences less than 0x8000 are saturated to 0x8000.
423///
424/// \headerfile <x86intrin.h>
425///
426/// This intrinsic corresponds to the \c VPHSUBSW instruction.
427///
428/// \param __a
429/// A 128-bit vector of [8 x i16] containing one of the source operands. The
430/// horizontal differences between the values are stored in the lower bits of
431/// the destination.
432/// \param __b
433/// A 128-bit vector of [8 x i16] containing one of the source operands. The
434/// horizontal differences between the values are stored in the upper bits of
435/// the destination.
436/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
437/// differences of both operands.
438static __inline__ __m128i __DEFAULT_FN_ATTRS
439_mm_hsubs_epi16(__m128i __a, __m128i __b)
440{
441 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
442}
443
444/// Horizontally subtracts, with saturation, the adjacent pairs of values
445/// contained in two packed 64-bit vectors of [4 x i16].
446///
447/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
448/// Negative differences less than 0x8000 are saturated to 0x8000.
449///
450/// \headerfile <x86intrin.h>
451///
452/// This intrinsic corresponds to the \c PHSUBSW instruction.
453///
454/// \param __a
455/// A 64-bit vector of [4 x i16] containing one of the source operands. The
456/// horizontal differences between the values are stored in the lower bits of
457/// the destination.
458/// \param __b
459/// A 64-bit vector of [4 x i16] containing one of the source operands. The
460/// horizontal differences between the values are stored in the upper bits of
461/// the destination.
462/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
463/// differences of both operands.
464static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
465_mm_hsubs_pi16(__m64 __a, __m64 __b)
466{
467 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
468}
469
470/// Multiplies corresponding pairs of packed 8-bit unsigned integer
471/// values contained in the first source operand and packed 8-bit signed
472/// integer values contained in the second source operand, adds pairs of
473/// contiguous products with signed saturation, and writes the 16-bit sums to
474/// the corresponding bits in the destination.
475///
476/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
477/// both operands are multiplied, and the sum of both results is written to
478/// bits [15:0] of the destination.
479///
480/// \headerfile <x86intrin.h>
481///
482/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
483///
484/// \param __a
485/// A 128-bit integer vector containing the first source operand.
486/// \param __b
487/// A 128-bit integer vector containing the second source operand.
488/// \returns A 128-bit integer vector containing the sums of products of both
489/// operands: \n
490/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
491/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
492/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
493/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
494/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
495/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
496/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
497/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
498static __inline__ __m128i __DEFAULT_FN_ATTRS
499_mm_maddubs_epi16(__m128i __a, __m128i __b)
500{
501 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
502}
503
504/// Multiplies corresponding pairs of packed 8-bit unsigned integer
505/// values contained in the first source operand and packed 8-bit signed
506/// integer values contained in the second source operand, adds pairs of
507/// contiguous products with signed saturation, and writes the 16-bit sums to
508/// the corresponding bits in the destination.
509///
510/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
511/// both operands are multiplied, and the sum of both results is written to
512/// bits [15:0] of the destination.
513///
514/// \headerfile <x86intrin.h>
515///
516/// This intrinsic corresponds to the \c PMADDUBSW instruction.
517///
518/// \param __a
519/// A 64-bit integer vector containing the first source operand.
520/// \param __b
521/// A 64-bit integer vector containing the second source operand.
522/// \returns A 64-bit integer vector containing the sums of products of both
523/// operands: \n
524/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
525/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
526/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
527/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
528static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
530{
531 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
532}
533
534/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
535/// products to the 18 most significant bits by right-shifting, rounds the
536/// truncated value by adding 1, and writes bits [16:1] to the destination.
537///
538/// \headerfile <x86intrin.h>
539///
540/// This intrinsic corresponds to the \c VPMULHRSW instruction.
541///
542/// \param __a
543/// A 128-bit vector of [8 x i16] containing one of the source operands.
544/// \param __b
545/// A 128-bit vector of [8 x i16] containing one of the source operands.
546/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
547/// products of both operands.
548static __inline__ __m128i __DEFAULT_FN_ATTRS
549_mm_mulhrs_epi16(__m128i __a, __m128i __b)
550{
551 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
552}
553
554/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
555/// products to the 18 most significant bits by right-shifting, rounds the
556/// truncated value by adding 1, and writes bits [16:1] to the destination.
557///
558/// \headerfile <x86intrin.h>
559///
560/// This intrinsic corresponds to the \c PMULHRSW instruction.
561///
562/// \param __a
563/// A 64-bit vector of [4 x i16] containing one of the source operands.
564/// \param __b
565/// A 64-bit vector of [4 x i16] containing one of the source operands.
566/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
567/// products of both operands.
568static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
570{
571 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
572}
573
574/// Copies the 8-bit integers from a 128-bit integer vector to the
575/// destination or clears 8-bit values in the destination, as specified by
576/// the second source operand.
577///
578/// \headerfile <x86intrin.h>
579///
580/// This intrinsic corresponds to the \c VPSHUFB instruction.
581///
582/// \param __a
583/// A 128-bit integer vector containing the values to be copied.
584/// \param __b
585/// A 128-bit integer vector containing control bytes corresponding to
586/// positions in the destination:
587/// Bit 7: \n
588/// 1: Clear the corresponding byte in the destination. \n
589/// 0: Copy the selected source byte to the corresponding byte in the
590/// destination. \n
591/// Bits [6:4] Reserved. \n
592/// Bits [3:0] select the source byte to be copied.
593/// \returns A 128-bit integer vector containing the copied or cleared values.
594static __inline__ __m128i __DEFAULT_FN_ATTRS
595_mm_shuffle_epi8(__m128i __a, __m128i __b)
596{
597 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
598}
599
600/// Copies the 8-bit integers from a 64-bit integer vector to the
601/// destination or clears 8-bit values in the destination, as specified by
602/// the second source operand.
603///
604/// \headerfile <x86intrin.h>
605///
606/// This intrinsic corresponds to the \c PSHUFB instruction.
607///
608/// \param __a
609/// A 64-bit integer vector containing the values to be copied.
610/// \param __b
611/// A 64-bit integer vector containing control bytes corresponding to
612/// positions in the destination:
613/// Bit 7: \n
614/// 1: Clear the corresponding byte in the destination. \n
615/// 0: Copy the selected source byte to the corresponding byte in the
616/// destination. \n
/// Bits [2:0] select the source byte to be copied.
618/// \returns A 64-bit integer vector containing the copied or cleared values.
619static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
621{
622 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
623}
624
625/// For each 8-bit integer in the first source operand, perform one of
626/// the following actions as specified by the second source operand.
627///
628/// If the byte in the second source is negative, calculate the two's
629/// complement of the corresponding byte in the first source, and write that
630/// value to the destination. If the byte in the second source is positive,
631/// copy the corresponding byte from the first source to the destination. If
632/// the byte in the second source is zero, clear the corresponding byte in
633/// the destination.
634///
635/// \headerfile <x86intrin.h>
636///
637/// This intrinsic corresponds to the \c VPSIGNB instruction.
638///
639/// \param __a
640/// A 128-bit integer vector containing the values to be copied.
641/// \param __b
642/// A 128-bit integer vector containing control bytes corresponding to
643/// positions in the destination.
644/// \returns A 128-bit integer vector containing the resultant values.
645static __inline__ __m128i __DEFAULT_FN_ATTRS
646_mm_sign_epi8(__m128i __a, __m128i __b)
647{
648 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
649}
650
651/// For each 16-bit integer in the first source operand, perform one of
652/// the following actions as specified by the second source operand.
653///
654/// If the word in the second source is negative, calculate the two's
655/// complement of the corresponding word in the first source, and write that
656/// value to the destination. If the word in the second source is positive,
657/// copy the corresponding word from the first source to the destination. If
658/// the word in the second source is zero, clear the corresponding word in
659/// the destination.
660///
661/// \headerfile <x86intrin.h>
662///
663/// This intrinsic corresponds to the \c VPSIGNW instruction.
664///
665/// \param __a
666/// A 128-bit integer vector containing the values to be copied.
667/// \param __b
668/// A 128-bit integer vector containing control words corresponding to
669/// positions in the destination.
670/// \returns A 128-bit integer vector containing the resultant values.
671static __inline__ __m128i __DEFAULT_FN_ATTRS
672_mm_sign_epi16(__m128i __a, __m128i __b)
673{
674 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
675}
676
677/// For each 32-bit integer in the first source operand, perform one of
678/// the following actions as specified by the second source operand.
679///
/// If the doubleword in the second source is negative, calculate the two's
/// complement of the corresponding doubleword in the first source, and
/// write that value to the destination. If the doubleword in the second
/// source is positive, copy the corresponding doubleword from the first
/// source to the destination. If the doubleword in the second source is
/// zero, clear the corresponding doubleword in the destination.
686///
687/// \headerfile <x86intrin.h>
688///
689/// This intrinsic corresponds to the \c VPSIGND instruction.
690///
691/// \param __a
692/// A 128-bit integer vector containing the values to be copied.
693/// \param __b
694/// A 128-bit integer vector containing control doublewords corresponding to
695/// positions in the destination.
696/// \returns A 128-bit integer vector containing the resultant values.
697static __inline__ __m128i __DEFAULT_FN_ATTRS
698_mm_sign_epi32(__m128i __a, __m128i __b)
699{
700 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
701}
702
703/// For each 8-bit integer in the first source operand, perform one of
704/// the following actions as specified by the second source operand.
705///
706/// If the byte in the second source is negative, calculate the two's
707/// complement of the corresponding byte in the first source, and write that
708/// value to the destination. If the byte in the second source is positive,
709/// copy the corresponding byte from the first source to the destination. If
710/// the byte in the second source is zero, clear the corresponding byte in
711/// the destination.
712///
713/// \headerfile <x86intrin.h>
714///
715/// This intrinsic corresponds to the \c PSIGNB instruction.
716///
717/// \param __a
718/// A 64-bit integer vector containing the values to be copied.
719/// \param __b
720/// A 64-bit integer vector containing control bytes corresponding to
721/// positions in the destination.
722/// \returns A 64-bit integer vector containing the resultant values.
723static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
724_mm_sign_pi8(__m64 __a, __m64 __b)
725{
726 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
727}
728
729/// For each 16-bit integer in the first source operand, perform one of
730/// the following actions as specified by the second source operand.
731///
732/// If the word in the second source is negative, calculate the two's
733/// complement of the corresponding word in the first source, and write that
734/// value to the destination. If the word in the second source is positive,
735/// copy the corresponding word from the first source to the destination. If
736/// the word in the second source is zero, clear the corresponding word in
737/// the destination.
738///
739/// \headerfile <x86intrin.h>
740///
741/// This intrinsic corresponds to the \c PSIGNW instruction.
742///
743/// \param __a
744/// A 64-bit integer vector containing the values to be copied.
745/// \param __b
746/// A 64-bit integer vector containing control words corresponding to
747/// positions in the destination.
748/// \returns A 64-bit integer vector containing the resultant values.
749static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
750_mm_sign_pi16(__m64 __a, __m64 __b)
751{
752 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
753}
754
755/// For each 32-bit integer in the first source operand, perform one of
756/// the following actions as specified by the second source operand.
757///
758/// If the doubleword in the second source is negative, calculate the two's
759/// complement of the corresponding doubleword in the first source, and
760/// write that value to the destination. If the doubleword in the second
761/// source is positive, copy the corresponding doubleword from the first
762/// source to the destination. If the doubleword in the second source is
763/// zero, clear the corresponding doubleword in the destination.
764///
765/// \headerfile <x86intrin.h>
766///
767/// This intrinsic corresponds to the \c PSIGND instruction.
768///
769/// \param __a
770/// A 64-bit integer vector containing the values to be copied.
771/// \param __b
772/// A 64-bit integer vector containing two control doublewords corresponding
773/// to positions in the destination.
774/// \returns A 64-bit integer vector containing the resultant values.
775static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
776_mm_sign_pi32(__m64 __a, __m64 __b)
777{
778 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
779}
780
781#undef __DEFAULT_FN_ATTRS
782#undef __DEFAULT_FN_ATTRS_MMX
783
784#endif /* __TMMINTRIN_H */
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ void int __a
Definition: emmintrin.h:4057
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi16(__m128i __a, __m128i __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:672
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi32(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32].
Definition: tmmintrin.h:223
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi16(__m64 __a, __m64 __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:750
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_shuffle_pi8(__m64 __a, __m64 __b)
Copies the 8-bit integers from a 64-bit integer vector to the destination or clears 8-bit values in t...
Definition: tmmintrin.h:620
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:390
#define __DEFAULT_FN_ATTRS
Definition: tmmintrin.h:20
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi32(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32...
Definition: tmmintrin.h:367
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:59
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:549
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_maddubs_pi16(__m64 __a, __m64 __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:529
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi32(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32].
Definition: tmmintrin.h:269
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:95
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:499
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi8(__m64 __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:41
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi8(__m128i __a, __m128i __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition: tmmintrin.h:646
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition: tmmintrin.h:344
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubs_epi16(__m128i __a, __m128i __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 128-bit...
Definition: tmmintrin.h:439
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi16(__m64 __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:77
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadds_pi16(__m64 __a, __m64 __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 64-bit vecto...
Definition: tmmintrin.h:321
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi8(__m64 __a, __m64 __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition: tmmintrin.h:724
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sign_pi32(__m64 __a, __m64 __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:776
#define __DEFAULT_FN_ATTRS_MMX
Definition: tmmintrin.h:23
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhrs_pi16(__m64 __a, __m64 __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:569
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hadd_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16].
Definition: tmmintrin.h:246
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsubs_pi16(__m64 __a, __m64 __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 64-bit ...
Definition: tmmintrin.h:465
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadds_epi16(__m128i __a, __m128i __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 128-bit vect...
Definition: tmmintrin.h:295
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi32(__m128i __a, __m128i __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:698
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16].
Definition: tmmintrin.h:200
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
Definition: tmmintrin.h:595
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_hsub_pi32(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32]...
Definition: tmmintrin.h:413
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:131
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_abs_pi32(__m64 __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:113