clang 22.0.0git
avx512ifmavlintrin.h
Go to the documentation of this file.
1/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------===
2 *
3 *
4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 * See https://llvm.org/LICENSE.txt for license information.
6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *
8 *===-----------------------------------------------------------------------===
9 */
10#ifndef __IMMINTRIN_H
11#error \
12 "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
13#endif
14
15#ifndef __IFMAVLINTRIN_H
16#define __IFMAVLINTRIN_H
17
/* Define the default attributes for the functions in this file.
 *
 * Both macros request forced inlining, no debug info, and the
 * "avx512ifma,avx512vl" target features; they differ only in the minimum
 * vector width (128 vs. 256 bits). When the header is compiled as C++11 or
 * newer, the functions are additionally declared constexpr. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512ifma,avx512vl"), \
                 __min_vector_width__(128))) constexpr
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512ifma,avx512vl"), \
                 __min_vector_width__(256))) constexpr
#else
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512ifma,avx512vl"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512ifma,avx512vl"), \
                 __min_vector_width__(256)))
#endif
38
/* When neither __AVXIFMA__ nor __AVX512IFMA__ is defined at preprocessing
 * time, the unmasked intrinsics are provided as macros that cast their
 * operands to the builtin's vector type and forward to the builtin.
 * NOTE(review): presumably this keeps the names usable from code that enables
 * the feature through a target attribute -- confirm against avxifmaintrin.h. */
#if !(defined(__AVXIFMA__) || defined(__AVX512IFMA__))
/* 128-bit "hi" variant: forwards to __builtin_ia32_vpmadd52huq128. */
#define _mm_madd52hi_epu64(X, Y, Z) \
  ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \
                                          (__v2di)(Z)))

/* 256-bit "hi" variant: forwards to __builtin_ia32_vpmadd52huq256. */
#define _mm256_madd52hi_epu64(X, Y, Z) \
  ((__m256i)__builtin_ia32_vpmadd52huq256((__v4di)(X), (__v4di)(Y), \
                                          (__v4di)(Z)))

/* 128-bit "lo" variant: forwards to __builtin_ia32_vpmadd52luq128. */
#define _mm_madd52lo_epu64(X, Y, Z) \
  ((__m128i)__builtin_ia32_vpmadd52luq128((__v2di)(X), (__v2di)(Y), \
                                          (__v2di)(Z)))

/* 256-bit "lo" variant: forwards to __builtin_ia32_vpmadd52luq256. */
#define _mm256_madd52lo_epu64(X, Y, Z) \
  ((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y), \
                                          (__v4di)(Z)))
#endif
56
/* With __AVX512IFMA__ defined, the unmasked intrinsics are real inline
 * functions rather than macros. Each one casts its three operands to the
 * builtin's element-vector type and returns the builtin's result.
 *
 * Per the Intel intrinsics reference for VPMADD52HUQ / VPMADD52LUQ, each
 * 64-bit lane multiplies the low 52 bits of the second and third operands and
 * adds the high ("hi") or low ("lo") 52 bits of the 104-bit product to the
 * corresponding lane of the first operand. */
#if defined(__AVX512IFMA__)
/* 128-bit (two-lane) "hi" multiply-accumulate of __Y * __Z into __X. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
  return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
                                                (__v2di)__Z);
}

/* 256-bit (four-lane) "hi" multiply-accumulate of __Y * __Z into __X. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
  return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
                                                (__v4di)__Z);
}

/* 128-bit (two-lane) "lo" multiply-accumulate of __Y * __Z into __X. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
  return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
                                                (__v2di)__Z);
}

/* 256-bit (four-lane) "lo" multiply-accumulate of __Y * __Z into __X. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
  return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
                                                (__v4di)__Z);
}
#endif
82
83static __inline__ __m128i __DEFAULT_FN_ATTRS128
84_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
85 return (__m128i)__builtin_ia32_selectq_128(
86 __M, (__v2di)__builtin_ia32_vpmadd52huq128(__W, __X, __Y), (__v2di)__W);
87}
88
89static __inline__ __m128i __DEFAULT_FN_ATTRS128
90_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
91 return (__m128i)__builtin_ia32_selectq_128(
92 __M, (__v2di)__builtin_ia32_vpmadd52huq128(__X, __Y, __Z),
93 (__v2di)_mm_setzero_si128());
94}
95
97 __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
98 return (__m256i)__builtin_ia32_selectq_256(
99 __M, (__v4di)__builtin_ia32_vpmadd52huq256(__W, __X, __Y), (__v4di)__W);
100}
101
103 __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
104 return (__m256i)__builtin_ia32_selectq_256(
105 __M, (__v4di)__builtin_ia32_vpmadd52huq256(__X, __Y, __Z),
106 (__v4di)_mm256_setzero_si256());
107}
108
109static __inline__ __m128i __DEFAULT_FN_ATTRS128
110_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
111 return (__m128i)__builtin_ia32_selectq_128(
112 __M, (__v2di)__builtin_ia32_vpmadd52luq128(__W, __X, __Y), (__v2di)__W);
113}
114
115static __inline__ __m128i __DEFAULT_FN_ATTRS128
116_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
117 return (__m128i)__builtin_ia32_selectq_128(
118 __M, (__v2di)__builtin_ia32_vpmadd52luq128(__X, __Y, __Z),
119 (__v2di)_mm_setzero_si128());
120}
121
123 __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
124 return (__m256i)__builtin_ia32_selectq_256(
125 __M, (__v4di)__builtin_ia32_vpmadd52luq256(__W, __X, __Y), (__v4di)__W);
126}
127
129 __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
130 return (__m256i)__builtin_ia32_selectq_256(
131 __M, (__v4di)__builtin_ia32_vpmadd52luq256(__X, __Y, __Z),
132 (__v4di)_mm256_setzero_si256());
133}
134
135#undef __DEFAULT_FN_ATTRS128
136#undef __DEFAULT_FN_ATTRS256
137
138#endif
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
unsigned char __mmask8
#define _mm256_madd52lo_epu64(X, Y, Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
#define _mm_madd52hi_epu64(X, Y, Z)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
#define _mm_madd52lo_epu64(X, Y, Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
#define _mm256_madd52hi_epu64(X, Y, Z)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4314
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
__inline unsigned int unsigned int __Y
Definition bmi2intrin.h:19