clang 22.0.0git
avx512vlbf16intrin.h
Go to the documentation of this file.
1/*===--------- avx512vlbf16intrin.h - AVX512_BF16 intrinsics ---------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512vlbf16intrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifdef __SSE2__
14
15#ifndef __AVX512VLBF16INTRIN_H
16#define __AVX512VLBF16INTRIN_H
17
/* Attribute sets shared by the intrinsics in this header. Both force
 * inlining, omit debug info and enable the avx512vl and avx512bf16 target
 * features; they differ only in the minimum vector width (128 vs. 256 bits).
 */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512bf16"),                            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512bf16"),                            \
                 __min_vector_width__(256)))

/* The _CONSTEXPR variants append constexpr only for C++11 and later; in C and
 * in older C++ they are identical to the plain attribute sets.
 */
#if !defined(__cplusplus) || (__cplusplus < 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#endif
34
35/// Convert Two Packed Single Data to One Packed BF16 Data.
36///
37/// \headerfile <x86intrin.h>
38///
39/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
40///
41/// \param __A
42/// A 128-bit vector of [4 x float].
43/// \param __B
44/// A 128-bit vector of [4 x float].
45/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
46/// conversion of __B, and higher 64 bits come from conversion of __A.
47static __inline__ __m128bh __DEFAULT_FN_ATTRS128
48_mm_cvtne2ps_pbh(__m128 __A, __m128 __B) {
49 return (__m128bh)__builtin_ia32_cvtne2ps2bf16_128((__v4sf) __A,
50 (__v4sf) __B);
51}
52
53/// Convert Two Packed Single Data to One Packed BF16 Data.
54///
55/// \headerfile <x86intrin.h>
56///
57/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
58///
59/// \param __A
60/// A 128-bit vector of [4 x float].
61/// \param __B
62/// A 128-bit vector of [4 x float].
63/// \param __W
64/// A 128-bit vector of [8 x bfloat].
65/// \param __U
66/// A 8-bit mask value specifying what is chosen for each element.
67/// A 1 means conversion of __A or __B. A 0 means element from __W.
68/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
69/// conversion of __B, and higher 64 bits come from conversion of __A.
70static __inline__ __m128bh __DEFAULT_FN_ATTRS128
71_mm_mask_cvtne2ps_pbh(__m128bh __W, __mmask8 __U, __m128 __A, __m128 __B) {
72 return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U,
73 (__v8bf)_mm_cvtne2ps_pbh(__A, __B),
74 (__v8bf)__W);
75}
76
77/// Convert Two Packed Single Data to One Packed BF16 Data.
78///
79/// \headerfile <x86intrin.h>
80///
81/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
82///
83/// \param __A
84/// A 128-bit vector of [4 x float].
85/// \param __B
86/// A 128-bit vector of [4 x float].
87/// \param __U
88/// A 8-bit mask value specifying what is chosen for each element.
89/// A 1 means conversion of __A or __B. A 0 means element is zero.
90/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
91/// conversion of __B, and higher 64 bits come from conversion of __A.
92static __inline__ __m128bh __DEFAULT_FN_ATTRS128
93_mm_maskz_cvtne2ps_pbh(__mmask8 __U, __m128 __A, __m128 __B) {
94 return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U,
95 (__v8bf)_mm_cvtne2ps_pbh(__A, __B),
96 (__v8bf)_mm_setzero_si128());
97}
98
99/// Convert Two Packed Single Data to One Packed BF16 Data.
100///
101/// \headerfile <x86intrin.h>
102///
103/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
104///
105/// \param __A
106/// A 256-bit vector of [8 x float].
107/// \param __B
108/// A 256-bit vector of [8 x float].
109/// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from
110/// conversion of __B, and higher 128 bits come from conversion of __A.
111static __inline__ __m256bh __DEFAULT_FN_ATTRS256
112_mm256_cvtne2ps_pbh(__m256 __A, __m256 __B) {
113 return (__m256bh)__builtin_ia32_cvtne2ps2bf16_256((__v8sf) __A,
114 (__v8sf) __B);
115}
116
117/// Convert Two Packed Single Data to One Packed BF16 Data.
118///
119/// \headerfile <x86intrin.h>
120///
121/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
122///
123/// \param __A
124/// A 256-bit vector of [8 x float].
125/// \param __B
126/// A 256-bit vector of [8 x float].
127/// \param __W
128/// A 256-bit vector of [16 x bfloat].
129/// \param __U
130/// A 16-bit mask value specifying what is chosen for each element.
131/// A 1 means conversion of __A or __B. A 0 means element from __W.
132/// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from
133/// conversion of __B, and higher 128 bits come from conversion of __A.
134static __inline__ __m256bh __DEFAULT_FN_ATTRS256
135_mm256_mask_cvtne2ps_pbh(__m256bh __W, __mmask16 __U, __m256 __A, __m256 __B) {
136 return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U,
137 (__v16bf)_mm256_cvtne2ps_pbh(__A, __B),
138 (__v16bf)__W);
139}
140
141/// Convert Two Packed Single Data to One Packed BF16 Data.
142///
143/// \headerfile <x86intrin.h>
144///
145/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
146///
147/// \param __A
148/// A 256-bit vector of [8 x float].
149/// \param __B
150/// A 256-bit vector of [8 x float].
151/// \param __U
152/// A 16-bit mask value specifying what is chosen for each element.
153/// A 1 means conversion of __A or __B. A 0 means element is zero.
154/// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from
155/// conversion of __B, and higher 128 bits come from conversion of __A.
156static __inline__ __m256bh __DEFAULT_FN_ATTRS256
157_mm256_maskz_cvtne2ps_pbh(__mmask16 __U, __m256 __A, __m256 __B) {
158 return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U,
159 (__v16bf)_mm256_cvtne2ps_pbh(__A, __B),
160 (__v16bf)_mm256_setzero_si256());
161}
162
/// Converts packed single-precision values to packed BF16 values.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param A
///    A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
///    conversion of A, and higher 64 bits are 0.
#define _mm_cvtneps_pbh(A)                                                     \
  ((__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)(A)))
175
176/// Convert Packed Single Data to Packed BF16 Data.
177///
178/// \headerfile <x86intrin.h>
179///
180/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
181///
182/// \param __A
183/// A 128-bit vector of [4 x float].
184/// \param __W
185/// A 128-bit vector of [8 x bfloat].
186/// \param __U
187/// A 4-bit mask value specifying what is chosen for each element.
188/// A 1 means conversion of __A. A 0 means element from __W.
189/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
190/// conversion of __A, and higher 64 bits are 0.
191static __inline__ __m128bh __DEFAULT_FN_ATTRS128
192_mm_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m128 __A) {
193 return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A,
194 (__v8bf)__W,
195 (__mmask8)__U);
196}
197
198/// Convert Packed Single Data to Packed BF16 Data.
199///
200/// \headerfile <x86intrin.h>
201///
202/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
203///
204/// \param __A
205/// A 128-bit vector of [4 x float].
206/// \param __U
207/// A 4-bit mask value specifying what is chosen for each element.
208/// A 1 means conversion of __A. A 0 means element is zero.
209/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
210/// conversion of __A, and higher 64 bits are 0.
211static __inline__ __m128bh __DEFAULT_FN_ATTRS128
212_mm_maskz_cvtneps_pbh(__mmask8 __U, __m128 __A) {
213 return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A,
214 (__v8bf)_mm_setzero_si128(),
215 (__mmask8)__U);
216}
217
/// Converts packed single-precision values to packed BF16 values.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param A
///    A 256-bit vector of [8 x float].
/// \returns A 128-bit vector of [8 x bfloat] that comes from conversion of A.
#define _mm256_cvtneps_pbh(A)                                                  \
  ((__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)(A)))
229
230/// Convert Packed Single Data to Packed BF16 Data.
231///
232/// \headerfile <x86intrin.h>
233///
234/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
235///
236/// \param __A
237/// A 256-bit vector of [8 x float].
238/// \param __W
239/// A 256-bit vector of [8 x bfloat].
240/// \param __U
241/// A 8-bit mask value specifying what is chosen for each element.
242/// A 1 means conversion of __A. A 0 means element from __W.
243/// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A.
244static __inline__ __m128bh __DEFAULT_FN_ATTRS256
245_mm256_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m256 __A) {
246 return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A,
247 (__v8bf)__W,
248 (__mmask8)__U);
249}
250
251/// Convert Packed Single Data to Packed BF16 Data.
252///
253/// \headerfile <x86intrin.h>
254///
255/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
256///
257/// \param __A
258/// A 256-bit vector of [8 x float].
259/// \param __U
260/// A 8-bit mask value specifying what is chosen for each element.
261/// A 1 means conversion of __A. A 0 means element is zero.
262/// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A.
263static __inline__ __m128bh __DEFAULT_FN_ATTRS256
264_mm256_maskz_cvtneps_pbh(__mmask8 __U, __m256 __A) {
265 return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A,
266 (__v8bf)_mm_setzero_si128(),
267 (__mmask8)__U);
268}
269
270/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
271///
272/// \headerfile <x86intrin.h>
273///
274/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
275///
276/// \param __A
277/// A 128-bit vector of [8 x bfloat].
278/// \param __B
279/// A 128-bit vector of [8 x bfloat].
280/// \param __D
281/// A 128-bit vector of [4 x float].
282/// \returns A 128-bit vector of [4 x float] comes from Dot Product of
283/// __A, __B and __D
284static __inline__ __m128 __DEFAULT_FN_ATTRS128
285_mm_dpbf16_ps(__m128 __D, __m128bh __A, __m128bh __B) {
286 return (__m128)__builtin_ia32_dpbf16ps_128((__v4sf)__D,
287 (__v8bf)__A,
288 (__v8bf)__B);
289}
290
291/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
292///
293/// \headerfile <x86intrin.h>
294///
295/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
296///
297/// \param __A
298/// A 128-bit vector of [8 x bfloat].
299/// \param __B
300/// A 128-bit vector of [8 x bfloat].
301/// \param __D
302/// A 128-bit vector of [4 x float].
303/// \param __U
304/// A 8-bit mask value specifying what is chosen for each element.
305/// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D.
306/// \returns A 128-bit vector of [4 x float] comes from Dot Product of
307/// __A, __B and __D
308static __inline__ __m128 __DEFAULT_FN_ATTRS128
309_mm_mask_dpbf16_ps(__m128 __D, __mmask8 __U, __m128bh __A, __m128bh __B) {
310 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
311 (__v4sf)_mm_dpbf16_ps(__D, __A, __B),
312 (__v4sf)__D);
313}
314
315/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
316///
317/// \headerfile <x86intrin.h>
318///
319/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
320///
321/// \param __A
322/// A 128-bit vector of [8 x bfloat].
323/// \param __B
324/// A 128-bit vector of [8 x bfloat].
325/// \param __D
326/// A 128-bit vector of [4 x float].
327/// \param __U
328/// A 8-bit mask value specifying what is chosen for each element.
329/// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0.
330/// \returns A 128-bit vector of [4 x float] comes from Dot Product of
331/// __A, __B and __D
332static __inline__ __m128 __DEFAULT_FN_ATTRS128
333_mm_maskz_dpbf16_ps(__mmask8 __U, __m128 __D, __m128bh __A, __m128bh __B) {
334 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
335 (__v4sf)_mm_dpbf16_ps(__D, __A, __B),
336 (__v4sf)_mm_setzero_si128());
337}
338
339/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
340///
341/// \headerfile <x86intrin.h>
342///
343/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
344///
345/// \param __A
346/// A 256-bit vector of [16 x bfloat].
347/// \param __B
348/// A 256-bit vector of [16 x bfloat].
349/// \param __D
350/// A 256-bit vector of [8 x float].
351/// \returns A 256-bit vector of [8 x float] comes from Dot Product of
352/// __A, __B and __D
353static __inline__ __m256 __DEFAULT_FN_ATTRS256
354_mm256_dpbf16_ps(__m256 __D, __m256bh __A, __m256bh __B) {
355 return (__m256)__builtin_ia32_dpbf16ps_256((__v8sf)__D,
356 (__v16bf)__A,
357 (__v16bf)__B);
358}
359
360/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
361///
362/// \headerfile <x86intrin.h>
363///
364/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
365///
366/// \param __A
367/// A 256-bit vector of [16 x bfloat].
368/// \param __B
369/// A 256-bit vector of [16 x bfloat].
370/// \param __D
371/// A 256-bit vector of [8 x float].
372/// \param __U
373/// A 16-bit mask value specifying what is chosen for each element.
374/// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D.
375/// \returns A 256-bit vector of [8 x float] comes from Dot Product of
376/// __A, __B and __D
377static __inline__ __m256 __DEFAULT_FN_ATTRS256
378_mm256_mask_dpbf16_ps(__m256 __D, __mmask8 __U, __m256bh __A, __m256bh __B) {
379 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
380 (__v8sf)_mm256_dpbf16_ps(__D, __A, __B),
381 (__v8sf)__D);
382}
383
384/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
385///
386/// \headerfile <x86intrin.h>
387///
388/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
389///
390/// \param __A
391/// A 256-bit vector of [16 x bfloat].
392/// \param __B
393/// A 256-bit vector of [16 x bfloat].
394/// \param __D
395/// A 256-bit vector of [8 x float].
396/// \param __U
397/// A 8-bit mask value specifying what is chosen for each element.
398/// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0.
399/// \returns A 256-bit vector of [8 x float] comes from Dot Product of
400/// __A, __B and __D
401static __inline__ __m256 __DEFAULT_FN_ATTRS256
402_mm256_maskz_dpbf16_ps(__mmask8 __U, __m256 __D, __m256bh __A, __m256bh __B) {
403 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
404 (__v8sf)_mm256_dpbf16_ps(__D, __A, __B),
405 (__v8sf)_mm256_setzero_si256());
406}
407
408/// Convert One Single float Data to One BF16 Data.
409///
410/// \headerfile <x86intrin.h>
411///
412/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
413///
414/// \param __A
415/// A float data.
416/// \returns A bf16 data whose sign field and exponent field keep unchanged,
417/// and fraction field is truncated to 7 bits.
418static __inline__ __bf16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
419 __v4sf __V = {__A, 0, 0, 0};
420 __v8bf __R = __builtin_ia32_cvtneps2bf16_128_mask(
421 (__v4sf)__V, (__v8bf)_mm_undefined_si128(), (__mmask8)-1);
422 return (__bf16)__R[0];
423}
424
425/// Convert Packed BF16 Data to Packed float Data.
426///
427/// \headerfile <x86intrin.h>
428///
429/// \param __A
430/// A 128-bit vector of [4 x bfloat].
431/// \returns A 128-bit vector of [4 x float] come from conversion of __A
432static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
433_mm_cvtpbh_ps(__m128bh __A) {
434 return (__m128)_mm256_castps256_ps128(
435 (__m256) __builtin_convertvector(__A, __v8sf));
436}
437
438/// Convert Packed BF16 Data to Packed float Data.
439///
440/// \headerfile <x86intrin.h>
441///
442/// \param __A
443/// A 128-bit vector of [8 x bfloat].
444/// \returns A 256-bit vector of [8 x float] come from conversion of __A
445static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
446_mm256_cvtpbh_ps(__m128bh __A) {
447 return (__m256) __builtin_convertvector(__A, __v8sf);
448}
449
450/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
451///
452/// \headerfile <x86intrin.h>
453///
454/// \param __U
455/// A 4-bit mask. Elements are zeroed out when the corresponding mask
456/// bit is not set.
457/// \param __A
458/// A 128-bit vector of [4 x bfloat].
459/// \returns A 128-bit vector of [4 x float] come from conversion of __A
460static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
461_mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
462 return (__m128)__builtin_ia32_selectps_128(
463 (__mmask8)__U, (__v4sf)_mm_cvtpbh_ps(__A), (__v4sf)_mm_setzero_ps());
464}
465
466/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
467///
468/// \headerfile <x86intrin.h>
469///
470/// \param __U
471/// A 8-bit mask. Elements are zeroed out when the corresponding mask
472/// bit is not set.
473/// \param __A
474/// A 128-bit vector of [8 x bfloat].
475/// \returns A 256-bit vector of [8 x float] come from conversion of __A
476static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
477_mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
478 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
479 (__v8sf)_mm256_cvtpbh_ps(__A),
480 (__v8sf)_mm256_setzero_ps());
481}
482
483/// Convert Packed BF16 Data to Packed float Data using merging mask.
484///
485/// \headerfile <x86intrin.h>
486///
487/// \param __S
488/// A 128-bit vector of [4 x float]. Elements are copied from __S when
489/// the corresponding mask bit is not set.
490/// \param __U
491/// A 4-bit mask. Elements are zeroed out when the corresponding mask
492/// bit is not set.
493/// \param __A
494/// A 128-bit vector of [4 x bfloat].
495/// \returns A 128-bit vector of [4 x float] come from conversion of __A
496static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
497_mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) {
498 return (__m128)__builtin_ia32_selectps_128(
499 (__mmask8)__U, (__v4sf)_mm_cvtpbh_ps(__A), (__v4sf)__S);
500}
501
502/// Convert Packed BF16 Data to Packed float Data using merging mask.
503///
504/// \headerfile <x86intrin.h>
505///
506/// \param __S
507/// A 256-bit vector of [8 x float]. Elements are copied from __S when
508/// the corresponding mask bit is not set.
509/// \param __U
510/// A 8-bit mask. Elements are zeroed out when the corresponding mask
511/// bit is not set.
512/// \param __A
513/// A 128-bit vector of [8 x bfloat].
514/// \returns A 256-bit vector of [8 x float] come from conversion of __A
515static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
516_mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) {
517 return (__m256)__builtin_ia32_selectps_256(
518 (__mmask8)__U, (__v8sf)_mm256_cvtpbh_ps(__A), (__v8sf)__S);
519}
520
/* Remove the header-local attribute macros so they do not leak to includers. */
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
525
526#endif
527#endif
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS256_CONSTEXPR
Definition avx2intrin.h:29
unsigned char __mmask8
unsigned short __mmask16
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition avxintrin.h:4291
static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_castps256_ps128(__m256 __a)
Returns the lower 128 bits of a 256-bit floating-point vector of [8 x float] as a 128-bit floating-po...
Definition avxintrin.h:4439
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4303
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ void short __D
Definition immintrin.h:342
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2018