clang 23.0.0git
avx512fintrin.h
Go to the documentation of this file.
1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
/* 64-byte (512-bit) vector types, one per signed integer / floating-point
 * element type. The lane count embedded in each name (64, 32, 16, 8) assumes
 * the usual x86 element widths. */
16typedef char __v64qi __attribute__((__vector_size__(64)));
17typedef short __v32hi __attribute__((__vector_size__(64)));
18typedef double __v8df __attribute__((__vector_size__(64)));
19typedef float __v16sf __attribute__((__vector_size__(64)));
20typedef long long __v8di __attribute__((__vector_size__(64)));
21typedef int __v16si __attribute__((__vector_size__(64)));
22
23/* Unsigned types */
/* Unsigned counterparts of the 64-byte vector types. The bitwise and integer
 * add/sub helpers in this file cast their operands to __v16su / __v8du. */
24typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
25typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
26typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
27typedef unsigned int __v16su __attribute__((__vector_size__(64)));
28
29/* We need an explicitly signed variant for char. Note that this shouldn't
30 * appear in the interface though. */
/* 64-byte vector of signed char (plain char's signedness is
 * implementation-defined, hence the separate typedef). */
31typedef signed char __v64qs __attribute__((__vector_size__(64)));
32
/* Public 512-bit vector types: 64 bytes wide and 64-byte aligned.
 * __m512 has float lanes, __m512d double lanes, __m512i long long lanes. */
33typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
34typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
35typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
36
/* Same 64-byte vector layouts as __m512 / __m512d / __m512i but with an
 * alignment requirement of 1 byte.
 * NOTE(review): presumably used for unaligned loads/stores — no user is
 * visible in this part of the file. */
37typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
38typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
39typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
40
/* Mask operand types: __mmask8 is passed alongside 8-lane (64-bit element)
 * operations and __mmask16 alongside 16-lane (32-bit element) operations in
 * this file.
 * NOTE(review): presumably one mask bit per lane — confirm. */
41typedef unsigned char __mmask8;
42typedef unsigned short __mmask16;
43
44/* Rounding mode macros. */
/* _MM_FROUND_TO_NEAREST_INT and _MM_FROUND_TO_NEAREST_TIES_EVEN are two names
 * for the same value (0x00).
 * NOTE(review): these are presumably the values passed as the R argument of
 * the *_round_* macros in this file — confirm. */
45#define _MM_FROUND_TO_NEAREST_INT 0x00
46#define _MM_FROUND_TO_NEAREST_TIES_EVEN 0x00
47#define _MM_FROUND_TO_NEG_INF 0x01
48#define _MM_FROUND_TO_POS_INF 0x02
49#define _MM_FROUND_TO_ZERO 0x03
50#define _MM_FROUND_CUR_DIRECTION 0x04
51
52/* Constants for integer comparison predicates */
53typedef enum {
54 _MM_CMPINT_EQ, /* Equal */
55 _MM_CMPINT_LT, /* Less than */
56 _MM_CMPINT_LE, /* Less than or Equal */
58 _MM_CMPINT_NE, /* Not Equal */
59 _MM_CMPINT_NLT, /* Not Less than */
60#define _MM_CMPINT_GE _MM_CMPINT_NLT /* Greater than or Equal */
61 _MM_CMPINT_NLE /* Not Less than or Equal */
62#define _MM_CMPINT_GT _MM_CMPINT_NLE /* Greater than */
64
65typedef enum
66{
154
155typedef enum
156{
157 _MM_MANT_NORM_1_2, /* interval [1, 2) */
158 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
159 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
160 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
162
163typedef enum
164{
165 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
166 _MM_MANT_SIGN_zero, /* sign = 0 */
167 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
169
170/* Define the default attributes for the functions in this file. */
/* All three variants force inlining, suppress debug info, and target the
 * "avx512f" feature. __DEFAULT_FN_ATTRS512 and __DEFAULT_FN_ATTRS128 also set
 * __min_vector_width__ to 512 and 128 respectively; __DEFAULT_FN_ATTRS sets
 * no minimum vector width. */
171#define __DEFAULT_FN_ATTRS512 \
172 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
173 __min_vector_width__(512)))
174#define __DEFAULT_FN_ATTRS128 \
175 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
176 __min_vector_width__(128)))
177#define __DEFAULT_FN_ATTRS \
178 __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
179
/* *_CONSTEXPR variants of the attribute macros: when compiled as C++11 or
 * later they append `constexpr` to the matching attribute set; otherwise
 * (C, or older C++) they expand to the plain attribute macro. */
180#if defined(__cplusplus) && (__cplusplus >= 201103L)
181#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
182#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
183#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
184#else
185#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
186#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
187#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
188#endif
189
190/* Create vectors with repeated elements */
191
192static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
194 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
195}
196
/* Alias: _mm512_setzero_epi32 expands to _mm512_setzero_si512. */
197#define _mm512_setzero_epi32 _mm512_setzero_si512
198
199static __inline__ __m512d __DEFAULT_FN_ATTRS512
201{
202 return (__m512d)__builtin_ia32_undef512();
203}
204
205static __inline__ __m512 __DEFAULT_FN_ATTRS512
207{
208 return (__m512)__builtin_ia32_undef512();
209}
210
211static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) {
212 return (__m512)__builtin_ia32_undef512();
213}
214
215static __inline__ __m512i __DEFAULT_FN_ATTRS512
217{
218 return (__m512i)__builtin_ia32_undef512();
219}
220
221static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
223 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
225}
226
/// Masked form of _mm512_broadcastd_epi32: passes \a __M, the broadcast of
/// \a __A (viewed as 16 x int) and \a __O to __builtin_ia32_selectd_512.
/// NOTE(review): presumably mask bit i set -> broadcast lane i, clear ->
/// lane i of \a __O; the builtin is not defined in this file — confirm.
227static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
228_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) {
229 return (__m512i)__builtin_ia32_selectd_512(__M,
230 (__v16si) _mm512_broadcastd_epi32(__A),
231 (__v16si) __O);
232}
233
234static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
236 return (__m512i)__builtin_ia32_selectd_512(__M,
237 (__v16si) _mm512_broadcastd_epi32(__A),
238 (__v16si) _mm512_setzero_si512());
239}
240
241static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
243 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
244 0, 0, 0, 0, 0, 0, 0, 0);
245}
246
/// Masked form of _mm512_broadcastq_epi64: passes \a __M, the broadcast of
/// \a __A (viewed as 8 x long long) and \a __O to __builtin_ia32_selectq_512.
/// NOTE(review): presumably mask bit i set -> broadcast lane i, clear ->
/// lane i of \a __O; the builtin is not defined in this file — confirm.
247static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
248_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
249 return (__m512i)__builtin_ia32_selectq_512(
250 __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O);
251}
252
253static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
255 return (__m512i)__builtin_ia32_selectq_512(__M,
256 (__v8di) _mm512_broadcastq_epi64(__A),
257 (__v8di) _mm512_setzero_si512());
258}
259
261 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
262 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
263}
264
/* Alias: _mm512_setzero expands to _mm512_setzero_ps. */
265#define _mm512_setzero _mm512_setzero_ps
266
267static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
269 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
270}
271
272static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
274{
275 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
276 __w, __w, __w, __w, __w, __w, __w, __w };
277}
278
/// Builds a 512-bit vector of doubles whose eight initializer elements are
/// all \a __w.
///
/// \param __w
///    The value copied into every lane.
/// \returns A __m512d with all eight lanes equal to \a __w.
279static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
280_mm512_set1_pd(double __w)
281{
282 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
283}
284
285static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
287{
288 return __extension__ (__m512i)(__v64qi){
289 __w, __w, __w, __w, __w, __w, __w, __w,
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w,
292 __w, __w, __w, __w, __w, __w, __w, __w,
293 __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w };
297}
298
299static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
301{
302 return __extension__ (__m512i)(__v32hi){
303 __w, __w, __w, __w, __w, __w, __w, __w,
304 __w, __w, __w, __w, __w, __w, __w, __w,
305 __w, __w, __w, __w, __w, __w, __w, __w,
306 __w, __w, __w, __w, __w, __w, __w, __w };
307}
308
309static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
311{
312 return __extension__ (__m512i)(__v16si){
313 __s, __s, __s, __s, __s, __s, __s, __s,
314 __s, __s, __s, __s, __s, __s, __s, __s };
315}
316
317static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
319 return (__m512i)__builtin_ia32_selectd_512(__M,
320 (__v16si)_mm512_set1_epi32(__A),
321 (__v16si)_mm512_setzero_si512());
322}
323
/// Builds a 512-bit integer vector whose eight 64-bit initializer elements
/// are all \a __d.
///
/// \param __d
///    The value copied into every lane.
/// \returns A __m512i (built as __v8di) with all eight lanes equal to \a __d.
324static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
325_mm512_set1_epi64(long long __d)
326{
327 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
328}
329
330static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
332 return (__m512i)__builtin_ia32_selectq_512(__M,
333 (__v8di)_mm512_set1_epi64(__A),
334 (__v8di)_mm512_setzero_si512());
335}
336
337static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
339 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
341}
342
/// Builds a vector of 16 ints from the initializer list
/// { __D, __C, __B, __A } repeated four times, so \a __D occupies the first
/// slot of every group of four and \a __A the last.
343static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
344_mm512_set4_epi32(int __A, int __B, int __C, int __D) {
345 return __extension__ (__m512i)(__v16si)
346 { __D, __C, __B, __A, __D, __C, __B, __A,
347 __D, __C, __B, __A, __D, __C, __B, __A };
348}
349
/// Builds a vector of 8 long longs from the initializer list
/// { __D, __C, __B, __A } repeated twice, so \a __D occupies the first slot
/// of each group of four and \a __A the last.
350static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
351_mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D) {
352 return __extension__ (__m512i) (__v8di)
353 { __D, __C, __B, __A, __D, __C, __B, __A };
354}
355
/// Builds a vector of 8 doubles from the initializer list
/// { __D, __C, __B, __A } repeated twice, so \a __D occupies the first slot
/// of each group of four and \a __A the last.
356static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
357_mm512_set4_pd(double __A, double __B, double __C, double __D) {
358 return __extension__ (__m512d)
359 { __D, __C, __B, __A, __D, __C, __B, __A };
360}
361
/// Builds a vector of 16 floats from the initializer list
/// { __D, __C, __B, __A } repeated four times, so \a __D occupies the first
/// slot of every group of four and \a __A the last.
362static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
363_mm512_set4_ps(float __A, float __B, float __C, float __D) {
364 return __extension__ (__m512)
365 { __D, __C, __B, __A, __D, __C, __B, __A,
366 __D, __C, __B, __A, __D, __C, __B, __A };
367}
368
/// Reversed-argument form of _mm512_set4_epi32: forwards (e3, e2, e1, e0).
/// Because _mm512_set4_epi32 lists its last argument first, the resulting
/// initializer order is e0, e1, e2, e3 repeated four times.
369static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
370_mm512_setr4_epi32(int e0, int e1, int e2, int e3) {
371 return _mm512_set4_epi32(e3, e2, e1, e0);
372}
373
/// Reversed-argument form of _mm512_set4_epi64: forwards (e3, e2, e1, e0).
/// Because _mm512_set4_epi64 lists its last argument first, the resulting
/// initializer order is e0, e1, e2, e3 repeated twice.
374static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
375_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) {
376 return _mm512_set4_epi64(e3, e2, e1, e0);
377}
378
/// Reversed-argument form of _mm512_set4_pd: forwards (e3, e2, e1, e0).
/// Because _mm512_set4_pd lists its last argument first, the resulting
/// initializer order is e0, e1, e2, e3 repeated twice.
379static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
380_mm512_setr4_pd(double e0, double e1, double e2, double e3) {
381 return _mm512_set4_pd(e3, e2, e1, e0);
382}
383
/// Reversed-argument form of _mm512_set4_ps: forwards (e3, e2, e1, e0).
/// Because _mm512_set4_ps lists its last argument first, the resulting
/// initializer order is e0, e1, e2, e3 repeated four times.
384static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
385_mm512_setr4_ps(float e0, float e1, float e2, float e3) {
386 return _mm512_set4_ps(e3, e2, e1, e0);
387}
388
389static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
391 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
392 0, 0, 0, 0, 0, 0, 0, 0);
393}
394
395/* Cast between vector types */
396
397static __inline __m512d __DEFAULT_FN_ATTRS512
399{
400 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
401 1, 2, 3, 4, 5, 6, 7);
402}
403
404static __inline __m512 __DEFAULT_FN_ATTRS512
406{
407 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
408 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
409}
410
411static __inline __m128d __DEFAULT_FN_ATTRS512_CONSTEXPR
413{
414 return __builtin_shufflevector(__a, __a, 0, 1);
415}
416
417static __inline __m256d __DEFAULT_FN_ATTRS512_CONSTEXPR
419{
420 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
421}
422
423static __inline __m128 __DEFAULT_FN_ATTRS512_CONSTEXPR
425{
426 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
427}
428
429static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
431 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
432}
433
/// Converts a __m512d to a __m512 with a plain vector cast; both types are
/// 64 bytes wide, and no per-lane arithmetic is written here.
///
/// \param __A
///    The 512-bit vector of doubles to cast.
/// \returns \a __A cast to __m512.
434static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
435_mm512_castpd_ps (__m512d __A)
436{
437 return (__m512) (__A);
438}
439
440static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
442{
443 return (__m512i) (__A);
444}
445
446static __inline__ __m512d __DEFAULT_FN_ATTRS512
448{
449 __m256d __B = __builtin_nondeterministic_value(__B);
450 return __builtin_shufflevector(
451 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
452 __B, 0, 1, 2, 3, 4, 5, 6, 7);
453}
454
455static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
457{
458 return (__m512d) (__A);
459}
460
461static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
463{
464 return (__m512i) (__A);
465}
466
467static __inline__ __m512 __DEFAULT_FN_ATTRS512
469{
470 __m256 __B = __builtin_nondeterministic_value(__B);
471 return __builtin_shufflevector(
472 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
473 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
474}
475
476static __inline__ __m512i __DEFAULT_FN_ATTRS512
478{
479 __m256i __B = __builtin_nondeterministic_value(__B);
480 return __builtin_shufflevector(
481 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
482 __B, 0, 1, 2, 3, 4, 5, 6, 7);
483}
484
485static __inline__ __m512i __DEFAULT_FN_ATTRS512
487{
488 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
489}
490
491static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
493{
494 return (__m512) (__A);
495}
496
497static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
499{
500 return (__m512d) (__A);
501}
502
503static __inline __m128i __DEFAULT_FN_ATTRS512_CONSTEXPR
505{
506 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
507}
508
509static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR
511 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
512}
513
516 return (__mmask16)__a;
517}
518
519static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
521 return (int)__a;
522}
523
524/// Constructs a 512-bit floating-point vector of [8 x double] from a
525/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
526/// contain the value of the source vector. The upper 384 bits are set
527/// to zero.
528///
529/// \headerfile <x86intrin.h>
530///
531/// This intrinsic has no corresponding instruction.
532///
533/// \param __a
534/// A 128-bit vector of [2 x double].
535/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
536/// contain the value of the parameter. The upper 384 bits are set to zero.
537static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
539 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
540}
541
542/// Constructs a 512-bit floating-point vector of [8 x double] from a
543/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
544/// contain the value of the source vector. The upper 256 bits are set
545/// to zero.
546///
547/// \headerfile <x86intrin.h>
548///
549/// This intrinsic has no corresponding instruction.
550///
551/// \param __a
552/// A 256-bit vector of [4 x double].
553/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
554/// contain the value of the parameter. The upper 256 bits are set to zero.
555static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
557 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
558}
559
560/// Constructs a 512-bit floating-point vector of [16 x float] from a
561/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
562/// the value of the source vector. The upper 384 bits are set to zero.
563///
564/// \headerfile <x86intrin.h>
565///
566/// This intrinsic has no corresponding instruction.
567///
568/// \param __a
569/// A 128-bit vector of [4 x float].
570/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
571/// contain the value of the parameter. The upper 384 bits are set to zero.
572static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
574 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
575}
576
577/// Constructs a 512-bit floating-point vector of [16 x float] from a
578/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
579/// the value of the source vector. The upper 256 bits are set to zero.
580///
581/// \headerfile <x86intrin.h>
582///
583/// This intrinsic has no corresponding instruction.
584///
585/// \param __a
586/// A 256-bit vector of [8 x float].
587/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
588/// contain the value of the parameter. The upper 256 bits are set to zero.
589static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
591 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
592}
593
594/// Constructs a 512-bit integer vector from a 128-bit integer vector.
595/// The lower 128 bits contain the value of the source vector. The upper
596/// 384 bits are set to zero.
597///
598/// \headerfile <x86intrin.h>
599///
600/// This intrinsic has no corresponding instruction.
601///
602/// \param __a
603/// A 128-bit integer vector.
604/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
605/// the parameter. The upper 384 bits are set to zero.
606static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
608 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
609}
610
611/// Constructs a 512-bit integer vector from a 256-bit integer vector.
612/// The lower 256 bits contain the value of the source vector. The upper
613/// 256 bits are set to zero.
614///
615/// \headerfile <x86intrin.h>
616///
617/// This intrinsic has no corresponding instruction.
618///
619/// \param __a
620/// A 256-bit integer vector.
621/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
622/// the parameter. The upper 256 bits are set to zero.
623static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
625 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
626}
627
628/* Bitwise operators */
/// Bitwise AND of \a __a and \a __b, computed on the operands viewed as
/// 16 unsigned 32-bit lanes (__v16su).
629static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
630_mm512_and_epi32(__m512i __a, __m512i __b)
631{
632 return (__m512i)((__v16su)__a & (__v16su)__b);
633}
634
/// Masked form of _mm512_and_epi32: passes \a __k, the AND of \a __a and
/// \a __b, and \a __src to __builtin_ia32_selectd_512.
/// NOTE(review): presumably mask bit i set -> AND lane i, clear -> lane i of
/// \a __src; the builtin is not defined in this file — confirm.
635static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
636_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) {
637 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
638 (__v16si) _mm512_and_epi32(__a, __b),
639 (__v16si) __src);
640}
641
642static __inline__ __m512i __DEFAULT_FN_ATTRS512
644{
646 __k, __a, __b);
647}
648
/// Bitwise AND of \a __a and \a __b, computed on the operands viewed as
/// 8 unsigned 64-bit lanes (__v8du).
649static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
650_mm512_and_epi64(__m512i __a, __m512i __b)
651{
652 return (__m512i)((__v8du)__a & (__v8du)__b);
653}
654
/// Masked form of _mm512_and_epi64: passes \a __k, the AND of \a __a and
/// \a __b, and \a __src to __builtin_ia32_selectq_512.
/// NOTE(review): presumably mask bit i set -> AND lane i, clear -> lane i of
/// \a __src; the builtin is not defined in this file — confirm.
655static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
656_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) {
657 return (__m512i)__builtin_ia32_selectq_512(
658 (__mmask8)__k, (__v8di)_mm512_and_epi64(__a, __b), (__v8di)__src);
659}
660
661static __inline__ __m512i __DEFAULT_FN_ATTRS512
663{
665 __k, __a, __b);
666}
667
/// Computes (~__A) & __B on the operands viewed as 8 unsigned 64-bit lanes.
/// Note that the complement applies to the FIRST argument only.
668static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
669_mm512_andnot_si512 (__m512i __A, __m512i __B)
670{
671 return (__m512i)(~(__v8du)__A & (__v8du)__B);
672}
673
/// Computes (~__A) & __B on the operands viewed as 16 unsigned 32-bit lanes.
/// Note that the complement applies to the FIRST argument only.
674static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
675_mm512_andnot_epi32 (__m512i __A, __m512i __B)
676{
677 return (__m512i)(~(__v16su)__A & (__v16su)__B);
678}
679
680static __inline__ __m512i __DEFAULT_FN_ATTRS512
681_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
682{
683 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
684 (__v16si)_mm512_andnot_epi32(__A, __B),
685 (__v16si)__W);
686}
687
688static __inline__ __m512i __DEFAULT_FN_ATTRS512
689_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
690{
692 __U, __A, __B);
693}
694
/// Computes (~__A) & __B on the operands viewed as 8 unsigned 64-bit lanes.
/// Note that the complement applies to the FIRST argument only.
695static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
696_mm512_andnot_epi64(__m512i __A, __m512i __B)
697{
698 return (__m512i)(~(__v8du)__A & (__v8du)__B);
699}
700
701static __inline__ __m512i __DEFAULT_FN_ATTRS512
702_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
703{
704 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
705 (__v8di)_mm512_andnot_epi64(__A, __B),
706 (__v8di)__W);
707}
708
709static __inline__ __m512i __DEFAULT_FN_ATTRS512
710_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
711{
713 __U, __A, __B);
714}
715
/// Bitwise OR of \a __a and \a __b, computed on the operands viewed as
/// 16 unsigned 32-bit lanes (__v16su).
716static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
717_mm512_or_epi32(__m512i __a, __m512i __b)
718{
719 return (__m512i)((__v16su)__a | (__v16su)__b);
720}
721
722static __inline__ __m512i __DEFAULT_FN_ATTRS512
723_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
724{
725 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
726 (__v16si)_mm512_or_epi32(__a, __b),
727 (__v16si)__src);
728}
729
730static __inline__ __m512i __DEFAULT_FN_ATTRS512
732{
733 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
734}
735
/// Bitwise OR of \a __a and \a __b, computed on the operands viewed as
/// 8 unsigned 64-bit lanes (__v8du).
736static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
737_mm512_or_epi64(__m512i __a, __m512i __b)
738{
739 return (__m512i)((__v8du)__a | (__v8du)__b);
740}
741
742static __inline__ __m512i __DEFAULT_FN_ATTRS512
743_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
744{
745 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
746 (__v8di)_mm512_or_epi64(__a, __b),
747 (__v8di)__src);
748}
749
750static __inline__ __m512i __DEFAULT_FN_ATTRS512
751_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
752{
753 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
754}
755
/// Bitwise XOR of \a __a and \a __b, computed on the operands viewed as
/// 16 unsigned 32-bit lanes (__v16su).
756static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
757_mm512_xor_epi32(__m512i __a, __m512i __b)
758{
759 return (__m512i)((__v16su)__a ^ (__v16su)__b);
760}
761
762static __inline__ __m512i __DEFAULT_FN_ATTRS512
763_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
764{
765 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
766 (__v16si)_mm512_xor_epi32(__a, __b),
767 (__v16si)__src);
768}
769
770static __inline__ __m512i __DEFAULT_FN_ATTRS512
772{
773 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
774}
775
/// Bitwise XOR of \a __a and \a __b, computed on the operands viewed as
/// 8 unsigned 64-bit lanes (__v8du).
776static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
777_mm512_xor_epi64(__m512i __a, __m512i __b)
778{
779 return (__m512i)((__v8du)__a ^ (__v8du)__b);
780}
781
782static __inline__ __m512i __DEFAULT_FN_ATTRS512
783_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
784{
785 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
786 (__v8di)_mm512_xor_epi64(__a, __b),
787 (__v8di)__src);
788}
789
790static __inline__ __m512i __DEFAULT_FN_ATTRS512
792{
793 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
794}
795
/// Bitwise AND of the full 512-bit operands, written on 8 unsigned 64-bit
/// lanes; the body is the same expression as _mm512_and_epi64.
796static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
797_mm512_and_si512(__m512i __a, __m512i __b)
798{
799 return (__m512i)((__v8du)__a & (__v8du)__b);
800}
801
/// Bitwise OR of the full 512-bit operands, written on 8 unsigned 64-bit
/// lanes; the body is the same expression as _mm512_or_epi64.
802static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
803_mm512_or_si512(__m512i __a, __m512i __b)
804{
805 return (__m512i)((__v8du)__a | (__v8du)__b);
806}
807
/// Bitwise XOR of the full 512-bit operands, written on 8 unsigned 64-bit
/// lanes; the body is the same expression as _mm512_xor_epi64.
808static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
809_mm512_xor_si512(__m512i __a, __m512i __b)
810{
811 return (__m512i)((__v8du)__a ^ (__v8du)__b);
812}
813
814/* Arithmetic */
815
/// Lane-wise sum \a __a + \a __b on 8 double lanes (__v8df), using the plain
/// vector `+` operator.
816static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
817_mm512_add_pd(__m512d __a, __m512d __b) {
818 return (__m512d)((__v8df)__a + (__v8df)__b);
819}
820
/// Lane-wise sum \a __a + \a __b on 16 float lanes (__v16sf), using the plain
/// vector `+` operator.
821static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
822_mm512_add_ps(__m512 __a, __m512 __b) {
823 return (__m512)((__v16sf)__a + (__v16sf)__b);
824}
825
/// Lane-wise product \a __a * \a __b on 8 double lanes (__v8df), using the
/// plain vector `*` operator.
826static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
827_mm512_mul_pd(__m512d __a, __m512d __b) {
828 return (__m512d)((__v8df)__a * (__v8df)__b);
829}
830
/// Lane-wise product \a __a * \a __b on 16 float lanes (__v16sf), using the
/// plain vector `*` operator.
831static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
832_mm512_mul_ps(__m512 __a, __m512 __b) {
833 return (__m512)((__v16sf)__a * (__v16sf)__b);
834}
835
/// Lane-wise difference \a __a - \a __b on 8 double lanes (__v8df), using the
/// plain vector `-` operator.
836static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
837_mm512_sub_pd(__m512d __a, __m512d __b) {
838 return (__m512d)((__v8df)__a - (__v8df)__b);
839}
840
/// Lane-wise difference \a __a - \a __b on 16 float lanes (__v16sf), using
/// the plain vector `-` operator.
841static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
842_mm512_sub_ps(__m512 __a, __m512 __b) {
843 return (__m512)((__v16sf)__a - (__v16sf)__b);
844}
845
/// Lane-wise sum \a __A + \a __B on 8 64-bit lanes. The addition is written
/// on the unsigned view (__v8du).
/// NOTE(review): presumably chosen so overflow wraps instead of being signed
/// overflow — confirm.
846static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
847_mm512_add_epi64(__m512i __A, __m512i __B) {
848 return (__m512i) ((__v8du) __A + (__v8du) __B);
849}
850
/// Masked form of _mm512_add_epi64: passes \a __U, the sum of \a __A and
/// \a __B, and \a __W to __builtin_ia32_selectq_512.
/// NOTE(review): presumably mask bit i set -> sum lane i, clear -> lane i of
/// \a __W; the builtin is not defined in this file — confirm.
851static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
852_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
853 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
854 (__v8di)_mm512_add_epi64(__A, __B),
855 (__v8di)__W);
856}
857
/// Zero-fallback masked form of _mm512_add_epi64: passes \a __U, the sum of
/// \a __A and \a __B, and _mm512_setzero_si512() to
/// __builtin_ia32_selectq_512.
/// NOTE(review): presumably mask bit i set -> sum lane i, clear -> zero;
/// the builtin is not defined in this file — confirm.
858static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
859_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
860 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
861 (__v8di)_mm512_add_epi64(__A, __B),
862 (__v8di)_mm512_setzero_si512());
863}
864
/// Lane-wise difference \a __A - \a __B on 8 64-bit lanes. The subtraction is
/// written on the unsigned view (__v8du).
/// NOTE(review): presumably chosen so overflow wraps instead of being signed
/// overflow — confirm.
865static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
866_mm512_sub_epi64(__m512i __A, __m512i __B) {
867 return (__m512i) ((__v8du) __A - (__v8du) __B);
868}
869
/// Masked form of _mm512_sub_epi64: passes \a __U, the difference
/// \a __A - \a __B, and \a __W to __builtin_ia32_selectq_512.
/// NOTE(review): presumably mask bit i set -> difference lane i, clear ->
/// lane i of \a __W; the builtin is not defined in this file — confirm.
870static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
871_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
872 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
873 (__v8di)_mm512_sub_epi64(__A, __B),
874 (__v8di)__W);
875}
876
/// Zero-fallback masked form of _mm512_sub_epi64: passes \a __U, the
/// difference \a __A - \a __B, and _mm512_setzero_si512() to
/// __builtin_ia32_selectq_512.
/// NOTE(review): presumably mask bit i set -> difference lane i, clear ->
/// zero; the builtin is not defined in this file — confirm.
877static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
878_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
879 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
880 (__v8di)_mm512_sub_epi64(__A, __B),
881 (__v8di)_mm512_setzero_si512());
882}
883
/// Lane-wise sum \a __A + \a __B on 16 32-bit lanes. The addition is written
/// on the unsigned view (__v16su).
/// NOTE(review): presumably chosen so overflow wraps instead of being signed
/// overflow — confirm.
884static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
885_mm512_add_epi32(__m512i __A, __m512i __B) {
886 return (__m512i) ((__v16su) __A + (__v16su) __B);
887}
888
/// Masked form of _mm512_add_epi32: passes \a __U, the sum of \a __A and
/// \a __B, and \a __W to __builtin_ia32_selectd_512.
/// NOTE(review): presumably mask bit i set -> sum lane i, clear -> lane i of
/// \a __W; the builtin is not defined in this file — confirm.
889static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
890_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
891 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
892 (__v16si)_mm512_add_epi32(__A, __B),
893 (__v16si)__W);
894}
895
/// Zero-fallback masked form of _mm512_add_epi32: passes \a __U, the sum of
/// \a __A and \a __B, and _mm512_setzero_si512() to
/// __builtin_ia32_selectd_512.
/// NOTE(review): presumably mask bit i set -> sum lane i, clear -> zero;
/// the builtin is not defined in this file — confirm.
896static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
897_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
898 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
899 (__v16si)_mm512_add_epi32(__A, __B),
900 (__v16si)_mm512_setzero_si512());
901}
902
/// Lane-wise difference \a __A - \a __B on 16 32-bit lanes. The subtraction
/// is written on the unsigned view (__v16su).
/// NOTE(review): presumably chosen so overflow wraps instead of being signed
/// overflow — confirm.
903static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
904_mm512_sub_epi32(__m512i __A, __m512i __B) {
905 return (__m512i) ((__v16su) __A - (__v16su) __B);
906}
907
/// Masked form of _mm512_sub_epi32: passes \a __U, the difference
/// \a __A - \a __B, and \a __W to __builtin_ia32_selectd_512.
/// NOTE(review): presumably mask bit i set -> difference lane i, clear ->
/// lane i of \a __W; the builtin is not defined in this file — confirm.
908static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
909_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
910 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
911 (__v16si)_mm512_sub_epi32(__A, __B),
912 (__v16si)__W);
913}
914
/// Zero-fallback masked form of _mm512_sub_epi32: passes \a __U, the
/// difference \a __A - \a __B, and _mm512_setzero_si512() to
/// __builtin_ia32_selectd_512.
/// NOTE(review): presumably mask bit i set -> difference lane i, clear ->
/// zero; the builtin is not defined in this file — confirm.
915static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
916_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
917 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
918 (__v16si)_mm512_sub_epi32(__A, __B),
919 (__v16si)_mm512_setzero_si512());
920}
921
/* Expands to __builtin_ia32_maxpd512 on A and B (cast to __v8df), with R
 * forwarded as an int operand.
 * NOTE(review): R is presumably a rounding/exception control built from the
 * _MM_FROUND_* constants — confirm against the builtin. */
922#define _mm512_max_round_pd(A, B, R) \
923 ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
924 (__v8df)(__m512d)(B), (int)(R)))
925
/* Masked variant: feeds U, _mm512_max_round_pd(A, B, R) and W to
 * __builtin_ia32_selectpd_512. */
926#define _mm512_mask_max_round_pd(W, U, A, B, R) \
927 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
928 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
929 (__v8df)(W)))
930
/* Zero-fallback masked variant: same as above with _mm512_setzero_pd() in
 * place of W. */
931#define _mm512_maskz_max_round_pd(U, A, B, R) \
932 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
933 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
934 (__v8df)_mm512_setzero_pd()))
935
936static __inline__ __m512d
938 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
940}
941
/// Masked form of _mm512_max_pd: passes \a __U, _mm512_max_pd(__A, __B) and
/// \a __W to __builtin_ia32_selectpd_512.
/// NOTE(review): presumably mask bit i set -> max lane i, clear -> lane i of
/// \a __W; the builtin is not defined in this file — confirm.
942static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
943_mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
944 return (__m512d)__builtin_ia32_selectpd_512(__U,
945 (__v8df)_mm512_max_pd(__A, __B),
946 (__v8df)__W);
947}
948
/// Zero-fallback masked form of _mm512_max_pd: passes \a __U,
/// _mm512_max_pd(__A, __B) and _mm512_setzero_pd() to
/// __builtin_ia32_selectpd_512.
/// NOTE(review): presumably mask bit i set -> max lane i, clear -> zero;
/// the builtin is not defined in this file — confirm.
949static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
950_mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B) {
951 return (__m512d)__builtin_ia32_selectpd_512(__U,
952 (__v8df)_mm512_max_pd(__A, __B),
953 (__v8df)_mm512_setzero_pd());
954}
955
/* Expands to __builtin_ia32_maxps512 on A and B (cast to __v16sf), with R
 * forwarded as an int operand.
 * NOTE(review): R is presumably a rounding/exception control built from the
 * _MM_FROUND_* constants — confirm against the builtin. */
956#define _mm512_max_round_ps(A, B, R) \
957 ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
958 (__v16sf)(__m512)(B), (int)(R)))
959
/* Masked variant: feeds U, _mm512_max_round_ps(A, B, R) and W to
 * __builtin_ia32_selectps_512. */
960#define _mm512_mask_max_round_ps(W, U, A, B, R) \
961 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
962 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
963 (__v16sf)(W)))
964
/* Zero-fallback masked variant: same as above with _mm512_setzero_ps() in
 * place of W. */
965#define _mm512_maskz_max_round_ps(U, A, B, R) \
966 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
967 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
968 (__v16sf)_mm512_setzero_ps()))
969
970static __inline__ __m512
972 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
974}
975
/// Masked form of _mm512_max_ps: passes \a __U, _mm512_max_ps(__A, __B) and
/// \a __W to __builtin_ia32_selectps_512.
/// NOTE(review): presumably mask bit i set -> max lane i, clear -> lane i of
/// \a __W; the builtin is not defined in this file — confirm.
976static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
977_mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
978 return (__m512)__builtin_ia32_selectps_512(__U,
979 (__v16sf)_mm512_max_ps(__A, __B),
980 (__v16sf)__W);
981}
982
/// Zero-fallback masked form of _mm512_max_ps: passes \a __U,
/// _mm512_max_ps(__A, __B) and _mm512_setzero_ps() to
/// __builtin_ia32_selectps_512.
/// NOTE(review): presumably mask bit i set -> max lane i, clear -> zero;
/// the builtin is not defined in this file — confirm.
983static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
984_mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B) {
985 return (__m512)__builtin_ia32_selectps_512(__U,
986 (__v16sf)_mm512_max_ps(__A, __B),
987 (__v16sf)_mm512_setzero_ps());
988}
989
990static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
991_mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
992 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
993 (__v4sf) __B,
994 (__v4sf) __W,
995 (__mmask8) __U,
997}
998
999static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1000_mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1001 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1002 (__v4sf) __B,
1003 (__v4sf) _mm_setzero_ps (),
1004 (__mmask8) __U,
1006}
1007
/* Unmasked form: calls __builtin_ia32_maxss_round_mask with A, B, an all-zero
 * third operand, a mask of (__mmask8)-1 (all bits set) and R as int.
 * NOTE(review): the third operand is presumably the pass-through source used
 * when the mask bit is clear, and R a rounding/exception control — confirm. */
1008#define _mm_max_round_ss(A, B, R) \
1009 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1010 (__v4sf)(__m128)(B), \
1011 (__v4sf)_mm_setzero_ps(), \
1012 (__mmask8)-1, (int)(R)))
1013
/* Masked form: same builtin with W as the third operand and U as the mask. */
1014#define _mm_mask_max_round_ss(W, U, A, B, R) \
1015 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1016 (__v4sf)(__m128)(B), \
1017 (__v4sf)(__m128)(W), (__mmask8)(U), \
1018 (int)(R)))
1019
/* Zero-fallback masked form: same builtin with _mm_setzero_ps() as the third
 * operand and U as the mask. */
1020#define _mm_maskz_max_round_ss(U, A, B, R) \
1021 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1022 (__v4sf)(__m128)(B), \
1023 (__v4sf)_mm_setzero_ps(), \
1024 (__mmask8)(U), (int)(R)))
1025
1026static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1027_mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1028 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1029 (__v2df) __B,
1030 (__v2df) __W,
1031 (__mmask8) __U,
1033}
1034
1035static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1036_mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1037 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1038 (__v2df) __B,
1039 (__v2df) _mm_setzero_pd (),
1040 (__mmask8) __U,
1042}
1043
/* Unmasked form: calls __builtin_ia32_maxsd_round_mask with A, B, an all-zero
 * third operand, a mask of (__mmask8)-1 (all bits set) and R as int.
 * NOTE(review): the third operand is presumably the pass-through source used
 * when the mask bit is clear, and R a rounding/exception control — confirm. */
1044#define _mm_max_round_sd(A, B, R) \
1045 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1046 (__v2df)(__m128d)(B), \
1047 (__v2df)_mm_setzero_pd(), \
1048 (__mmask8)-1, (int)(R)))
1049
/* Masked form: same builtin with W as the third operand and U as the mask. */
1050#define _mm_mask_max_round_sd(W, U, A, B, R) \
1051 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1052 (__v2df)(__m128d)(B), \
1053 (__v2df)(__m128d)(W), \
1054 (__mmask8)(U), (int)(R)))
1055
/* Zero-fallback masked form: same builtin with _mm_setzero_pd() as the third
 * operand and U as the mask. */
1056#define _mm_maskz_max_round_sd(U, A, B, R) \
1057 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1058 (__v2df)(__m128d)(B), \
1059 (__v2df)_mm_setzero_pd(), \
1060 (__mmask8)(U), (int)(R)))
1061
1062static __inline __m512i
1064 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1065}
1066
/// Masked form of _mm512_max_epi32: passes \a __M, _mm512_max_epi32(__A, __B)
/// and \a __W to __builtin_ia32_selectd_512.
/// NOTE(review): presumably mask bit i set -> max lane i, clear -> lane i of
/// \a __W; the builtin is not defined in this file — confirm.
1067static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1068_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1069 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1070 (__v16si)_mm512_max_epi32(__A, __B),
1071 (__v16si)__W);
1072}
1073
1074static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1075_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1076 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1077 (__v16si)_mm512_max_epi32(__A, __B),
1078 (__v16si)_mm512_setzero_si512());
1079}
1080
1081static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1082_mm512_max_epu32(__m512i __A, __m512i __B) {
1083 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1084}
1085
1086static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1087_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1088 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1089 (__v16si)_mm512_max_epu32(__A, __B),
1090 (__v16si)__W);
1091}
1092
1093static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1094_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1095 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1096 (__v16si)_mm512_max_epu32(__A, __B),
1097 (__v16si)_mm512_setzero_si512());
1098}
1099
1100static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1101_mm512_max_epi64(__m512i __A, __m512i __B) {
1102 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1103}
1104
1105static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1106_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1107 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1108 (__v8di)_mm512_max_epi64(__A, __B),
1109 (__v8di)__W);
1110}
1111
1112static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1113_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1114 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1115 (__v8di)_mm512_max_epi64(__A, __B),
1116 (__v8di)_mm512_setzero_si512());
1117}
1118
1119static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1120_mm512_max_epu64(__m512i __A, __m512i __B) {
1121 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1122}
1123
1124static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1125_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1126 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1127 (__v8di)_mm512_max_epu64(__A, __B),
1128 (__v8di)__W);
1129}
1130
1131static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1132_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1133 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1134 (__v8di)_mm512_max_epu64(__A, __B),
1135 (__v8di)_mm512_setzero_si512());
1136}
1137
/* Packed-double min on 512-bit vectors; R is forwarded to the builtin as its
 * int control argument. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Masked form: the min result and W are combined by the pd select under U. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_min_round_pd((A), (B), (R)), \
      (__v8df)(W)))

/* Zero-masked form: same select against _mm512_setzero_pd(). */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_min_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
1151
1152static __inline__ __m512d
1154 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1156}
1157
1158static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1159_mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1160 return (__m512d)__builtin_ia32_selectpd_512(__U,
1161 (__v8df)_mm512_min_pd(__A, __B),
1162 (__v8df)__W);
1163}
1164
1165static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1166_mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1167 return (__m512d)__builtin_ia32_selectpd_512(__U,
1168 (__v8df)_mm512_min_pd(__A, __B),
1169 (__v8df)_mm512_setzero_pd());
1170}
1171
/* Packed-float min on 512-bit vectors; R is forwarded to the builtin as its
 * int control argument. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Masked form: the min result and W are combined by the ps select under U. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
      (__v16sf)(W)))

/* Zero-masked form: same select against _mm512_setzero_ps(). */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
1185
1186static __inline__ __m512
1188 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1190}
1191
1192static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1193_mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1194 return (__m512)__builtin_ia32_selectps_512(__U,
1195 (__v16sf)_mm512_min_ps(__A, __B),
1196 (__v16sf)__W);
1197}
1198
1199static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1200_mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1201 return (__m512)__builtin_ia32_selectps_512(__U,
1202 (__v16sf)_mm512_min_ps(__A, __B),
1203 (__v16sf)_mm512_setzero_ps());
1204}
1205
1206static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1207_mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1208 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1209 (__v4sf) __B,
1210 (__v4sf) __W,
1211 (__mmask8) __U,
1213}
1214
1215static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1216_mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1217 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1218 (__v4sf) __B,
1219 (__v4sf) _mm_setzero_ps (),
1220 (__mmask8) __U,
1222}
1223
/* Scalar-float min with a caller-supplied control R. The unmasked form passes
 * an all-ones mask and a zero vector as the third operand. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

/* Masked form: W is the third operand, U the mask. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: a zero vector is the third operand, U the mask. */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
1241
1242static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1243_mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1244 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1245 (__v2df) __B,
1246 (__v2df) __W,
1247 (__mmask8) __U,
1249}
1250
1251static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1252_mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1253 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1254 (__v2df) __B,
1255 (__v2df) _mm_setzero_pd (),
1256 (__mmask8) __U,
1258}
1259
/* Scalar-double min with a caller-supplied control R. The unmasked form passes
 * an all-ones mask and a zero vector as the third operand. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))

/* Masked form: W is the third operand, U the mask. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: a zero vector is the third operand, U the mask. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
1277
1278static __inline __m512i
1280 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1281}
1282
1283static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1284_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1285 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1286 (__v16si)_mm512_min_epi32(__A, __B),
1287 (__v16si)__W);
1288}
1289
1290static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1291_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1292 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1293 (__v16si)_mm512_min_epi32(__A, __B),
1294 (__v16si)_mm512_setzero_si512());
1295}
1296
1297static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1298_mm512_min_epu32(__m512i __A, __m512i __B) {
1299 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1300}
1301
1302static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1303_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1304 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1305 (__v16si)_mm512_min_epu32(__A, __B),
1306 (__v16si)__W);
1307}
1308
1309static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1310_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
1311 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1312 (__v16si)_mm512_min_epu32(__A, __B),
1313 (__v16si)_mm512_setzero_si512());
1314}
1315
1316static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1317_mm512_min_epi64(__m512i __A, __m512i __B) {
1318 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1319}
1320
1321static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1322_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1323 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1324 (__v8di)_mm512_min_epi64(__A, __B),
1325 (__v8di)__W);
1326}
1327
1328static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1329_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
1330 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1331 (__v8di)_mm512_min_epi64(__A, __B),
1332 (__v8di)_mm512_setzero_si512());
1333}
1334
1335static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1336_mm512_min_epu64(__m512i __A, __m512i __B) {
1337 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1338}
1339
1340static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1341_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
1342 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1343 (__v8di)_mm512_min_epu64(__A, __B),
1344 (__v8di)__W);
1345}
1346
1347static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1348_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
1349 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1350 (__v8di)_mm512_min_epu64(__A, __B),
1351 (__v8di)_mm512_setzero_si512());
1352}
1353
1354static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1355_mm512_mul_epi32(__m512i __X, __m512i __Y) {
1356 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1357}
1358
1359static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1360_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
1361 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1362 (__v8di)_mm512_mul_epi32(__X, __Y),
1363 (__v8di)__W);
1364}
1365
1366static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1367_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) {
1368 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1369 (__v8di)_mm512_mul_epi32(__X, __Y),
1370 (__v8di)_mm512_setzero_si512 ());
1371}
1372
1373static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1374_mm512_mul_epu32(__m512i __X, __m512i __Y) {
1375 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1376}
1377
1378static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1379_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
1380 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1381 (__v8di)_mm512_mul_epu32(__X, __Y),
1382 (__v8di)__W);
1383}
1384
1385static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1386_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) {
1387 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1388 (__v8di)_mm512_mul_epu32(__X, __Y),
1389 (__v8di)_mm512_setzero_si512 ());
1390}
1391
1392static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1393_mm512_mullo_epi32(__m512i __A, __m512i __B) {
1394 return (__m512i) ((__v16su) __A * (__v16su) __B);
1395}
1396
1397static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1398_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
1399 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1400 (__v16si)_mm512_mullo_epi32(__A, __B),
1401 (__v16si)_mm512_setzero_si512());
1402}
1403
1404static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1405_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
1406 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1407 (__v16si)_mm512_mullo_epi32(__A, __B),
1408 (__v16si)__W);
1409}
1410
1411static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1412_mm512_mullox_epi64(__m512i __A, __m512i __B) {
1413 return (__m512i) ((__v8du) __A * (__v8du) __B);
1414}
1415
1416static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1417_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1418 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1419 (__v8di)_mm512_mullox_epi64(__A, __B),
1420 (__v8di)__W);
1421}
1422
/* Packed-double square root; R is forwarded to the builtin as its int
 * control argument. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))

/* Masked form: the sqrt result and W are combined by the pd select under U. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_sqrt_round_pd((A), (R)), \
      (__v8df)(__m512d)(W)))

/* Zero-masked form: same select against _mm512_setzero_pd(). */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_sqrt_round_pd((A), (R)), \
      (__v8df)_mm512_setzero_pd()))
1435
1436static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) {
1437 return (__m512d)__builtin_elementwise_sqrt((__v8df)__A);
1438}
1439
1440static __inline__ __m512d __DEFAULT_FN_ATTRS512
1441_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) {
1442 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
1443 (__v8df)__W);
1444}
1445
1446static __inline__ __m512d __DEFAULT_FN_ATTRS512
1448 return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
1449 (__v8df)_mm512_setzero_pd());
1450}
1451
/* Packed-float square root; R is forwarded to the builtin as its int
 * control argument. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))

/* Masked form: the sqrt result and W are combined by the ps select under U. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
      (__v16sf)(__m512)(W)))

/* Zero-masked form: same select against _mm512_setzero_ps(). */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
      (__v16sf)_mm512_setzero_ps()))
1464
1465static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) {
1466 return (__m512)__builtin_elementwise_sqrt((__v16sf)__A);
1467}
1468
1469static __inline__ __m512 __DEFAULT_FN_ATTRS512
1470_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) {
1471 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
1472 (__v16sf)__W);
1473}
1474
1475static __inline__ __m512 __DEFAULT_FN_ATTRS512
1477 return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
1478 (__v16sf)_mm512_setzero_ps());
1479}
1480
1481static __inline__ __m512d __DEFAULT_FN_ATTRS512
1483{
1484 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1485 (__v8df)
1487 (__mmask8) -1);}
1488
1489static __inline__ __m512d __DEFAULT_FN_ATTRS512
1490_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1491{
1492 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1493 (__v8df) __W,
1494 (__mmask8) __U);
1495}
1496
1497static __inline__ __m512d __DEFAULT_FN_ATTRS512
1499{
1500 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1501 (__v8df)
1503 (__mmask8) __U);
1504}
1505
1506static __inline__ __m512 __DEFAULT_FN_ATTRS512
1508{
1509 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1510 (__v16sf)
1512 (__mmask16) -1);
1513}
1514
1515static __inline__ __m512 __DEFAULT_FN_ATTRS512
1516_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1517{
1518 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1519 (__v16sf) __W,
1520 (__mmask16) __U);
1521}
1522
1523static __inline__ __m512 __DEFAULT_FN_ATTRS512
1525{
1526 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1527 (__v16sf)
1529 (__mmask16) __U);
1530}
1531
1532static __inline__ __m128 __DEFAULT_FN_ATTRS128
1533_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1534{
1535 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1536 (__v4sf) __B,
1537 (__v4sf)
1538 _mm_setzero_ps (),
1539 (__mmask8) -1);
1540}
1541
1542static __inline__ __m128 __DEFAULT_FN_ATTRS128
1543_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1544{
1545 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1546 (__v4sf) __B,
1547 (__v4sf) __W,
1548 (__mmask8) __U);
1549}
1550
1551static __inline__ __m128 __DEFAULT_FN_ATTRS128
1552_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1553{
1554 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1555 (__v4sf) __B,
1556 (__v4sf) _mm_setzero_ps (),
1557 (__mmask8) __U);
1558}
1559
1560static __inline__ __m128d __DEFAULT_FN_ATTRS128
1561_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1562{
1563 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1564 (__v2df) __B,
1565 (__v2df)
1566 _mm_setzero_pd (),
1567 (__mmask8) -1);
1568}
1569
1570static __inline__ __m128d __DEFAULT_FN_ATTRS128
1571_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1572{
1573 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1574 (__v2df) __B,
1575 (__v2df) __W,
1576 (__mmask8) __U);
1577}
1578
1579static __inline__ __m128d __DEFAULT_FN_ATTRS128
1580_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1581{
1582 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1583 (__v2df) __B,
1584 (__v2df) _mm_setzero_pd (),
1585 (__mmask8) __U);
1586}
1587
1588static __inline__ __m512d __DEFAULT_FN_ATTRS512
1590{
1591 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1592 (__v8df)
1594 (__mmask8) -1);
1595}
1596
1597static __inline__ __m512d __DEFAULT_FN_ATTRS512
1598_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1599{
1600 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1601 (__v8df) __W,
1602 (__mmask8) __U);
1603}
1604
1605static __inline__ __m512d __DEFAULT_FN_ATTRS512
1607{
1608 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1609 (__v8df)
1611 (__mmask8) __U);
1612}
1613
1614static __inline__ __m512 __DEFAULT_FN_ATTRS512
1616{
1617 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1618 (__v16sf)
1620 (__mmask16) -1);
1621}
1622
1623static __inline__ __m512 __DEFAULT_FN_ATTRS512
1624_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1625{
1626 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1627 (__v16sf) __W,
1628 (__mmask16) __U);
1629}
1630
1631static __inline__ __m512 __DEFAULT_FN_ATTRS512
1633{
1634 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1635 (__v16sf)
1637 (__mmask16) __U);
1638}
1639
1640static __inline__ __m128 __DEFAULT_FN_ATTRS128
1641_mm_rcp14_ss(__m128 __A, __m128 __B)
1642{
1643 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1644 (__v4sf) __B,
1645 (__v4sf)
1646 _mm_setzero_ps (),
1647 (__mmask8) -1);
1648}
1649
1650static __inline__ __m128 __DEFAULT_FN_ATTRS128
1651_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1652{
1653 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1654 (__v4sf) __B,
1655 (__v4sf) __W,
1656 (__mmask8) __U);
1657}
1658
1659static __inline__ __m128 __DEFAULT_FN_ATTRS128
1660_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1661{
1662 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1663 (__v4sf) __B,
1664 (__v4sf) _mm_setzero_ps (),
1665 (__mmask8) __U);
1666}
1667
1668static __inline__ __m128d __DEFAULT_FN_ATTRS128
1669_mm_rcp14_sd(__m128d __A, __m128d __B)
1670{
1671 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1672 (__v2df) __B,
1673 (__v2df)
1674 _mm_setzero_pd (),
1675 (__mmask8) -1);
1676}
1677
1678static __inline__ __m128d __DEFAULT_FN_ATTRS128
1679_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1680{
1681 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1682 (__v2df) __B,
1683 (__v2df) __W,
1684 (__mmask8) __U);
1685}
1686
1687static __inline__ __m128d __DEFAULT_FN_ATTRS128
1688_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1689{
1690 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1691 (__v2df) __B,
1692 (__v2df) _mm_setzero_pd (),
1693 (__mmask8) __U);
1694}
1695
1696static __inline __m512 __DEFAULT_FN_ATTRS512
1698{
1699 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1701 (__v16sf) __A, (unsigned short)-1,
1703}
1704
1705static __inline__ __m512 __DEFAULT_FN_ATTRS512
1706_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1707{
1708 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1710 (__v16sf) __W, __U,
1712}
1713
1714static __inline __m512d __DEFAULT_FN_ATTRS512
1716{
1717 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1719 (__v8df) __A, (unsigned char)-1,
1721}
1722
1723static __inline__ __m512d __DEFAULT_FN_ATTRS512
1724_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1725{
1726 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1728 (__v8df) __W, __U,
1730}
1731
1732static __inline__ __m512 __DEFAULT_FN_ATTRS512
1733_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1734{
1735 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1737 (__v16sf) __W, __U,
1739}
1740
1741static __inline __m512 __DEFAULT_FN_ATTRS512
1743{
1744 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1746 (__v16sf) __A, (unsigned short)-1,
1748}
1749
1750static __inline __m512d __DEFAULT_FN_ATTRS512
1751_mm512_ceil_pd(__m512d __A)
1752{
1753 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1755 (__v8df) __A, (unsigned char)-1,
1757}
1758
1759static __inline__ __m512d __DEFAULT_FN_ATTRS512
1760_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1761{
1762 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1764 (__v8df) __W, __U,
1766}
1767
1768static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1769_mm512_abs_epi64(__m512i __A) {
1770 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1771}
1772
1773static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1774_mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
1775 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1776 (__v8di)_mm512_abs_epi64(__A),
1777 (__v8di)__W);
1778}
1779
1780static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1782 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1783 (__v8di)_mm512_abs_epi64(__A),
1784 (__v8di)_mm512_setzero_si512());
1785}
1786
1787static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1788_mm512_abs_epi32(__m512i __A) {
1789 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1790}
1791
1792static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1793_mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
1794 return (__m512i)__builtin_ia32_selectd_512(__U,
1795 (__v16si)_mm512_abs_epi32(__A),
1796 (__v16si)__W);
1797}
1798
1799static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1801 return (__m512i)__builtin_ia32_selectd_512(__U,
1802 (__v16si)_mm512_abs_epi32(__A),
1803 (__v16si)_mm512_setzero_si512());
1804}
1805
1806static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1807_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1808 __A = _mm_add_ss(__A, __B);
1809 return __builtin_ia32_selectss_128(__U, __A, __W);
1810}
1811
1812static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1813_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1814 __A = _mm_add_ss(__A, __B);
1815 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1816}
1817
/* Scalar-float add with a caller-supplied control R. The unmasked form passes
 * an all-ones mask and a zero vector as the third operand. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

/* Masked form: W is the third operand, U the mask. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: a zero vector is the third operand, U the mask. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
1835
1836static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1837_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1838 __A = _mm_add_sd(__A, __B);
1839 return __builtin_ia32_selectsd_128(__U, __A, __W);
1840}
1841
1842static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1843_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1844 __A = _mm_add_sd(__A, __B);
1845 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1846}
/* Scalar-double add with a caller-supplied control R. The unmasked form passes
 * an all-ones mask and a zero vector as the third operand. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))

/* Masked form: W is the third operand, U the mask. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: a zero vector is the third operand, U the mask. */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
1864
1865static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1866_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1867 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1868 (__v8df)_mm512_add_pd(__A, __B),
1869 (__v8df)__W);
1870}
1871
1872static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1873_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1874 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1875 (__v8df)_mm512_add_pd(__A, __B),
1876 (__v8df)_mm512_setzero_pd());
1877}
1878
1879static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1880_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1881 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1882 (__v16sf)_mm512_add_ps(__A, __B),
1883 (__v16sf)__W);
1884}
1885
1886static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1887_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1888 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1889 (__v16sf)_mm512_add_ps(__A, __B),
1890 (__v16sf)_mm512_setzero_ps());
1891}
1892
/* Packed-double add on 512-bit vectors; R is forwarded to the builtin as its
 * int control argument. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Masked form: the sum and W are combined by the pd select under U. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_add_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))

/* Zero-masked form: same select against _mm512_setzero_pd(). */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_add_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
1906
/* Packed-float add on 512-bit vectors; R is forwarded to the builtin as its
 * int control argument. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Masked form: the sum and W are combined by the ps select under U. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))

/* Zero-masked form: same select against _mm512_setzero_ps(). */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
1920
1921static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1922_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1923 __A = _mm_sub_ss(__A, __B);
1924 return __builtin_ia32_selectss_128(__U, __A, __W);
1925}
1926
1927static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1928_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1929 __A = _mm_sub_ss(__A, __B);
1930 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1931}
/* Scalar-float subtract with a caller-supplied control R. The unmasked form
 * passes an all-ones mask and a zero vector as the third operand. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

/* Masked form: W is the third operand, U the mask. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: a zero vector is the third operand, U the mask. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
1949
1950static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1951_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1952 __A = _mm_sub_sd(__A, __B);
1953 return __builtin_ia32_selectsd_128(__U, __A, __W);
1954}
1955
1956static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1957_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1958 __A = _mm_sub_sd(__A, __B);
1959 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1960}
1961
/* Scalar-double subtract with a caller-supplied control R. The unmasked form
 * passes an all-ones mask and a zero vector as the third operand. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))

/* Masked form: W is the third operand, U the mask. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: a zero vector is the third operand, U the mask. */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
1979
1980static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1981_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1982 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1983 (__v8df)_mm512_sub_pd(__A, __B),
1984 (__v8df)__W);
1985}
1986
1987static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1988_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1989 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1990 (__v8df)_mm512_sub_pd(__A, __B),
1991 (__v8df)_mm512_setzero_pd());
1992}
1993
1994static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1995_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1996 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1997 (__v16sf)_mm512_sub_ps(__A, __B),
1998 (__v16sf)__W);
1999}
2000
2001static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2002_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2003 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2004 (__v16sf)_mm512_sub_ps(__A, __B),
2005 (__v16sf)_mm512_setzero_ps());
2006}
2007
/* Packed-double subtract on 512-bit vectors; R is forwarded to the builtin as
 * its int control argument. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Masked form: the difference and W are combined by the pd select under U. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))

/* Zero-masked form: same select against _mm512_setzero_pd(). */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
2021
/* Packed-float subtract on 512-bit vectors; R is forwarded to the builtin as
 * its int control argument. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Masked form: the difference and W are combined by the ps select under U. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))

/* Zero-masked form: same select against _mm512_setzero_ps(). */
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
2035
2036static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2037_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2038 __A = _mm_mul_ss(__A, __B);
2039 return __builtin_ia32_selectss_128(__U, __A, __W);
2040}
2041
2042static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2043_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2044 __A = _mm_mul_ss(__A, __B);
2045 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2046}
/* Scalar-float multiply with rounding argument R, via
 * __builtin_ia32_mulss_round_mask; the unmasked form passes an all-ones mask
 * and a zero source vector. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))

/* Same builtin with W as the source vector and U as the mask. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))

/* Same builtin with a zero source vector and U as the mask. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
2064
2065static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2066_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2067 __A = _mm_mul_sd(__A, __B);
2068 return __builtin_ia32_selectsd_128(__U, __A, __W);
2069}
2070
2071static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2072_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2073 __A = _mm_mul_sd(__A, __B);
2074 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2075}
2076
/* Scalar-double multiply with rounding argument R, via
 * __builtin_ia32_mulsd_round_mask; the unmasked form passes an all-ones mask
 * and a zero source vector. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))

/* Same builtin with W as the source vector and U as the mask. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))

/* Same builtin with a zero source vector and U as the mask. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
2094
2095static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2096_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2097 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2098 (__v8df)_mm512_mul_pd(__A, __B),
2099 (__v8df)__W);
2100}
2101
2102static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2103_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2104 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2105 (__v8df)_mm512_mul_pd(__A, __B),
2106 (__v8df)_mm512_setzero_pd());
2107}
2108
2109static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2110_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2111 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2112 (__v16sf)_mm512_mul_ps(__A, __B),
2113 (__v16sf)__W);
2114}
2115
2116static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2117_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2118 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2119 (__v16sf)_mm512_mul_ps(__A, __B),
2120 (__v16sf)_mm512_setzero_ps());
2121}
2122
/* Packed-double multiply with an explicit rounding argument R. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))

/* _mm512_mul_round_pd result selected against W under mask U. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))

/* _mm512_mul_round_pd result selected against a zero vector under mask U. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
2136
/* Packed-float multiply with an explicit rounding argument R. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))

/* _mm512_mul_round_ps result selected against W under mask U. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))

/* _mm512_mul_round_ps result selected against a zero vector under mask U. */
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
2150
2151static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2152_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2153 __A = _mm_div_ss(__A, __B);
2154 return __builtin_ia32_selectss_128(__U, __A, __W);
2155}
2156
2157static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2158_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2159 __A = _mm_div_ss(__A, __B);
2160 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2161}
2162
/* Scalar-float divide with rounding argument R, via
 * __builtin_ia32_divss_round_mask; the unmasked form passes an all-ones mask
 * and a zero source vector. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))

/* Same builtin with W as the source vector and U as the mask. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))

/* Same builtin with a zero source vector and U as the mask. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
2180
2181static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2182_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2183 __A = _mm_div_sd(__A, __B);
2184 return __builtin_ia32_selectsd_128(__U, __A, __W);
2185}
2186
2187static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2188_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2189 __A = _mm_div_sd(__A, __B);
2190 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2191}
2192
/* Scalar-double divide with rounding argument R, via
 * __builtin_ia32_divsd_round_mask; the unmasked form passes an all-ones mask
 * and a zero source vector. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))

/* Same builtin with W as the source vector and U as the mask. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))

/* Same builtin with a zero source vector and U as the mask. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
2210
2211static __inline __m512d
2213 return (__m512d)((__v8df)__a/(__v8df)__b);
2214}
2215
2216static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2217_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2218 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2219 (__v8df)_mm512_div_pd(__A, __B),
2220 (__v8df)__W);
2221}
2222
2223static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2224_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2225 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2226 (__v8df)_mm512_div_pd(__A, __B),
2227 (__v8df)_mm512_setzero_pd());
2228}
2229
2230static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2231_mm512_div_ps(__m512 __a, __m512 __b) {
2232 return (__m512)((__v16sf)__a/(__v16sf)__b);
2233}
2234
2235static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2236_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2237 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2238 (__v16sf)_mm512_div_ps(__A, __B),
2239 (__v16sf)__W);
2240}
2241
2242static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2243_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2244 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2245 (__v16sf)_mm512_div_ps(__A, __B),
2246 (__v16sf)_mm512_setzero_ps());
2247}
2248
/* Packed-double divide with an explicit rounding argument R. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))

/* _mm512_div_round_pd result selected against W under mask U. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_div_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))

/* _mm512_div_round_pd result selected against a zero vector under mask U. */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_div_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
2262
/* Packed-float divide with an explicit rounding argument R. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))

/* _mm512_div_round_ps result selected against W under mask U. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))

/* _mm512_div_round_ps result selected against a zero vector under mask U. */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
2276
/* Packed-float roundscale family. Every form expands to
 * __builtin_ia32_rndscaleps_mask(input, imm, source, mask, rounding).
 * Forms without an R parameter pass _MM_FROUND_CUR_DIRECTION as the last
 * operand; unmasked forms pass an all-ones mask and an undefined source. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(B), (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))

/* Argument order: A is the source vector, B the mask, C the input. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), (__v16sf)(__m512)(A), \
      (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))

/* Argument order: A is the mask, B the input; the source is a zero vector. */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))

/* As _mm512_mask_roundscale_ps, with R forwarded as the last operand. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), (__v16sf)(__m512)(A), \
      (__mmask16)(B), (int)(R)))

/* As _mm512_maskz_roundscale_ps, with R forwarded as the last operand. */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(A), (int)(R)))

/* As _mm512_roundscale_ps, with R forwarded as the last operand. */
#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))
2308
/* Packed-double roundscale family. Every form expands to
 * __builtin_ia32_rndscalepd_mask(input, imm, source, mask, rounding).
 * Forms without an R parameter pass _MM_FROUND_CUR_DIRECTION as the last
 * operand; unmasked forms pass an all-ones mask and an undefined source. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(B), (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

/* Argument order: A is the source vector, B the mask, C the input. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), (__v8df)(__m512d)(A), \
      (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))

/* Argument order: A is the mask, B the input; the source is a zero vector. */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))

/* As _mm512_mask_roundscale_pd, with R forwarded as the last operand. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), (__v8df)(__m512d)(A), \
      (__mmask8)(B), (int)(R)))

/* As _mm512_maskz_roundscale_pd, with R forwarded as the last operand. */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), (int)(R)))

/* As _mm512_roundscale_pd, with R forwarded as the last operand. */
#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(imm), (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))
2340
/* Packed-double fmadd with rounding argument R. The four forms differ only
 * in which builtin flavour (_mask, _mask3, _maskz) receives the operands and
 * in the mask value; the unmasked form passes an all-ones mask. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2367
2368
/* Packed-double fmsub with rounding argument R, expressed as the fmadd
 * builtin applied to a negated C. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2388
2389
/* Packed-double fnmadd with rounding argument R, expressed as the fmadd
 * builtin applied to a negated A. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2409
2410
/* Packed-double fnmsub with rounding argument R, expressed as the fmadd
 * builtin applied to a negated A and a negated C. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2423
2424static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2425_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
2426 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2427 (__v8df)__C);
2428}
2429
2430static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2431_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2432 return (__m512d)__builtin_ia32_selectpd_512(
2433 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__A);
2434}
2435
2436static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2437_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2438 return (__m512d)__builtin_ia32_selectpd_512(
2439 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__C);
2440}
2441
2442static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2443_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2444 return (__m512d)__builtin_ia32_selectpd_512(
2445 (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C),
2446 (__v8df)_mm512_setzero_pd());
2447}
2448
2449static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2450_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
2451 return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
2452 -(__v8df)__C);
2453}
2454
2455static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2456_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2457 return (__m512d)__builtin_ia32_selectpd_512(
2458 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__A);
2459}
2460
2461static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2462_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2463 return (__m512d)__builtin_ia32_selectpd_512(
2464 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__C);
2465}
2466
2467static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2468_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2469 return (__m512d)__builtin_ia32_selectpd_512(
2470 (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C),
2471 (__v8df)_mm512_setzero_pd());
2472}
2473
2474static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2475_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
2476 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2477 (__v8df)__C);
2478}
2479
2480static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2481_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2482 return (__m512d)__builtin_ia32_selectpd_512(
2483 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__A);
2484}
2485
2486static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2487_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2488 return (__m512d)__builtin_ia32_selectpd_512(
2489 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__C);
2490}
2491
2492static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2493_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2494 return (__m512d)__builtin_ia32_selectpd_512(
2495 (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C),
2496 (__v8df)_mm512_setzero_pd());
2497}
2498
2499static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2500_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
2501 return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
2502 -(__v8df)__C);
2503}
2504
2505static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2506_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
2507 return (__m512d)__builtin_ia32_selectpd_512(
2508 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__A);
2509}
2510
2511static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2512_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
2513 return (__m512d)__builtin_ia32_selectpd_512(
2514 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__C);
2515}
2516
2517static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
2518_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2519 return (__m512d)__builtin_ia32_selectpd_512(
2520 (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C),
2521 (__v8df)_mm512_setzero_pd());
2522}
2523
/* Packed-float fmadd with rounding argument R. The four forms differ only
 * in which builtin flavour (_mask, _mask3, _maskz) receives the operands and
 * in the mask value; the unmasked form passes an all-ones mask. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2550
2551
/* Packed-float fmsub with rounding argument R, expressed as the fmadd
 * builtin applied to a negated C. */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2571
2572
/* Packed-float fnmadd with rounding argument R, expressed as the fmadd
 * builtin with one multiplicand negated. Note the unmasked form negates B
 * while the mask3/maskz forms negate A. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2592
2593
/* Packed-float fnmsub with rounding argument R, expressed as the fmadd
 * builtin with one multiplicand and C negated. Note the unmasked form
 * negates B while the maskz form negates A. */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2606
2607static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2608_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
2609 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2610 (__v16sf)__C);
2611}
2612
2613static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2614_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2615 return (__m512)__builtin_ia32_selectps_512(
2616 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__A);
2617}
2618
2619static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2620_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2621 return (__m512)__builtin_ia32_selectps_512(
2622 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__C);
2623}
2624
2625static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2626_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2627 return (__m512)__builtin_ia32_selectps_512(
2628 (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C),
2629 (__v16sf)_mm512_setzero_ps());
2630}
2631
2632static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2633_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
2634 return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
2635 -(__v16sf)__C);
2636}
2637
2638static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2639_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2640 return (__m512)__builtin_ia32_selectps_512(
2641 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__A);
2642}
2643
2644static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2645_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2646 return (__m512)__builtin_ia32_selectps_512(
2647 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__C);
2648}
2649
2650static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2651_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2652 return (__m512)__builtin_ia32_selectps_512(
2653 (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C),
2654 (__v16sf)_mm512_setzero_ps());
2655}
2656
2657static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2658_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
2659 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2660 (__v16sf)__C);
2661}
2662
2663static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2664_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2665 return (__m512)__builtin_ia32_selectps_512(
2666 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__A);
2667}
2668
2669static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2670_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2671 return (__m512)__builtin_ia32_selectps_512(
2672 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__C);
2673}
2674
2675static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2676_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2677 return (__m512)__builtin_ia32_selectps_512(
2678 (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C),
2679 (__v16sf)_mm512_setzero_ps());
2680}
2681
2682static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2683_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
2684 return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
2685 -(__v16sf)__C);
2686}
2687
2688static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2689_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
2690 return (__m512)__builtin_ia32_selectps_512(
2691 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__A);
2692}
2693
2694static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2695_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
2696 return (__m512)__builtin_ia32_selectps_512(
2697 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__C);
2698}
2699
2700static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2701_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2702 return (__m512)__builtin_ia32_selectps_512(
2703 (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C),
2704 (__v16sf)_mm512_setzero_ps());
2705}
2706
/* Packed-double fmaddsub with rounding argument R. The four forms differ
 * only in which builtin flavour (_mask, _mask3, _maskz) receives the
 * operands and in the mask value; the unmasked form passes an all-ones
 * mask. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2733
2734
/* Packed-double fmsubadd with rounding argument R, expressed as the
 * fmaddsub builtin applied to a negated C. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2754
2755
2756static __inline__ __m512d __DEFAULT_FN_ATTRS512
2757_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2758{
2759 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2760 (__v8df) __B,
2761 (__v8df) __C,
2762 (__mmask8) -1,
2764}
2765
2766static __inline__ __m512d __DEFAULT_FN_ATTRS512
2767_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2768{
2769 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2770 (__v8df) __B,
2771 (__v8df) __C,
2772 (__mmask8) __U,
2774}
2775
2776static __inline__ __m512d __DEFAULT_FN_ATTRS512
2777_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2778{
2779 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2780 (__v8df) __B,
2781 (__v8df) __C,
2782 (__mmask8) __U,
2784}
2785
2786static __inline__ __m512d __DEFAULT_FN_ATTRS512
2787_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2788{
2789 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2790 (__v8df) __B,
2791 (__v8df) __C,
2792 (__mmask8) __U,
2794}
2795
2796static __inline__ __m512d __DEFAULT_FN_ATTRS512
2797_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2798{
2799 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2800 (__v8df) __B,
2801 -(__v8df) __C,
2802 (__mmask8) -1,
2804}
2805
2806static __inline__ __m512d __DEFAULT_FN_ATTRS512
2807_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2808{
2809 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2810 (__v8df) __B,
2811 -(__v8df) __C,
2812 (__mmask8) __U,
2814}
2815
2816static __inline__ __m512d __DEFAULT_FN_ATTRS512
2817_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2818{
2819 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2820 (__v8df) __B,
2821 -(__v8df) __C,
2822 (__mmask8) __U,
2824}
2825
/* Packed-float fmaddsub with rounding argument R. The four forms differ
 * only in which builtin flavour (_mask, _mask3, _maskz) receives the
 * operands and in the mask value; the unmasked form passes an all-ones
 * mask. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2852
2853
/* Packed-float fmsubadd with rounding argument R, expressed as the
 * fmaddsub builtin applied to a negated C. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2873
2874
2875static __inline__ __m512 __DEFAULT_FN_ATTRS512
2876_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2877{
2878 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2879 (__v16sf) __B,
2880 (__v16sf) __C,
2881 (__mmask16) -1,
2883}
2884
2885static __inline__ __m512 __DEFAULT_FN_ATTRS512
2886_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2887{
2888 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2889 (__v16sf) __B,
2890 (__v16sf) __C,
2891 (__mmask16) __U,
2893}
2894
2895static __inline__ __m512 __DEFAULT_FN_ATTRS512
2896_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2897{
2898 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2899 (__v16sf) __B,
2900 (__v16sf) __C,
2901 (__mmask16) __U,
2903}
2904
2905static __inline__ __m512 __DEFAULT_FN_ATTRS512
2906_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2907{
2908 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2909 (__v16sf) __B,
2910 (__v16sf) __C,
2911 (__mmask16) __U,
2913}
2914
2915static __inline__ __m512 __DEFAULT_FN_ATTRS512
2916_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
2917{
2918 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2919 (__v16sf) __B,
2920 -(__v16sf) __C,
2921 (__mmask16) -1,
2923}
2924
2925static __inline__ __m512 __DEFAULT_FN_ATTRS512
2926_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2927{
2928 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2929 (__v16sf) __B,
2930 -(__v16sf) __C,
2931 (__mmask16) __U,
2933}
2934
2935static __inline__ __m512 __DEFAULT_FN_ATTRS512
2936_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2937{
2938 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2939 (__v16sf) __B,
2940 -(__v16sf) __C,
2941 (__mmask16) __U,
2943}
2944
/* mask3 forms: each expands to the matching *_mask3 builtin on A, B and C,
 * with U cast to the lane-count mask type and R cast to int. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
2962
2963
2964static __inline__ __m512d __DEFAULT_FN_ATTRS512
2965_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2966{
2967 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2968 (__v8df) __B,
2969 (__v8df) __C,
2970 (__mmask8) __U,
2972}
2973
/* Expands to __builtin_ia32_vfmsubaddps512_mask3 on A, B and C (as __v16sf),
 * with U cast to __mmask16 and R cast to int. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
2979
2980
2981static __inline__ __m512 __DEFAULT_FN_ATTRS512
2982_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2983{
2984 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2985 (__v16sf) __B,
2986 (__v16sf) __C,
2987 (__mmask16) __U,
2989}
2990
/* The masked fnmadd forms reuse the vfmadd*512_mask builtins with the B
 * operand negated; U is the mask and R is passed through as int. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), -(__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
3002
/* fnmsub forms. The mask_ variants call the vfmadd*512_mask builtins with
 * both B and C negated; the mask3_ variants call the vfmsub*512_mask3
 * builtins with A negated. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), -(__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
3028
3029/* Vector permutations */
3030
3031static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3032_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
3033 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3034 (__v16si) __B);
3035}
3036
3037static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3038_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3039 __m512i __B) {
3040 return (__m512i)__builtin_ia32_selectd_512(__U,
3041 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3042 (__v16si)__A);
3043}
3044
3045static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3046_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3047 __m512i __B) {
3048 return (__m512i)__builtin_ia32_selectd_512(__U,
3049 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3050 (__v16si)__I);
3051}
3052
3053static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3054_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3055 __m512i __B) {
3056 return (__m512i)__builtin_ia32_selectd_512(__U,
3057 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3058 (__v16si)_mm512_setzero_si512());
3059}
3060
3061static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3062_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) {
3063 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3064 (__v8di) __B);
3065}
3066
3067static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3068_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3069 __m512i __B) {
3070 return (__m512i)__builtin_ia32_selectq_512(__U,
3071 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3072 (__v8di)__A);
3073}
3074
3075static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3076_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3077 __m512i __B) {
3078 return (__m512i)__builtin_ia32_selectq_512(__U,
3079 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3080 (__v8di)__I);
3081}
3082
3083static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3084_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3085 __m512i __B) {
3086 return (__m512i)__builtin_ia32_selectq_512(__U,
3087 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3088 (__v8di)_mm512_setzero_si512());
3089}
3090
/* Expands to __builtin_ia32_alignq512 on A and B (as __v8di) with I cast to
 * int. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(I)))

/* Blends the _mm512_alignr_epi64 result with W under mask U. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))

/* Blends the _mm512_alignr_epi64 result with an all-zero vector under U. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
3104
/* Expands to __builtin_ia32_alignd512 on A and B (as __v16si) with I cast to
 * int. */
#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(I)))

/* Blends the _mm512_alignr_epi32 result with W under mask U. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))

/* Blends the _mm512_alignr_epi32 result with an all-zero vector under U. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
3118/* Vector Extract */
3119
/* All three forms expand to __builtin_ia32_extractf64x4_mask on A (as __v8df)
 * with the immediate cast to int; they differ only in the third operand and
 * the mask. This one passes a zero vector and an all-ones mask. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(I), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)-1))

/* Passes W as the third operand and U as the mask. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(imm), (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

/* Passes a zero vector as the third operand and U as the mask. */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)))
3134
/* All three forms expand to __builtin_ia32_extractf32x4_mask on A (as
 * __v16sf) with the immediate cast to int; they differ only in the third
 * operand and the mask. This one passes a zero vector and an all-ones mask. */
#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(I), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1))

/* Passes W as the third operand and U as the mask. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(imm), (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

/* Passes a zero vector as the third operand and U as the mask. */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
3149
3150/* Vector Blend */
3151
3152static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3153_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
3154 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3155 (__v8df) __W,
3156 (__v8df) __A);
3157}
3158
3159static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3160_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
3161 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3162 (__v16sf) __W,
3163 (__v16sf) __A);
3164}
3165
3166static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3167_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
3168 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3169 (__v8di) __W,
3170 (__v8di) __A);
3171}
3172
3173static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3174_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
3175 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3176 (__v16si) __W,
3177 (__v16si) __A);
3178}
3179
3180/* Compare */
3181
/* Expands to __builtin_ia32_cmpps512_mask on A and B (as __v16sf) with
 * predicate P, an all-ones input mask and rounding argument R. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(P), \
      (__mmask16)-1, (int)(R)))

/* As above, with caller-supplied input mask U. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(P), \
      (__mmask16)(U), (int)(R)))

/* Non-rounding forms: fix R to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3196
/* Fixed-predicate shorthands for _mm512_cmp_ps_mask and
 * _mm512_mask_cmp_ps_mask; each pair hard-codes one _CMP_* predicate. */
#define _mm512_cmpeq_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3236
/* Expands to __builtin_ia32_cmppd512_mask on A and B (as __v8df) with
 * predicate P, an all-ones input mask and rounding argument R. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(P), \
      (__mmask8)-1, (int)(R)))

/* As above, with caller-supplied input mask U. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(P), \
      (__mmask8)(U), (int)(R)))

/* Non-rounding forms: fix R to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3251
/* Fixed-predicate shorthands for _mm512_cmp_pd_mask and
 * _mm512_mask_cmp_pd_mask; each pair hard-codes one _CMP_* predicate. */
#define _mm512_cmpeq_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3291
3292/* Conversion */
3293
/* All three forms expand to __builtin_ia32_cvttps2udq512_mask on A (as
 * __v16sf) with R cast to int; they differ in the second operand and mask.
 * This one passes an undefined vector and an all-ones mask. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(), \
      (__mmask16)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))
3308
3309
3310static __inline __m512i __DEFAULT_FN_ATTRS512
3312{
3313 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3314 (__v16si)
3316 (__mmask16) -1,
3318}
3319
3320static __inline__ __m512i __DEFAULT_FN_ATTRS512
3321_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3322{
3323 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3324 (__v16si) __W,
3325 (__mmask16) __U,
3327}
3328
3329static __inline__ __m512i __DEFAULT_FN_ATTRS512
3331{
3332 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3333 (__v16si) _mm512_setzero_si512 (),
3334 (__mmask16) __U,
3336}
3337
/* All three forms expand to __builtin_ia32_cvtdq2ps512_mask on A (as
 * __v16si) with R cast to int; they differ in the second operand and mask.
 * This one passes a zero vector and an all-ones mask. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
3352
/* All three forms expand to __builtin_ia32_cvtudq2ps512_mask on A (as
 * __v16si) with R cast to int; they differ in the second operand and mask.
 * This one passes a zero vector and an all-ones mask. */
#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
3367
3368static __inline__ __m512
3370 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3371}
3372
3373static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3374_mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3375 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3376 (__v16sf)_mm512_cvtepu32_ps(__A),
3377 (__v16sf)__W);
3378}
3379
3380static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3382 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3383 (__v16sf)_mm512_cvtepu32_ps(__A),
3384 (__v16sf)_mm512_setzero_ps());
3385}
3386
3387static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3389 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3390}
3391
3392static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3393_mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3394 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3395 (__v8df)_mm512_cvtepi32_pd(__A),
3396 (__v8df)__W);
3397}
3398
3399static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3401 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3402 (__v8df)_mm512_cvtepi32_pd(__A),
3403 (__v8df)_mm512_setzero_pd());
3404}
3405
3406static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3408 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3409}
3410
3411static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3412_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3413 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3414}
3415
3416static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3418 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3419}
3420
3421static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3422_mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
3423 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3424 (__v16sf)_mm512_cvtepi32_ps(__A),
3425 (__v16sf)__W);
3426}
3427
3428static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3430 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3431 (__v16sf)_mm512_cvtepi32_ps(__A),
3432 (__v16sf)_mm512_setzero_ps());
3433}
3434
3435static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3437 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3438}
3439
3440static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3441_mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
3442 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3443 (__v8df)_mm512_cvtepu32_pd(__A),
3444 (__v8df)__W);
3445}
3446
3447static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3449 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3450 (__v8df)_mm512_cvtepu32_pd(__A),
3451 (__v8df)_mm512_setzero_pd());
3452}
3453
3454static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3456 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3457}
3458
3459static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3460_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
3461 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3462}
3463
/* All three forms expand to __builtin_ia32_cvtpd2ps512_mask on A (as __v8df)
 * with R cast to int; they differ in the second operand and mask.
 * This one passes a zero vector and an all-ones mask. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), (__v8sf)(__m256)(W), \
      (__mmask8)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
3478
3479static __inline__ __m256
3481 return (__m256)__builtin_ia32_cvtpd2ps512_mask(
3482 (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
3484}
3485
3486static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
3487_mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A) {
3488 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3489 (__v8sf) __W,
3490 (__mmask8) __U,
3492}
3493
3494static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
3496 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3497 (__v8sf) _mm256_setzero_ps (),
3498 (__mmask8) __U,
3500}
3501
3502static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3503_mm512_cvtpd_pslo(__m512d __A) {
3504 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3505 (__v8sf) _mm256_setzero_ps (),
3506 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3507}
3508
3509static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3510_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
3511 return (__m512) __builtin_shufflevector (
3513 __U, __A),
3514 (__v8sf) _mm256_setzero_ps (),
3515 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3516}
3517
/* All three forms expand to __builtin_ia32_vcvtps2ph512_mask on A (as
 * __v16sf) with the immediate I cast to int; they differ in the third operand
 * and mask. This one passes an undefined vector and an all-ones mask. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(__m512)(A), (int)(I), (__v16hi)_mm256_undefined_si256(), \
      (__mmask16)-1))

/* First argument W is the __m256i third operand, second argument U is the
 * __mmask16 mask. */
#define _mm512_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(__m512)(A), (int)(I), (__v16hi)(__m256i)(W), \
      (__mmask16)(U)))

/* Passes a zero vector as the third operand; U is the mask. */
#define _mm512_maskz_cvt_roundps_ph(U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(__m512)(A), (int)(I), (__v16hi)_mm256_setzero_si256(), \
      (__mmask16)(U)))

/* The non-"round" names are plain aliases of the macros above. */
#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3536
/* All three forms expand to __builtin_ia32_vcvtph2ps512_mask on A (as
 * __v16hi) with R cast to int; they differ in the second operand and mask.
 * This one passes an undefined vector and an all-ones mask. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
3551
3552
3553static __inline __m512 __DEFAULT_FN_ATTRS512
3555{
3556 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3557 (__v16sf)
3559 (__mmask16) -1,
3561}
3562
3563static __inline__ __m512 __DEFAULT_FN_ATTRS512
3564_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3565{
3566 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3567 (__v16sf) __W,
3568 (__mmask16) __U,
3570}
3571
3572static __inline__ __m512 __DEFAULT_FN_ATTRS512
3574{
3575 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3576 (__v16sf) _mm512_setzero_ps (),
3577 (__mmask16) __U,
3579}
3580
/* All three forms expand to __builtin_ia32_cvttpd2dq512_mask on A (as
 * __v8df) with R cast to int; they differ in the second operand and mask.
 * This one passes a zero vector and an all-ones mask. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)))
3595
3596static __inline __m256i __DEFAULT_FN_ATTRS512
3598{
3599 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3600 (__v8si)_mm256_setzero_si256(),
3601 (__mmask8) -1,
3603}
3604
3605static __inline__ __m256i __DEFAULT_FN_ATTRS512
3606_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3607{
3608 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3609 (__v8si) __W,
3610 (__mmask8) __U,
3612}
3613
3614static __inline__ __m256i __DEFAULT_FN_ATTRS512
3616{
3617 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3618 (__v8si) _mm256_setzero_si256 (),
3619 (__mmask8) __U,
3621}
3622
/* All three forms expand to __builtin_ia32_cvttps2dq512_mask on A (as
 * __v16sf) with R cast to int; they differ in the second operand and mask.
 * This one passes a zero vector and an all-ones mask. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))
3637
3638static __inline __m512i __DEFAULT_FN_ATTRS512
3640{
3641 return (__m512i)
3642 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3643 (__v16si) _mm512_setzero_si512 (),
3645}
3646
3647static __inline__ __m512i __DEFAULT_FN_ATTRS512
3648_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3649{
3650 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3651 (__v16si) __W,
3652 (__mmask16) __U,
3654}
3655
3656static __inline__ __m512i __DEFAULT_FN_ATTRS512
3658{
3659 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3660 (__v16si) _mm512_setzero_si512 (),
3661 (__mmask16) __U,
3663}
3664
/* All three forms expand to __builtin_ia32_cvtps2dq512_mask on A (as
 * __v16sf) with R cast to int; they differ in the second operand and mask.
 * This one passes a zero vector and an all-ones mask. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))
3679
3680static __inline__ __m512i __DEFAULT_FN_ATTRS512
3682{
3683 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3684 (__v16si) _mm512_undefined_epi32 (),
3685 (__mmask16) -1,
3687}
3688
3689static __inline__ __m512i __DEFAULT_FN_ATTRS512
3690_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3691{
3692 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3693 (__v16si) __W,
3694 (__mmask16) __U,
3696}
3697
3698static __inline__ __m512i __DEFAULT_FN_ATTRS512
3700{
3701 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3702 (__v16si)
3704 (__mmask16) __U,
3706}
3707
/* All three forms expand to __builtin_ia32_cvtpd2dq512_mask on A (as __v8df)
 * with R cast to int; they differ in the second operand and mask.
 * This one passes a zero vector and an all-ones mask. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)))
3722
3723static __inline__ __m256i __DEFAULT_FN_ATTRS512
3725{
3726 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3727 (__v8si)
3729 (__mmask8) -1,
3731}
3732
3733static __inline__ __m256i __DEFAULT_FN_ATTRS512
3734_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3735{
3736 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3737 (__v8si) __W,
3738 (__mmask8) __U,
3740}
3741
3742static __inline__ __m256i __DEFAULT_FN_ATTRS512
3744{
3745 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3746 (__v8si)
3748 (__mmask8) __U,
3750}
3751
/* All three forms expand to __builtin_ia32_cvtps2udq512_mask on A (as
 * __v16sf) with R cast to int; they differ in the second operand and mask.
 * This one passes a zero vector and an all-ones mask. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))
3766
3767static __inline__ __m512i __DEFAULT_FN_ATTRS512
3769{
3770 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
3771 (__v16si)\
3773 (__mmask16) -1,\
3775}
3776
3777static __inline__ __m512i __DEFAULT_FN_ATTRS512
3778_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3779{
3780 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3781 (__v16si) __W,
3782 (__mmask16) __U,
3784}
3785
3786static __inline__ __m512i __DEFAULT_FN_ATTRS512
3788{
3789 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
3790 (__v16si)
3792 (__mmask16) __U ,
3794}
3795
/* All three forms expand to __builtin_ia32_cvtpd2udq512_mask on A (as
 * __v8df) with R cast to int; they differ in the second operand and mask.
 * This one passes a zero vector and an all-ones mask. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)))

/* Passes W as the second operand and U as the mask. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)))

/* Passes a zero vector as the second operand and U as the mask. */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)))
3810
3811static __inline__ __m256i __DEFAULT_FN_ATTRS512
3813{
3814 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3815 (__v8si)
3817 (__mmask8) -1,
3819}
3820
3821static __inline__ __m256i __DEFAULT_FN_ATTRS512
3822_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
3823{
3824 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3825 (__v8si) __W,
3826 (__mmask8) __U,
3828}
3829
3830static __inline__ __m256i __DEFAULT_FN_ATTRS512
3832{
3833 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
3834 (__v8si)
3836 (__mmask8) __U,
3838}
3839
3840static __inline__ double __DEFAULT_FN_ATTRS512
3842{
3843 return __a[0];
3844}
3845
3846static __inline__ float __DEFAULT_FN_ATTRS512
3848{
3849 return __a[0];
3850}
3851
3852/* Unpack and Interleave */
3853
3854static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3855_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
3856 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
3857 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
3858}
3859
3860static __inline__ __m512d __DEFAULT_FN_ATTRS512
3861_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
3862{
3863 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3864 (__v8df)_mm512_unpackhi_pd(__A, __B),
3865 (__v8df)__W);
3866}
3867
3868static __inline__ __m512d __DEFAULT_FN_ATTRS512
3869_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
3870{
3871 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3872 (__v8df)_mm512_unpackhi_pd(__A, __B),
3873 (__v8df)_mm512_setzero_pd());
3874}
3875
3876static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3877_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
3878 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
3879 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
3880}
3881
3882static __inline__ __m512d __DEFAULT_FN_ATTRS512
3883_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
3884{
3885 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3886 (__v8df)_mm512_unpacklo_pd(__A, __B),
3887 (__v8df)__W);
3888}
3889
3890static __inline__ __m512d __DEFAULT_FN_ATTRS512
3891_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
3892{
3893 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3894 (__v8df)_mm512_unpacklo_pd(__A, __B),
3895 (__v8df)_mm512_setzero_pd());
3896}
3897
3898static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3899_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
3900 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
3901 2, 18, 3, 19,
3902 2+4, 18+4, 3+4, 19+4,
3903 2+8, 18+8, 3+8, 19+8,
3904 2+12, 18+12, 3+12, 19+12);
3905}
3906
3907static __inline__ __m512 __DEFAULT_FN_ATTRS512
3908_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
3909{
3910 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3911 (__v16sf)_mm512_unpackhi_ps(__A, __B),
3912 (__v16sf)__W);
3913}
3914
3915static __inline__ __m512 __DEFAULT_FN_ATTRS512
3916_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
3917{
3918 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3919 (__v16sf)_mm512_unpackhi_ps(__A, __B),
3920 (__v16sf)_mm512_setzero_ps());
3921}
3922
3923static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3924_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
3925 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
3926 0, 16, 1, 17,
3927 0+4, 16+4, 1+4, 17+4,
3928 0+8, 16+8, 1+8, 17+8,
3929 0+12, 16+12, 1+12, 17+12);
3930}
3931
3932static __inline__ __m512 __DEFAULT_FN_ATTRS512
3933_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
3934{
3935 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3936 (__v16sf)_mm512_unpacklo_ps(__A, __B),
3937 (__v16sf)__W);
3938}
3939
3940static __inline__ __m512 __DEFAULT_FN_ATTRS512
3941_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
3942{
3943 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
3944 (__v16sf)_mm512_unpacklo_ps(__A, __B),
3945 (__v16sf)_mm512_setzero_ps());
3946}
3947
3948static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3949_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
3950 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3951 2, 18, 3, 19,
3952 2+4, 18+4, 3+4, 19+4,
3953 2+8, 18+8, 3+8, 19+8,
3954 2+12, 18+12, 3+12, 19+12);
3955}
3956
3957static __inline__ __m512i __DEFAULT_FN_ATTRS512
3958_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3959{
3960 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
3961 (__v16si)_mm512_unpackhi_epi32(__A, __B),
3962 (__v16si)__W);
3963}
3964
3965static __inline__ __m512i __DEFAULT_FN_ATTRS512
3966_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
3967{
3968 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
3969 (__v16si)_mm512_unpackhi_epi32(__A, __B),
3970 (__v16si)_mm512_setzero_si512());
3971}
3972
3973static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3974_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
3975 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
3976 0, 16, 1, 17,
3977 0+4, 16+4, 1+4, 17+4,
3978 0+8, 16+8, 1+8, 17+8,
3979 0+12, 16+12, 1+12, 17+12);
3980}
3981
3982static __inline__ __m512i __DEFAULT_FN_ATTRS512
3983_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
3984{
3985 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
3986 (__v16si)_mm512_unpacklo_epi32(__A, __B),
3987 (__v16si)__W);
3988}
3989
3990static __inline__ __m512i __DEFAULT_FN_ATTRS512
3991_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
3992{
3993 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
3994 (__v16si)_mm512_unpacklo_epi32(__A, __B),
3995 (__v16si)_mm512_setzero_si512());
3996}
3997
3998static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3999_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
4000 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4001 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4002}
4003
4004static __inline__ __m512i __DEFAULT_FN_ATTRS512
4005_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4006{
4007 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4008 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4009 (__v8di)__W);
4010}
4011
4012static __inline__ __m512i __DEFAULT_FN_ATTRS512
4013_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4014{
4015 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4016 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4017 (__v8di)_mm512_setzero_si512());
4018}
4019
4020static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4021_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
4022 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4023 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4024}
4025
4026static __inline__ __m512i __DEFAULT_FN_ATTRS512
4027_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4028{
4029 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4030 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4031 (__v8di)__W);
4032}
4033
4034static __inline__ __m512i __DEFAULT_FN_ATTRS512
4035_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4036{
4037 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4038 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4039 (__v8di)_mm512_setzero_si512());
4040}
4041
4042
4043/* SIMD load ops */
4044
4045static __inline __m512i __DEFAULT_FN_ATTRS512
4047{
4048 struct __loadu_si512 {
4049 __m512i_u __v;
4050 } __attribute__((__packed__, __may_alias__));
4051 return ((const struct __loadu_si512*)__P)->__v;
4052}
4053
4054static __inline __m512i __DEFAULT_FN_ATTRS512
4056{
4057 struct __loadu_epi32 {
4058 __m512i_u __v;
4059 } __attribute__((__packed__, __may_alias__));
4060 return ((const struct __loadu_epi32*)__P)->__v;
4061}
4062
4063static __inline __m512i __DEFAULT_FN_ATTRS512
4064_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4065{
4066 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4067 (__v16si) __W,
4068 (__mmask16) __U);
4069}
4070
4071
4072static __inline __m512i __DEFAULT_FN_ATTRS512
4074{
4075 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4076 (__v16si)
4078 (__mmask16) __U);
4079}
4080
4081static __inline __m512i __DEFAULT_FN_ATTRS512
4083{
4084 struct __loadu_epi64 {
4085 __m512i_u __v;
4086 } __attribute__((__packed__, __may_alias__));
4087 return ((const struct __loadu_epi64*)__P)->__v;
4088}
4089
4090static __inline __m512i __DEFAULT_FN_ATTRS512
4091_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4092{
4093 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4094 (__v8di) __W,
4095 (__mmask8) __U);
4096}
4097
4098static __inline __m512i __DEFAULT_FN_ATTRS512
4100{
4101 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4102 (__v8di)
4104 (__mmask8) __U);
4105}
4106
4107static __inline __m512 __DEFAULT_FN_ATTRS512
4108_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4109{
4110 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4111 (__v16sf) __W,
4112 (__mmask16) __U);
4113}
4114
4115static __inline __m512 __DEFAULT_FN_ATTRS512
4117{
4118 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4119 (__v16sf)
4121 (__mmask16) __U);
4122}
4123
4124static __inline __m512d __DEFAULT_FN_ATTRS512
4125_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4126{
4127 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4128 (__v8df) __W,
4129 (__mmask8) __U);
4130}
4131
4132static __inline __m512d __DEFAULT_FN_ATTRS512
4134{
4135 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4136 (__v8df)
4138 (__mmask8) __U);
4139}
4140
4141static __inline __m512d __DEFAULT_FN_ATTRS512
4143{
4144 struct __loadu_pd {
4145 __m512d_u __v;
4146 } __attribute__((__packed__, __may_alias__));
4147 return ((const struct __loadu_pd*)__p)->__v;
4148}
4149
4150static __inline __m512 __DEFAULT_FN_ATTRS512
4152{
4153 struct __loadu_ps {
4154 __m512_u __v;
4155 } __attribute__((__packed__, __may_alias__));
4156 return ((const struct __loadu_ps*)__p)->__v;
4157}
4158
4159static __inline __m512 __DEFAULT_FN_ATTRS512
4161{
4162 return *(const __m512*)__p;
4163}
4164
4165static __inline __m512 __DEFAULT_FN_ATTRS512
4166_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4167{
4168 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4169 (__v16sf) __W,
4170 (__mmask16) __U);
4171}
4172
4173static __inline __m512 __DEFAULT_FN_ATTRS512
4175{
4176 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4177 (__v16sf)
4179 (__mmask16) __U);
4180}
4181
4182static __inline __m512d __DEFAULT_FN_ATTRS512
4184{
4185 return *(const __m512d*)__p;
4186}
4187
4188static __inline __m512d __DEFAULT_FN_ATTRS512
4189_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4190{
4191 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4192 (__v8df) __W,
4193 (__mmask8) __U);
4194}
4195
4196static __inline __m512d __DEFAULT_FN_ATTRS512
4198{
4199 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4200 (__v8df)
4202 (__mmask8) __U);
4203}
4204
4205static __inline __m512i __DEFAULT_FN_ATTRS512
4207{
4208 return *(const __m512i *) __P;
4209}
4210
4211static __inline __m512i __DEFAULT_FN_ATTRS512
4213{
4214 return *(const __m512i *) __P;
4215}
4216
4217static __inline __m512i __DEFAULT_FN_ATTRS512
4219{
4220 return *(const __m512i *) __P;
4221}
4222
4223/* SIMD store ops */
4224
4225static __inline void __DEFAULT_FN_ATTRS512
4226_mm512_storeu_epi64 (void *__P, __m512i __A)
4227{
4228 struct __storeu_epi64 {
4229 __m512i_u __v;
4230 } __attribute__((__packed__, __may_alias__));
4231 ((struct __storeu_epi64*)__P)->__v = __A;
4232}
4233
4234static __inline void __DEFAULT_FN_ATTRS512
4235_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4236{
4237 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4238 (__mmask8) __U);
4239}
4240
4241static __inline void __DEFAULT_FN_ATTRS512
4242_mm512_storeu_si512 (void *__P, __m512i __A)
4243{
4244 struct __storeu_si512 {
4245 __m512i_u __v;
4246 } __attribute__((__packed__, __may_alias__));
4247 ((struct __storeu_si512*)__P)->__v = __A;
4248}
4249
4250static __inline void __DEFAULT_FN_ATTRS512
4251_mm512_storeu_epi32 (void *__P, __m512i __A)
4252{
4253 struct __storeu_epi32 {
4254 __m512i_u __v;
4255 } __attribute__((__packed__, __may_alias__));
4256 ((struct __storeu_epi32*)__P)->__v = __A;
4257}
4258
4259static __inline void __DEFAULT_FN_ATTRS512
4261{
4262 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4263 (__mmask16) __U);
4264}
4265
4266static __inline void __DEFAULT_FN_ATTRS512
4267_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4268{
4269 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4270}
4271
4272static __inline void __DEFAULT_FN_ATTRS512
4273_mm512_storeu_pd(void *__P, __m512d __A)
4274{
4275 struct __storeu_pd {
4276 __m512d_u __v;
4277 } __attribute__((__packed__, __may_alias__));
4278 ((struct __storeu_pd*)__P)->__v = __A;
4279}
4280
4281static __inline void __DEFAULT_FN_ATTRS512
4282_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4283{
4284 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4285 (__mmask16) __U);
4286}
4287
4288static __inline void __DEFAULT_FN_ATTRS512
4289_mm512_storeu_ps(void *__P, __m512 __A)
4290{
4291 struct __storeu_ps {
4292 __m512_u __v;
4293 } __attribute__((__packed__, __may_alias__));
4294 ((struct __storeu_ps*)__P)->__v = __A;
4295}
4296
4297static __inline void __DEFAULT_FN_ATTRS512
4298_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4299{
4300 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4301}
4302
4303static __inline void __DEFAULT_FN_ATTRS512
4304_mm512_store_pd(void *__P, __m512d __A)
4305{
4306 *(__m512d*)__P = __A;
4307}
4308
4309static __inline void __DEFAULT_FN_ATTRS512
4310_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4311{
4312 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4313 (__mmask16) __U);
4314}
4315
4316static __inline void __DEFAULT_FN_ATTRS512
4317_mm512_store_ps(void *__P, __m512 __A)
4318{
4319 *(__m512*)__P = __A;
4320}
4321
4322static __inline void __DEFAULT_FN_ATTRS512
4323_mm512_store_si512 (void *__P, __m512i __A)
4324{
4325 *(__m512i *) __P = __A;
4326}
4327
4328static __inline void __DEFAULT_FN_ATTRS512
4329_mm512_store_epi32 (void *__P, __m512i __A)
4330{
4331 *(__m512i *) __P = __A;
4332}
4333
4334static __inline void __DEFAULT_FN_ATTRS512
4335_mm512_store_epi64 (void *__P, __m512i __A)
4336{
4337 *(__m512i *) __P = __A;
4338}
4339
4340/* Mask ops */
4341
/* NOTE(review): the declaration lines for this definition (return type,
 * attributes, name and the __M parameter) are absent from this listing,
 * presumably lost in extraction -- restore them from the upstream header.
 * The surviving body forwards __M to __builtin_ia32_knothi and returns the
 * result. */
4344 return __builtin_ia32_knothi(__M);
4345}
4346
4347/* Integer compare */
4348
/* 32-bit integer comparison shorthands.  Each macro forwards to
 * _mm512_cmp_epi32_mask (or _mm512_mask_cmp_epi32_mask when a mask m is
 * given) with the matching _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
4373
/* Unsigned 32-bit integer comparison shorthands.  Each macro forwards to
 * _mm512_cmp_epu32_mask (or _mm512_mask_cmp_epu32_mask when a mask m is
 * given) with the matching _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
4398
/* 64-bit integer comparison shorthands.  Each macro forwards to
 * _mm512_cmp_epi64_mask (or _mm512_mask_cmp_epi64_mask when a mask m is
 * given) with the matching _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
4423
/* Unsigned 64-bit integer comparison shorthands.  Each macro forwards to
 * _mm512_cmp_epu64_mask (or _mm512_mask_cmp_epu64_mask when a mask m is
 * given) with the matching _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
4448
4449static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4451 /* This function always performs a signed extension, but __v16qi is a char
4452 which may be signed or unsigned, so use __v16qs. */
4453 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4454}
4455
4456static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4457_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4458 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4459 (__v16si)_mm512_cvtepi8_epi32(__A),
4460 (__v16si)__W);
4461}
4462
4463static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4465 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4466 (__v16si)_mm512_cvtepi8_epi32(__A),
4467 (__v16si)_mm512_setzero_si512());
4468}
4469
4470static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4472 /* This function always performs a signed extension, but __v16qi is a char
4473 which may be signed or unsigned, so use __v16qs. */
4474 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4475}
4476
4477static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4478_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4479 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4480 (__v8di)_mm512_cvtepi8_epi64(__A),
4481 (__v8di)__W);
4482}
4483
4484static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4486 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4487 (__v8di)_mm512_cvtepi8_epi64(__A),
4488 (__v8di)_mm512_setzero_si512 ());
4489}
4490
4491static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4493 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4494}
4495
4496static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4497_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4498 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4499 (__v8di)_mm512_cvtepi32_epi64(__X),
4500 (__v8di)__W);
4501}
4502
4503static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4505 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4506 (__v8di)_mm512_cvtepi32_epi64(__X),
4507 (__v8di)_mm512_setzero_si512());
4508}
4509
4510static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4512 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4513}
4514
4515static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4516_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4517 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4518 (__v16si)_mm512_cvtepi16_epi32(__A),
4519 (__v16si)__W);
4520}
4521
4522static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4524 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4525 (__v16si)_mm512_cvtepi16_epi32(__A),
4526 (__v16si)_mm512_setzero_si512 ());
4527}
4528
4529static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4531 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4532}
4533
4534static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4535_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4536 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4537 (__v8di)_mm512_cvtepi16_epi64(__A),
4538 (__v8di)__W);
4539}
4540
4541static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4543 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4544 (__v8di)_mm512_cvtepi16_epi64(__A),
4545 (__v8di)_mm512_setzero_si512());
4546}
4547
4548static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4550 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4551}
4552
4553static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4554_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
4555 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4556 (__v16si)_mm512_cvtepu8_epi32(__A),
4557 (__v16si)__W);
4558}
4559
4560static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4562 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4563 (__v16si)_mm512_cvtepu8_epi32(__A),
4564 (__v16si)_mm512_setzero_si512());
4565}
4566
4567static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4569 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4570}
4571
4572static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4573_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4574 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4575 (__v8di)_mm512_cvtepu8_epi64(__A),
4576 (__v8di)__W);
4577}
4578
4579static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4581 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4582 (__v8di)_mm512_cvtepu8_epi64(__A),
4583 (__v8di)_mm512_setzero_si512());
4584}
4585
4586static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4588 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4589}
4590
4591static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4592_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
4593 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4594 (__v8di)_mm512_cvtepu32_epi64(__X),
4595 (__v8di)__W);
4596}
4597
4598static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4600 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4601 (__v8di)_mm512_cvtepu32_epi64(__X),
4602 (__v8di)_mm512_setzero_si512());
4603}
4604
4605static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4607 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4608}
4609
4610static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4611_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
4612 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4613 (__v16si)_mm512_cvtepu16_epi32(__A),
4614 (__v16si)__W);
4615}
4616
4617static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4619 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4620 (__v16si)_mm512_cvtepu16_epi32(__A),
4621 (__v16si)_mm512_setzero_si512());
4622}
4623
4624static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4626 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4627}
4628
4629static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4630_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
4631 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4632 (__v8di)_mm512_cvtepu16_epi64(__A),
4633 (__v8di)__W);
4634}
4635
4636static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4638 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4639 (__v8di)_mm512_cvtepu16_epi64(__A),
4640 (__v8di)_mm512_setzero_si512());
4641}
4642
4643static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4644_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4645{
4646 return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
4647}
4648
4649static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4650_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4651{
4652 return (__m512i)__builtin_ia32_selectd_512(__U,
4653 (__v16si)_mm512_rorv_epi32(__A, __B),
4654 (__v16si)__W);
4655}
4656
4657static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4658_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4659{
4660 return (__m512i)__builtin_ia32_selectd_512(__U,
4661 (__v16si)_mm512_rorv_epi32(__A, __B),
4662 (__v16si)_mm512_setzero_si512());
4663}
4664
4665static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4666_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4667{
4668 return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
4669}
4670
4671static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4672_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4673{
4674 return (__m512i)__builtin_ia32_selectq_512(__U,
4675 (__v8di)_mm512_rorv_epi64(__A, __B),
4676 (__v8di)__W);
4677}
4678
4679static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4680_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4681{
4682 return (__m512i)__builtin_ia32_selectq_512(__U,
4683 (__v8di)_mm512_rorv_epi64(__A, __B),
4684 (__v8di)_mm512_setzero_si512());
4685}
4686
4687
4688
/* Generic integer comparisons.  A and B are the vectors to compare and P is
 * the predicate (the _MM_CMPINT_* constants are what the shorthand macros
 * pass).  The unmasked forms hand the builtin an all-ones mask. */
#define _mm512_cmp_epi32_mask(A, B, P) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), (int)(P), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(A, B, P) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(A), \
                                           (__v16si)(__m512i)(B), (int)(P), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(A, B, P) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), (int)(P), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(A, B, P) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(A), \
                                          (__v8di)(__m512i)(B), (int)(P), \
                                          (__mmask8)-1))
4708
/* Masked generic integer comparisons: identical to the unmasked forms except
 * that the caller-supplied mask M is passed to the builtin in place of the
 * all-ones mask. */
#define _mm512_mask_cmp_epi32_mask(M, A, B, P) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), (int)(P), \
                                          (__mmask16)(M)))

#define _mm512_mask_cmp_epu32_mask(M, A, B, P) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(A), \
                                           (__v16si)(__m512i)(B), (int)(P), \
                                           (__mmask16)(M)))

#define _mm512_mask_cmp_epi64_mask(M, A, B, P) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), (int)(P), \
                                         (__mmask8)(M)))

#define _mm512_mask_cmp_epu64_mask(M, A, B, P) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(A), \
                                          (__v8di)(__m512i)(B), (int)(P), \
                                          (__mmask8)(M)))
4728
/* Rotate-left of 32-bit elements by the count B (cast to int), plus the
 * merge-masking (W, U) and zero-masking (U) selections of that result. */
#define _mm512_rol_epi32(A, B) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))
4741
/* Rotate-left of 64-bit elements by the count B (cast to int), plus the
 * merge-masking (W, U) and zero-masking (U) selections of that result. */
#define _mm512_rol_epi64(A, B) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
4754
4755static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4756_mm512_rolv_epi32 (__m512i __A, __m512i __B)
4757{
4758 return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
4759}
4760
4761static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4762_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4763{
4764 return (__m512i)__builtin_ia32_selectd_512(__U,
4765 (__v16si)_mm512_rolv_epi32(__A, __B),
4766 (__v16si)__W);
4767}
4768
4769static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4770_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4771{
4772 return (__m512i)__builtin_ia32_selectd_512(__U,
4773 (__v16si)_mm512_rolv_epi32(__A, __B),
4774 (__v16si)_mm512_setzero_si512());
4775}
4776
4777static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4778_mm512_rolv_epi64 (__m512i __A, __m512i __B)
4779{
4780 return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
4781}
4782
4783static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4784_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4785{
4786 return (__m512i)__builtin_ia32_selectq_512(__U,
4787 (__v8di)_mm512_rolv_epi64(__A, __B),
4788 (__v8di)__W);
4789}
4790
4791static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4792_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4793{
4794 return (__m512i)__builtin_ia32_selectq_512(__U,
4795 (__v8di)_mm512_rolv_epi64(__A, __B),
4796 (__v8di)_mm512_setzero_si512());
4797}
4798
/* Rotate-right of 32-bit elements by the count b (cast to int), plus the
 * merge-masking (W, U) and zero-masking (U) selections of that result. */
#define _mm512_ror_epi32(a, b) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_ror_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))
4811
/* Rotate-right of 64-bit elements by the count b (cast to int), plus the
 * merge-masking (W, U) and zero-masking (U) selections of that result. */
#define _mm512_ror_epi64(a, b) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_ror_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
4824
4825static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4826_mm512_slli_epi32(__m512i __A, unsigned int __B) {
4827 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
4828}
4829
4830static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4831_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
4832 unsigned int __B) {
4833 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4834 (__v16si)_mm512_slli_epi32(__A, __B),
4835 (__v16si)__W);
4836}
4837
4838static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4839_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
4840 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4841 (__v16si)_mm512_slli_epi32(__A, __B),
4842 (__v16si)_mm512_setzero_si512());
4843}
4844
4845static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4846_mm512_slli_epi64(__m512i __A, unsigned int __B) {
4847 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
4848}
4849
4850static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4851_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
4852 unsigned int __B) {
4853 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4854 (__v8di)_mm512_slli_epi64(__A, __B),
4855 (__v8di)__W);
4856}
4857
4858static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4859_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4860 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4861 (__v8di)_mm512_slli_epi64(__A, __B),
4862 (__v8di)_mm512_setzero_si512());
4863}
4864
4865static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4866_mm512_srli_epi32(__m512i __A, unsigned int __B) {
4867 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
4868}
4869
4870static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4871_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
4872 unsigned int __B) {
4873 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4874 (__v16si)_mm512_srli_epi32(__A, __B),
4875 (__v16si)__W);
4876}
4877
4878static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4879_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
4880 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4881 (__v16si)_mm512_srli_epi32(__A, __B),
4882 (__v16si)_mm512_setzero_si512());
4883}
4884
4885static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4886_mm512_srli_epi64(__m512i __A, unsigned int __B) {
4887 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
4888}
4889
4890static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4891_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
4892 unsigned int __B) {
4893 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4894 (__v8di)_mm512_srli_epi64(__A, __B),
4895 (__v8di)__W);
4896}
4897
4898static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4899_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
4900 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4901 (__v8di)_mm512_srli_epi64(__A, __B),
4902 (__v8di)_mm512_setzero_si512());
4903}
4904
4905static __inline__ __m512i __DEFAULT_FN_ATTRS512
4906_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4907{
4908 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4909 (__v16si) __W,
4910 (__mmask16) __U);
4911}
4912
4913static __inline__ __m512i __DEFAULT_FN_ATTRS512
4915{
4916 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4917 (__v16si)
4919 (__mmask16) __U);
4920}
4921
4922static __inline__ void __DEFAULT_FN_ATTRS512
4923_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
4924{
4925 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
4926 (__mmask16) __U);
4927}
4928
4929static __inline__ __m512i __DEFAULT_FN_ATTRS512
4930_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
4931{
4932 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4933 (__v16si) __A,
4934 (__v16si) __W);
4935}
4936
4937static __inline__ __m512i __DEFAULT_FN_ATTRS512
4939{
4940 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
4941 (__v16si) __A,
4942 (__v16si) _mm512_setzero_si512 ());
4943}
4944
4945static __inline__ __m512i __DEFAULT_FN_ATTRS512
4946_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
4947{
4948 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4949 (__v8di) __A,
4950 (__v8di) __W);
4951}
4952
4953static __inline__ __m512i __DEFAULT_FN_ATTRS512
4955{
4956 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
4957 (__v8di) __A,
4958 (__v8di) _mm512_setzero_si512 ());
4959}
4960
4961static __inline__ __m512i __DEFAULT_FN_ATTRS512
4962_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4963{
4964 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4965 (__v8di) __W,
4966 (__mmask8) __U);
4967}
4968
4969static __inline__ __m512i __DEFAULT_FN_ATTRS512
4971{
4972 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4973 (__v8di)
4975 (__mmask8) __U);
4976}
4977
4978static __inline__ void __DEFAULT_FN_ATTRS512
4979_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
4980{
4981 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
4982 (__mmask8) __U);
4983}
4984
4985static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
4987{
4988 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
4989 0, 0, 2, 2, 4, 4, 6, 6);
4990}
4991
4992static __inline__ __m512d __DEFAULT_FN_ATTRS512
4993_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
4994{
4995 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
4996 (__v8df)_mm512_movedup_pd(__A),
4997 (__v8df)__W);
4998}
4999
5000static __inline__ __m512d __DEFAULT_FN_ATTRS512
5002{
5003 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5004 (__v8df)_mm512_movedup_pd(__A),
5005 (__v8df)_mm512_setzero_pd());
5006}
5007
/// 512-bit double-precision fixupimm family. Every macro forwards A and B as
/// __v8df, C as __v8di and imm as int to the fixupimmpd512 builtins. The
/// variants differ only in the mask (all ones vs. U), the builtin flavour
/// (_mask vs. _maskz) and the last argument (R vs. _MM_FROUND_CUR_DIRECTION).

/// Unmasked, explicit last argument R.
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

/// _mask builtin with mask U, explicit last argument R.
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

/// Unmasked, _MM_FROUND_CUR_DIRECTION.
#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

/// _mask builtin with mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

/// _maskz builtin with mask U, explicit last argument R.
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

/// _maskz builtin with mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
5047
/// 512-bit single-precision fixupimm family. Every macro forwards A and B as
/// __v16sf, C as __v16si and imm as int to the fixupimmps512 builtins. The
/// variants differ only in the mask (all ones vs. U), the builtin flavour
/// (_mask vs. _maskz) and the last argument (R vs. _MM_FROUND_CUR_DIRECTION).

/// Unmasked, explicit last argument R.
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

/// _mask builtin with mask U, explicit last argument R.
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))

/// Unmasked, _MM_FROUND_CUR_DIRECTION.
#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

/// _mask builtin with mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

/// _maskz builtin with mask U, explicit last argument R.
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

/// _maskz builtin with mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
5087
/// Scalar double-precision fixupimm family. Every macro forwards A and B as
/// __v2df, C as __v2di and imm as int to the fixupimmsd builtins. The
/// variants differ only in the mask (all ones vs. U), the builtin flavour
/// (_mask vs. _maskz) and the last argument (R vs. _MM_FROUND_CUR_DIRECTION).

/// Unmasked, explicit last argument R.
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

/// _mask builtin with mask U, explicit last argument R.
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

/// Unmasked, _MM_FROUND_CUR_DIRECTION.
#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

/// _mask builtin with mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

/// _maskz builtin with mask U, explicit last argument R.
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

/// _maskz builtin with mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
5126
/// Scalar single-precision fixupimm family. Every macro forwards A and B as
/// __v4sf, C as __v4si and imm as int to the fixupimmss builtins. The
/// variants differ only in the mask (all ones vs. U), the builtin flavour
/// (_mask vs. _maskz) and the last argument (R vs. _MM_FROUND_CUR_DIRECTION).

/// Unmasked, explicit last argument R.
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

/// _mask builtin with mask U, explicit last argument R.
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))

/// Unmasked, _MM_FROUND_CUR_DIRECTION.
#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

/// _mask builtin with mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

/// _maskz builtin with mask U, explicit last argument R.
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

/// _maskz builtin with mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
5165
5166#define _mm_getexp_round_sd(A, B, R) \
5167 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5168 (__v2df)(__m128d)(B), \
5169 (__v2df)_mm_setzero_pd(), \
5170 (__mmask8)-1, (int)(R)))
5171
5172
5173static __inline__ __m128d __DEFAULT_FN_ATTRS128
5174_mm_getexp_sd (__m128d __A, __m128d __B)
5175{
5176 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5177 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5178}
5179
5180static __inline__ __m128d __DEFAULT_FN_ATTRS128
5181_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5182{
5183 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5184 (__v2df) __B,
5185 (__v2df) __W,
5186 (__mmask8) __U,
5188}
5189
5190#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5191 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5192 (__v2df)(__m128d)(B), \
5193 (__v2df)(__m128d)(W), \
5194 (__mmask8)(U), (int)(R)))
5195
5196static __inline__ __m128d __DEFAULT_FN_ATTRS128
5197_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5198{
5199 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5200 (__v2df) __B,
5201 (__v2df) _mm_setzero_pd (),
5202 (__mmask8) __U,
5204}
5205
5206#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5207 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5208 (__v2df)(__m128d)(B), \
5209 (__v2df)_mm_setzero_pd(), \
5210 (__mmask8)(U), (int)(R)))
5211
5212#define _mm_getexp_round_ss(A, B, R) \
5213 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5214 (__v4sf)(__m128)(B), \
5215 (__v4sf)_mm_setzero_ps(), \
5216 (__mmask8)-1, (int)(R)))
5217
5218static __inline__ __m128 __DEFAULT_FN_ATTRS128
5219_mm_getexp_ss (__m128 __A, __m128 __B)
5220{
5221 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5222 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5223}
5224
5225static __inline__ __m128 __DEFAULT_FN_ATTRS128
5226_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5227{
5228 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5229 (__v4sf) __B,
5230 (__v4sf) __W,
5231 (__mmask8) __U,
5233}
5234
5235#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5236 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5237 (__v4sf)(__m128)(B), \
5238 (__v4sf)(__m128)(W), \
5239 (__mmask8)(U), (int)(R)))
5240
5241static __inline__ __m128 __DEFAULT_FN_ATTRS128
5242_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5243{
5244 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5245 (__v4sf) __B,
5246 (__v4sf) _mm_setzero_ps (),
5247 (__mmask8) __U,
5249}
5250
5251#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5252 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5253 (__v4sf)(__m128)(B), \
5254 (__v4sf)_mm_setzero_ps(), \
5255 (__mmask8)(U), (int)(R)))
5256
/// Scalar double-precision getmant family. Every macro lowers to
/// __builtin_ia32_getmantsd_round_mask. The two selector arguments are packed
/// into one immediate as ((D) << 2) | (C), i.e. C occupies the low two bits
/// and D the bits above them. Variants differ in pass-through vector (zero
/// or W), mask (all ones or U) and final argument (R or
/// _MM_FROUND_CUR_DIRECTION).

/// Unmasked, explicit final argument R.
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

/// Unmasked, _MM_FROUND_CUR_DIRECTION.
#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

/// Pass-through W, mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

/// Pass-through W, mask U, explicit final argument R.
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

/// Zero pass-through, mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

/// Zero pass-through, mask U, explicit final argument R.
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))
5301
/// Scalar single-precision getmant family. Every macro lowers to
/// __builtin_ia32_getmantss_round_mask. The two selector arguments are packed
/// into one immediate as ((D) << 2) | (C), i.e. C occupies the low two bits
/// and D the bits above them. Variants differ in pass-through vector (zero
/// or W), mask (all ones or U) and final argument (R or
/// _MM_FROUND_CUR_DIRECTION).

/// Unmasked, explicit final argument R.
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

/// Unmasked, _MM_FROUND_CUR_DIRECTION.
#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

/// Pass-through W, mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

/// Pass-through W, mask U, explicit final argument R.
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

/// Zero pass-through, mask U, _MM_FROUND_CUR_DIRECTION.
#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

/// Zero pass-through, mask U, explicit final argument R.
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5346
5347static __inline__ __mmask16
5351
/// Scalar double compare: A and B (as __v2df) plus the int-converted P and R
/// are passed to the vcomisd builtin; the result is cast to int.
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

/// Scalar float compare: A and B (as __v4sf) plus the int-converted P and R
/// are passed to the vcomiss builtin; the result is cast to int.
#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))
5359
#ifdef __x86_64__
/// Passes A (as __v2df) and the int-converted R to the vcvtsd2si64 builtin
/// and casts the result to long long. Only defined for x86-64 targets.
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5364
5365static __inline__ __m512i
5367 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5368}
5369
5370static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5371_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5372 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5373 (__v16si)_mm512_sll_epi32(__A, __B),
5374 (__v16si)__W);
5375}
5376
5377static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5378_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5379 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5380 (__v16si)_mm512_sll_epi32(__A, __B),
5381 (__v16si)_mm512_setzero_si512());
5382}
5383
5384static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5385_mm512_sll_epi64(__m512i __A, __m128i __B) {
5386 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5387}
5388
5389static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5390_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5391 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5392 (__v8di)_mm512_sll_epi64(__A, __B),
5393 (__v8di)__W);
5394}
5395
5396static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5397_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5398 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5399 (__v8di)_mm512_sll_epi64(__A, __B),
5400 (__v8di)_mm512_setzero_si512());
5401}
5402
5403static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5404_mm512_sllv_epi32(__m512i __X, __m512i __Y) {
5405 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5406}
5407
5408static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5409_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5410 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5411 (__v16si)_mm512_sllv_epi32(__X, __Y),
5412 (__v16si)__W);
5413}
5414
5415static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5416_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5417 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5418 (__v16si)_mm512_sllv_epi32(__X, __Y),
5419 (__v16si)_mm512_setzero_si512());
5420}
5421
5422static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5423_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5424{
5425 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5426}
5427
5428static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5429_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5430{
5431 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5432 (__v8di)_mm512_sllv_epi64(__X, __Y),
5433 (__v8di)__W);
5434}
5435
5436static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5437_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5438{
5439 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5440 (__v8di)_mm512_sllv_epi64(__X, __Y),
5441 (__v8di)_mm512_setzero_si512());
5442}
5443
5444static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5445_mm512_sra_epi32(__m512i __A, __m128i __B) {
5446 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5447}
5448
5449static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5450_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5451 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5452 (__v16si)_mm512_sra_epi32(__A, __B),
5453 (__v16si)__W);
5454}
5455
5456static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5457_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5458 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5459 (__v16si)_mm512_sra_epi32(__A, __B),
5460 (__v16si)_mm512_setzero_si512());
5461}
5462
5463static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5464_mm512_sra_epi64(__m512i __A, __m128i __B) {
5465 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5466}
5467
5468static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5469_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5470 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5471 (__v8di)_mm512_sra_epi64(__A, __B),
5472 (__v8di)__W);
5473}
5474
5475static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5476_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5477 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5478 (__v8di)_mm512_sra_epi64(__A, __B),
5479 (__v8di)_mm512_setzero_si512());
5480}
5481
5482static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5483_mm512_srav_epi32(__m512i __X, __m512i __Y) {
5484 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5485}
5486
5487static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5488_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5489 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5490 (__v16si)_mm512_srav_epi32(__X, __Y),
5491 (__v16si)__W);
5492}
5493
5494static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5495_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5496 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5497 (__v16si)_mm512_srav_epi32(__X, __Y),
5498 (__v16si)_mm512_setzero_si512());
5499}
5500
5501static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5502_mm512_srav_epi64(__m512i __X, __m512i __Y)
5503{
5504 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5505}
5506
5507static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5508_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5509{
5510 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5511 (__v8di)_mm512_srav_epi64(__X, __Y),
5512 (__v8di)__W);
5513}
5514
5515static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5516_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5517{
5518 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5519 (__v8di)_mm512_srav_epi64(__X, __Y),
5520 (__v8di)_mm512_setzero_si512());
5521}
5522
5523static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5524_mm512_srl_epi32(__m512i __A, __m128i __B) {
5525 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5526}
5527
5528static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5529_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
5530 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5531 (__v16si)_mm512_srl_epi32(__A, __B),
5532 (__v16si)__W);
5533}
5534
5535static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5536_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
5537 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5538 (__v16si)_mm512_srl_epi32(__A, __B),
5539 (__v16si)_mm512_setzero_si512());
5540}
5541
5542static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5543_mm512_srl_epi64(__m512i __A, __m128i __B) {
5544 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5545}
5546
5547static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5548_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
5549 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5550 (__v8di)_mm512_srl_epi64(__A, __B),
5551 (__v8di)__W);
5552}
5553
5554static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5555_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
5556 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5557 (__v8di)_mm512_srl_epi64(__A, __B),
5558 (__v8di)_mm512_setzero_si512());
5559}
5560
5561static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5562_mm512_srlv_epi32(__m512i __X, __m512i __Y) {
5563 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5564}
5565
5566static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5567_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
5568 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5569 (__v16si)_mm512_srlv_epi32(__X, __Y),
5570 (__v16si)__W);
5571}
5572
5573static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5574_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
5575 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5576 (__v16si)_mm512_srlv_epi32(__X, __Y),
5577 (__v16si)_mm512_setzero_si512());
5578}
5579
5580static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5581_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5582{
5583 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5584}
5585
5586static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5587_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5588{
5589 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5590 (__v8di)_mm512_srlv_epi64(__X, __Y),
5591 (__v8di)__W);
5592}
5593
5594static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5595_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5596{
5597 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5598 (__v8di)_mm512_srlv_epi64(__X, __Y),
5599 (__v8di)_mm512_setzero_si512());
5600}
5601
/// \enum _MM_TERNLOG_ENUM
///    A helper to represent the ternary logic operations among vector \a A,
///    \a B and \a C. The representation is passed to \a imm.
///
///    Each constant is the 8-bit truth-table column of one operand, so
///    combining the constants with bitwise operators yields the immediate of
///    the same expression, e.g. (_MM_TERNLOG_A & _MM_TERNLOG_B) | _MM_TERNLOG_C
///    evaluates to 0xEA.
typedef enum {
  _MM_TERNLOG_A = 0xF0,
  _MM_TERNLOG_B = 0xCC,
  _MM_TERNLOG_C = 0xAA
} _MM_TERNLOG_ENUM;
5610
/// Ternary-logic on 32-bit lanes. A, B and C are viewed as __v16si and imm is
/// narrowed to unsigned char before the call to the pternlogd512 builtins.

/// Unmasked: _mask builtin with an all-ones mask.
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)-1))

/// _mask builtin with mask U.
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

/// _maskz builtin with mask U.
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))
5625
/// Ternary-logic on 64-bit lanes. A, B and C are viewed as __v8di and imm is
/// narrowed to unsigned char before the call to the pternlogq512 builtins.

/// Unmasked: _mask builtin with an all-ones mask.
#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)-1))

/// _mask builtin with mask U.
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

/// _maskz builtin with mask U.
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
5640
5641#ifdef __x86_64__
5642#define _mm_cvt_roundsd_i64(A, R) \
5643 ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
5644#endif
5645
5646#define _mm_cvt_roundsd_si32(A, R) \
5647 ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5648
5649#define _mm_cvt_roundsd_i32(A, R) \
5650 ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
5651
5652#define _mm_cvt_roundsd_u32(A, R) \
5653 ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5654
5655static __inline__ unsigned __DEFAULT_FN_ATTRS128
5656_mm_cvtsd_u32 (__m128d __A)
5657{
5658 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5660}
5661
5662#ifdef __x86_64__
5663#define _mm_cvt_roundsd_u64(A, R) \
5664 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5665 (int)(R)))
5666
5667static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5668_mm_cvtsd_u64 (__m128d __A)
5669{
5670 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5671 __A,
5673}
5674#endif
5675
5676#define _mm_cvt_roundss_si32(A, R) \
5677 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5678
5679#define _mm_cvt_roundss_i32(A, R) \
5680 ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
5681
5682#ifdef __x86_64__
5683#define _mm_cvt_roundss_si64(A, R) \
5684 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5685
5686#define _mm_cvt_roundss_i64(A, R) \
5687 ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
5688#endif
5689
5690#define _mm_cvt_roundss_u32(A, R) \
5691 ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5692
5693static __inline__ unsigned __DEFAULT_FN_ATTRS128
5694_mm_cvtss_u32 (__m128 __A)
5695{
5696 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5698}
5699
5700#ifdef __x86_64__
5701#define _mm_cvt_roundss_u64(A, R) \
5702 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
5703 (int)(R)))
5704
5705static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5706_mm_cvtss_u64 (__m128 __A)
5707{
5708 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
5709 __A,
5711}
5712#endif
5713
5714#define _mm_cvtt_roundsd_i32(A, R) \
5715 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5716
5717#define _mm_cvtt_roundsd_si32(A, R) \
5718 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
5719
5720static __inline__ int __DEFAULT_FN_ATTRS128
5721_mm_cvttsd_i32 (__m128d __A)
5722{
5723 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
5725}
5726
5727#ifdef __x86_64__
5728#define _mm_cvtt_roundsd_si64(A, R) \
5729 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5730
5731#define _mm_cvtt_roundsd_i64(A, R) \
5732 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
5733
5734static __inline__ long long __DEFAULT_FN_ATTRS128
5735_mm_cvttsd_i64 (__m128d __A)
5736{
5737 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
5739}
5740#endif
5741
5742#define _mm_cvtt_roundsd_u32(A, R) \
5743 ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5744
5745static __inline__ unsigned __DEFAULT_FN_ATTRS128
5746_mm_cvttsd_u32 (__m128d __A)
5747{
5748 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
5750}
5751
5752#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvttsd2usi64 on A with argument R; the result is
 * cast to unsigned long long. */
#define _mm_cvtt_roundsd_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttsd2usi64( \
      (__v2df)(__m128d)(A), (int)(R)))
5756
5757static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5758_mm_cvttsd_u64 (__m128d __A)
5759{
5760 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
5761 __A,
5763}
5764#endif
5765
/* Both spellings expand to the same __builtin_ia32_vcvttss2si32 call on A
 * with argument R; the result is cast to int. */
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), \
                                    (int)(R)))

#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), \
                                    (int)(R)))
5771
5772static __inline__ int __DEFAULT_FN_ATTRS128
5773_mm_cvttss_i32 (__m128 __A)
5774{
5775 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
5777}
5778
5779#ifdef __x86_64__
/* Both spellings expand to the same __builtin_ia32_vcvttss2si64 call on A
 * with argument R; the result is cast to long long. */
#define _mm_cvtt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), \
                                          (int)(R)))

#define _mm_cvtt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), \
                                          (int)(R)))
5785
5786static __inline__ long long __DEFAULT_FN_ATTRS128
5787_mm_cvttss_i64 (__m128 __A)
5788{
5789 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
5791}
5792#endif
5793
/* Expands to __builtin_ia32_vcvttss2usi32 on A with argument R; the result is
 * cast to unsigned int. */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), \
                                              (int)(R)))
5796
5797static __inline__ unsigned __DEFAULT_FN_ATTRS128
5798_mm_cvttss_u32 (__m128 __A)
5799{
5800 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
5802}
5803
5804#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvttss2usi64 on A with argument R; the result is
 * cast to unsigned long long. */
#define _mm_cvtt_roundss_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttss2usi64( \
      (__v4sf)(__m128)(A), (int)(R)))
5808
5809static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5810_mm_cvttss_u64 (__m128 __A)
5811{
5812 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
5813 __A,
5815}
5816#endif
5817
/* Immediate-controlled permute of X (control C) through
 * __builtin_ia32_vpermilpd512. The mask form blends the result with W and the
 * maskz form with an all-zero vector, both under mask U. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), \
                                        (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()))
5830
/* Immediate-controlled permute of X (control C) through
 * __builtin_ia32_vpermilps512. The mask form blends the result with W and the
 * maskz form with an all-zero vector, both under mask U. */
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), \
                                       (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)_mm512_setzero_ps()))
5843
5844static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5845_mm512_permutevar_pd(__m512d __A, __m512i __C) {
5846 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
5847}
5848
5849static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5850_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
5851 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5852 (__v8df)_mm512_permutevar_pd(__A, __C),
5853 (__v8df)__W);
5854}
5855
5856static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5857_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
5858 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5859 (__v8df)_mm512_permutevar_pd(__A, __C),
5860 (__v8df)_mm512_setzero_pd());
5861}
5862
5863static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5864_mm512_permutevar_ps(__m512 __A, __m512i __C) {
5865 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
5866}
5867
5868static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5869_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
5870 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
5871 (__v16sf)_mm512_permutevar_ps(__A, __C),
5872 (__v16sf)__W);
5873}
5874
5875static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5876_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
5877 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
5878 (__v16sf)_mm512_permutevar_ps(__A, __C),
5879 (__v16sf)_mm512_setzero_ps());
5880}
5881
5882static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5883_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) {
5884 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
5885 (__v8df)__B);
5886}
5887
5888static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5889_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I,
5890 __m512d __B) {
5891 return (__m512d)__builtin_ia32_selectpd_512(__U,
5892 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5893 (__v8df)__A);
5894}
5895
5896static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5897_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
5898 __m512d __B) {
5899 return (__m512d)__builtin_ia32_selectpd_512(__U,
5900 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5901 (__v8df)(__m512d)__I);
5902}
5903
5904static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5905_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
5906 __m512d __B) {
5907 return (__m512d)__builtin_ia32_selectpd_512(__U,
5908 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
5909 (__v8df)_mm512_setzero_pd());
5910}
5911
5912static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5913_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) {
5914 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
5915 (__v16sf) __B);
5916}
5917
5918static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5919_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I,
5920 __m512 __B) {
5921 return (__m512)__builtin_ia32_selectps_512(__U,
5922 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
5923 (__v16sf)__A);
5924}
5925
5926static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5927_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U,
5928 __m512 __B) {
5929 return (__m512)__builtin_ia32_selectps_512(__U,
5930 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
5931 (__v16sf)(__m512)__I);
5932}
5933
5934static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
5935_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I,
5936 __m512 __B) {
5937 return (__m512)__builtin_ia32_selectps_512(__U,
5938 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
5939 (__v16sf)_mm512_setzero_ps());
5940}
5941
/* __builtin_ia32_cvttpd2udq512_mask on A with last argument R. The plain form
 * passes an undefined pass-through and an all-ones mask; the mask form passes
 * W and U; the maskz form passes a zero vector and U. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_undefined_si256(), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)))
5956
5957static __inline__ __m256i __DEFAULT_FN_ATTRS512
5959{
5960 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5961 (__v8si)
5963 (__mmask8) -1,
5965}
5966
5967static __inline__ __m256i __DEFAULT_FN_ATTRS512
5968_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
5969{
5970 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5971 (__v8si) __W,
5972 (__mmask8) __U,
5974}
5975
5976static __inline__ __m256i __DEFAULT_FN_ATTRS512
5978{
5979 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
5980 (__v8si)
5982 (__mmask8) __U,
5984}
5985
/* Unmasked scalar-double roundscale: __builtin_ia32_rndscalesd_round_mask
 * with a zero pass-through and an all-ones mask. The _round_ form takes the
 * last argument R; the plain form uses _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(imm), \
      (int)(R)))

#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(imm), \
      _MM_FROUND_CUR_DIRECTION))
5999
/* Merge-masked scalar-double roundscale: W is the pass-through and U the
 * mask. The plain form uses _MM_FROUND_CUR_DIRECTION; the _round_ form takes
 * the last argument R. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(imm), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(I), \
      (int)(R)))
6013
/* Zero-masked scalar-double roundscale: the pass-through is a zero vector and
 * U the mask. The plain form uses _MM_FROUND_CUR_DIRECTION; the _round_ form
 * takes the last argument R. */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(I), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(I), \
      (int)(R)))
6027
/* Unmasked scalar-float roundscale: __builtin_ia32_rndscaless_round_mask with
 * a zero pass-through and an all-ones mask. The _round_ form takes the last
 * argument R; the plain form uses _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(imm), \
      (int)(R)))

#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(imm), \
      _MM_FROUND_CUR_DIRECTION))
6041
/* Merge-masked scalar-float roundscale: W is the pass-through and U the mask.
 * The plain form uses _MM_FROUND_CUR_DIRECTION; the _round_ form takes the
 * last argument R. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(I), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(I), \
      (int)(R)))
6055
/* Zero-masked scalar-float roundscale: the pass-through is a zero vector and
 * U the mask. The plain form uses _MM_FROUND_CUR_DIRECTION; the _round_ form
 * takes the last argument R. */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(I), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(I), \
      (int)(R)))
6069
/* __builtin_ia32_scalefpd512_mask on A and B with last argument R. The plain
 * form passes an undefined pass-through and an all-ones mask; the mask form
 * passes W and U; the maskz form passes a zero vector and U. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
6087
6088static __inline__ __m512d __DEFAULT_FN_ATTRS512
6089_mm512_scalef_pd (__m512d __A, __m512d __B)
6090{
6091 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6092 (__v8df) __B,
6093 (__v8df)
6095 (__mmask8) -1,
6097}
6098
6099static __inline__ __m512d __DEFAULT_FN_ATTRS512
6100_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6101{
6102 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6103 (__v8df) __B,
6104 (__v8df) __W,
6105 (__mmask8) __U,
6107}
6108
6109static __inline__ __m512d __DEFAULT_FN_ATTRS512
6110_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6111{
6112 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6113 (__v8df) __B,
6114 (__v8df)
6116 (__mmask8) __U,
6118}
6119
/* __builtin_ia32_scalefps512_mask on A and B with last argument R. The plain
 * form passes an undefined pass-through and an all-ones mask; the mask form
 * passes W and U; the maskz form passes a zero vector and U. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
6137
6138static __inline__ __m512 __DEFAULT_FN_ATTRS512
6139_mm512_scalef_ps (__m512 __A, __m512 __B)
6140{
6141 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6142 (__v16sf) __B,
6143 (__v16sf)
6145 (__mmask16) -1,
6147}
6148
6149static __inline__ __m512 __DEFAULT_FN_ATTRS512
6150_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6151{
6152 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6153 (__v16sf) __B,
6154 (__v16sf) __W,
6155 (__mmask16) __U,
6157}
6158
6159static __inline__ __m512 __DEFAULT_FN_ATTRS512
6160_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6161{
6162 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6163 (__v16sf) __B,
6164 (__v16sf)
6166 (__mmask16) __U,
6168}
6169
/* Unmasked scalar-double scalef: __builtin_ia32_scalefsd_round_mask with a
 * zero pass-through, an all-ones mask and last argument R. */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))
6175
6176static __inline__ __m128d __DEFAULT_FN_ATTRS128
6177_mm_scalef_sd (__m128d __A, __m128d __B)
6178{
6179 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6180 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6181 (__mmask8) -1,
6183}
6184
6185static __inline__ __m128d __DEFAULT_FN_ATTRS128
6186_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6187{
6188 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6189 (__v2df) __B,
6190 (__v2df) __W,
6191 (__mmask8) __U,
6193}
6194
/* Merge-masked scalar-double scalef: W is the pass-through, U the mask and R
 * the last builtin argument. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))
6200
6201static __inline__ __m128d __DEFAULT_FN_ATTRS128
6202_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6203{
6204 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6205 (__v2df) __B,
6206 (__v2df) _mm_setzero_pd (),
6207 (__mmask8) __U,
6209}
6210
/* Zero-masked scalar-double scalef: zero pass-through, mask U and last
 * builtin argument R. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
6216
/* Unmasked scalar-float scalef: __builtin_ia32_scalefss_round_mask with a
 * zero pass-through, an all-ones mask and last argument R. */
#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))
6222
6223static __inline__ __m128 __DEFAULT_FN_ATTRS128
6224_mm_scalef_ss (__m128 __A, __m128 __B)
6225{
6226 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6227 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6228 (__mmask8) -1,
6230}
6231
6232static __inline__ __m128 __DEFAULT_FN_ATTRS128
6233_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6234{
6235 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6236 (__v4sf) __B,
6237 (__v4sf) __W,
6238 (__mmask8) __U,
6240}
6241
/* Merge-masked scalar-float scalef: W is the pass-through, U the mask and R
 * the last builtin argument. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))
6247
6248static __inline__ __m128 __DEFAULT_FN_ATTRS128
6249_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6250{
6251 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6252 (__v4sf) __B,
6253 (__v4sf) _mm_setzero_ps (),
6254 (__mmask8) __U,
6256}
6257
/* Zero-masked scalar-float scalef: zero pass-through, mask U and last builtin
 * argument R. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
6264
6265static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6266_mm512_srai_epi32(__m512i __A, unsigned int __B) {
6267 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
6268}
6269
6270static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6271_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
6272 unsigned int __B) {
6273 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6274 (__v16si)_mm512_srai_epi32(__A, __B),
6275 (__v16si)__W);
6276}
6277
6278static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6279_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
6280 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6281 (__v16si)_mm512_srai_epi32(__A, __B),
6282 (__v16si)_mm512_setzero_si512());
6283}
6284
6285static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6286_mm512_srai_epi64(__m512i __A, unsigned int __B) {
6287 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
6288}
6289
6290static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6291_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A,
6292 unsigned int __B) {
6293 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6294 (__v8di)_mm512_srai_epi64(__A, __B),
6295 (__v8di)__W);
6296}
6297
6298static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6299_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
6300 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6301 (__v8di)_mm512_srai_epi64(__A, __B),
6302 (__v8di)_mm512_setzero_si512());
6303}
6304
/* Immediate-controlled shuffle of A and B through __builtin_ia32_shuf_f32x4.
 * The mask form blends the result with W and the maskz form with zero, both
 * under mask U. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
      (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()))
6318
/* Immediate-controlled shuffle of A and B through __builtin_ia32_shuf_f64x2.
 * The mask form blends the result with W and the maskz form with zero, both
 * under mask U. */
#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()))
6332
/* Immediate-controlled shuffle of A and B through __builtin_ia32_shuf_i32x4.
 * The mask form blends the result with W and the maskz form with zero, both
 * under mask U. */
#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
6346
/* Immediate-controlled shuffle of A and B through __builtin_ia32_shuf_i64x2.
 * The mask form blends the result with W and the maskz form with zero, both
 * under mask U. */
#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
6360
/* Immediate-controlled shuffle of A and B through __builtin_ia32_shufpd512.
 * The mask form blends the result with W and the maskz form with zero, both
 * under mask U. */
#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)_mm512_setzero_pd()))
6374
/* Immediate-controlled shuffle of A and B through __builtin_ia32_shufps512.
 * The mask form blends the result with W and the maskz form with zero, both
 * under mask U. */
#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)_mm512_setzero_ps()))
6388
/* Unmasked scalar-double sqrt: __builtin_ia32_sqrtsd_round_mask with a zero
 * pass-through, an all-ones mask and last argument R. */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))
6394
6395static __inline__ __m128d __DEFAULT_FN_ATTRS128
6396_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6397{
6398 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6399 (__v2df) __B,
6400 (__v2df) __W,
6401 (__mmask8) __U,
6403}
6404
/* Merge-masked scalar-double sqrt: W is the pass-through, U the mask and R
 * the last builtin argument. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))
6410
6411static __inline__ __m128d __DEFAULT_FN_ATTRS128
6412_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6413{
6414 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6415 (__v2df) __B,
6416 (__v2df) _mm_setzero_pd (),
6417 (__mmask8) __U,
6419}
6420
/* Zero-masked scalar-double sqrt: zero pass-through, mask U and last builtin
 * argument R. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
6426
/* Unmasked scalar-float sqrt: __builtin_ia32_sqrtss_round_mask with a zero
 * pass-through, an all-ones mask and last argument R. */
#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))
6432
6433static __inline__ __m128 __DEFAULT_FN_ATTRS128
6434_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6435{
6436 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6437 (__v4sf) __B,
6438 (__v4sf) __W,
6439 (__mmask8) __U,
6441}
6442
/* Merge-masked scalar-float sqrt: W is the pass-through, U the mask and R the
 * last builtin argument. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))
6448
6449static __inline__ __m128 __DEFAULT_FN_ATTRS128
6450_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6451{
6452 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6453 (__v4sf) __B,
6454 (__v4sf) _mm_setzero_ps (),
6455 (__mmask8) __U,
6457}
6458
/* Zero-masked scalar-float sqrt: zero pass-through, mask U and last builtin
 * argument R. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
6464
6465static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6467 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6468 0, 1, 2, 3, 0, 1, 2, 3,
6469 0, 1, 2, 3, 0, 1, 2, 3);
6470}
6471
6472static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6473_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
6474 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6475 (__v16sf)_mm512_broadcast_f32x4(__A),
6476 (__v16sf)__O);
6477}
6478
6479static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6481 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6482 (__v16sf)_mm512_broadcast_f32x4(__A),
6483 (__v16sf)_mm512_setzero_ps());
6484}
6485
6486static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6488 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6489 0, 1, 2, 3, 0, 1, 2, 3);
6490}
6491
6492static __inline__ __m512d __DEFAULT_FN_ATTRS512
6493_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6494{
6495 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6496 (__v8df)_mm512_broadcast_f64x4(__A),
6497 (__v8df)__O);
6498}
6499
6500static __inline__ __m512d __DEFAULT_FN_ATTRS512
6502{
6503 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6504 (__v8df)_mm512_broadcast_f64x4(__A),
6505 (__v8df)_mm512_setzero_pd());
6506}
6507
6508static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6510 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6511 0, 1, 2, 3, 0, 1, 2, 3,
6512 0, 1, 2, 3, 0, 1, 2, 3);
6513}
6514
6515static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6516_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
6517 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6518 (__v16si)_mm512_broadcast_i32x4(__A),
6519 (__v16si)__O);
6520}
6521
6522static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6524 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6525 (__v16si)_mm512_broadcast_i32x4(__A),
6526 (__v16si)_mm512_setzero_si512());
6527}
6528
6529static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6531 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6532 0, 1, 2, 3, 0, 1, 2, 3);
6533}
6534
6535static __inline__ __m512i __DEFAULT_FN_ATTRS512
6536_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6537{
6538 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6539 (__v8di)_mm512_broadcast_i64x4(__A),
6540 (__v8di)__O);
6541}
6542
6543static __inline__ __m512i __DEFAULT_FN_ATTRS512
6545{
6546 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6547 (__v8di)_mm512_broadcast_i64x4(__A),
6548 (__v8di)_mm512_setzero_si512());
6549}
6550
6551static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6552_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) {
6553 return (__m512d)__builtin_ia32_selectpd_512(__M,
6554 (__v8df) _mm512_broadcastsd_pd(__A),
6555 (__v8df) __O);
6556}
6557
6558static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6560 return (__m512d)__builtin_ia32_selectpd_512(__M,
6561 (__v8df) _mm512_broadcastsd_pd(__A),
6562 (__v8df) _mm512_setzero_pd());
6563}
6564
6565static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6566_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) {
6567 return (__m512)__builtin_ia32_selectps_512(__M,
6568 (__v16sf) _mm512_broadcastss_ps(__A),
6569 (__v16sf) __O);
6570}
6571
6572static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6574 return (__m512)__builtin_ia32_selectps_512(__M,
6575 (__v16sf) _mm512_broadcastss_ps(__A),
6576 (__v16sf) _mm512_setzero_ps());
6577}
6578
6579static __inline__ __m128i __DEFAULT_FN_ATTRS512
6581{
6582 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6583 (__v16qi) _mm_undefined_si128 (),
6584 (__mmask16) -1);
6585}
6586
6587static __inline__ __m128i __DEFAULT_FN_ATTRS512
6588_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6589{
6590 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6591 (__v16qi) __O, __M);
6592}
6593
6594static __inline__ __m128i __DEFAULT_FN_ATTRS512
6596{
6597 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6598 (__v16qi) _mm_setzero_si128 (),
6599 __M);
6600}
6601
6602static __inline__ void __DEFAULT_FN_ATTRS512
6604{
6605 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6606}
6607
6608static __inline__ __m256i __DEFAULT_FN_ATTRS512
6610{
6611 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6612 (__v16hi) _mm256_undefined_si256 (),
6613 (__mmask16) -1);
6614}
6615
6616static __inline__ __m256i __DEFAULT_FN_ATTRS512
6617_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6618{
6619 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6620 (__v16hi) __O, __M);
6621}
6622
6623static __inline__ __m256i __DEFAULT_FN_ATTRS512
6625{
6626 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6627 (__v16hi) _mm256_setzero_si256 (),
6628 __M);
6629}
6630
6631static __inline__ void __DEFAULT_FN_ATTRS512
6633{
6634 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6635}
6636
6637static __inline__ __m128i __DEFAULT_FN_ATTRS512
6639{
6640 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6641 (__v16qi) _mm_undefined_si128 (),
6642 (__mmask8) -1);
6643}
6644
6645static __inline__ __m128i __DEFAULT_FN_ATTRS512
6646_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6647{
6648 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6649 (__v16qi) __O, __M);
6650}
6651
6652static __inline__ __m128i __DEFAULT_FN_ATTRS512
6654{
6655 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6656 (__v16qi) _mm_setzero_si128 (),
6657 __M);
6658}
6659
6660static __inline__ void __DEFAULT_FN_ATTRS512
6662{
6663 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6664}
6665
6666static __inline__ __m256i __DEFAULT_FN_ATTRS512
6668{
6669 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6670 (__v8si) _mm256_undefined_si256 (),
6671 (__mmask8) -1);
6672}
6673
6674static __inline__ __m256i __DEFAULT_FN_ATTRS512
6675_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6676{
6677 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6678 (__v8si) __O, __M);
6679}
6680
6681static __inline__ __m256i __DEFAULT_FN_ATTRS512
6683{
6684 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6685 (__v8si) _mm256_setzero_si256 (),
6686 __M);
6687}
6688
6689static __inline__ void __DEFAULT_FN_ATTRS512
6691{
6692 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6693}
6694
6695static __inline__ __m128i __DEFAULT_FN_ATTRS512
6697{
6698 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6699 (__v8hi) _mm_undefined_si128 (),
6700 (__mmask8) -1);
6701}
6702
6703static __inline__ __m128i __DEFAULT_FN_ATTRS512
6704_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6705{
6706 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6707 (__v8hi) __O, __M);
6708}
6709
6710static __inline__ __m128i __DEFAULT_FN_ATTRS512
6712{
6713 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
6714 (__v8hi) _mm_setzero_si128 (),
6715 __M);
6716}
6717
6718static __inline__ void __DEFAULT_FN_ATTRS512
6720{
6721 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
6722}
6723
6724static __inline__ __m128i __DEFAULT_FN_ATTRS512
6726{
6727 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6728 (__v16qi) _mm_undefined_si128 (),
6729 (__mmask16) -1);
6730}
6731
6732static __inline__ __m128i __DEFAULT_FN_ATTRS512
6733_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6734{
6735 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6736 (__v16qi) __O,
6737 __M);
6738}
6739
6740static __inline__ __m128i __DEFAULT_FN_ATTRS512
6742{
6743 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
6744 (__v16qi) _mm_setzero_si128 (),
6745 __M);
6746}
6747
6748static __inline__ void __DEFAULT_FN_ATTRS512
6750{
6751 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6752}
6753
6754static __inline__ __m256i __DEFAULT_FN_ATTRS512
6756{
6757 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6758 (__v16hi) _mm256_undefined_si256 (),
6759 (__mmask16) -1);
6760}
6761
6762static __inline__ __m256i __DEFAULT_FN_ATTRS512
6763_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6764{
6765 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6766 (__v16hi) __O,
6767 __M);
6768}
6769
6770static __inline__ __m256i __DEFAULT_FN_ATTRS512
6772{
6773 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
6774 (__v16hi) _mm256_setzero_si256 (),
6775 __M);
6776}
6777
6778static __inline__ void __DEFAULT_FN_ATTRS512
6780{
6781 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6782}
6783
6784static __inline__ __m128i __DEFAULT_FN_ATTRS512
6786{
6787 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6788 (__v16qi) _mm_undefined_si128 (),
6789 (__mmask8) -1);
6790}
6791
6792static __inline__ __m128i __DEFAULT_FN_ATTRS512
6793_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6794{
6795 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6796 (__v16qi) __O,
6797 __M);
6798}
6799
6800static __inline__ __m128i __DEFAULT_FN_ATTRS512
6802{
6803 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
6804 (__v16qi) _mm_setzero_si128 (),
6805 __M);
6806}
6807
6808static __inline__ void __DEFAULT_FN_ATTRS512
6810{
6811 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6812}
6813
6814static __inline__ __m256i __DEFAULT_FN_ATTRS512
6816{
6817 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6818 (__v8si) _mm256_undefined_si256 (),
6819 (__mmask8) -1);
6820}
6821
6822static __inline__ __m256i __DEFAULT_FN_ATTRS512
6823_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6824{
6825 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6826 (__v8si) __O, __M);
6827}
6828
6829static __inline__ __m256i __DEFAULT_FN_ATTRS512
6831{
6832 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
6833 (__v8si) _mm256_setzero_si256 (),
6834 __M);
6835}
6836
6837static __inline__ void __DEFAULT_FN_ATTRS512
6839{
6840 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
6841}
6842
6843static __inline__ __m128i __DEFAULT_FN_ATTRS512
6845{
6846 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6847 (__v8hi) _mm_undefined_si128 (),
6848 (__mmask8) -1);
6849}
6850
6851static __inline__ __m128i __DEFAULT_FN_ATTRS512
6852_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6853{
6854 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6855 (__v8hi) __O, __M);
6856}
6857
6858static __inline__ __m128i __DEFAULT_FN_ATTRS512
6860{
6861 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
6862 (__v8hi) _mm_setzero_si128 (),
6863 __M);
6864}
6865
6866static __inline__ void __DEFAULT_FN_ATTRS512
6868{
6869 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
6870}
6871
6872static __inline__ __m128i __DEFAULT_FN_ATTRS512
6874{
6875 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6876 (__v16qi) _mm_undefined_si128 (),
6877 (__mmask16) -1);
6878}
6879
6880static __inline__ __m128i __DEFAULT_FN_ATTRS512
6881_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6882{
6883 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6884 (__v16qi) __O, __M);
6885}
6886
6887static __inline__ __m128i __DEFAULT_FN_ATTRS512
6889{
6890 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
6891 (__v16qi) _mm_setzero_si128 (),
6892 __M);
6893}
6894
6895static __inline__ void __DEFAULT_FN_ATTRS512
6897{
6898 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6899}
6900
6901static __inline__ __m256i __DEFAULT_FN_ATTRS512
6903{
6904 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6905 (__v16hi) _mm256_undefined_si256 (),
6906 (__mmask16) -1);
6907}
6908
6909static __inline__ __m256i __DEFAULT_FN_ATTRS512
6910_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6911{
6912 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6913 (__v16hi) __O, __M);
6914}
6915
6916static __inline__ __m256i __DEFAULT_FN_ATTRS512
6918{
6919 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
6920 (__v16hi) _mm256_setzero_si256 (),
6921 __M);
6922}
6923
6924static __inline__ void __DEFAULT_FN_ATTRS512
6926{
6927 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
6928}
6929
6930static __inline__ __m128i __DEFAULT_FN_ATTRS512
6932{
6933 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6934 (__v16qi) _mm_undefined_si128 (),
6935 (__mmask8) -1);
6936}
6937
6938static __inline__ __m128i __DEFAULT_FN_ATTRS512
6939_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6940{
6941 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6942 (__v16qi) __O, __M);
6943}
6944
6945static __inline__ __m128i __DEFAULT_FN_ATTRS512
6947{
6948 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
6949 (__v16qi) _mm_setzero_si128 (),
6950 __M);
6951}
6952
6953static __inline__ void __DEFAULT_FN_ATTRS512
6955{
6956 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6957}
6958
6959static __inline__ __m256i __DEFAULT_FN_ATTRS512
6961{
6962 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6963 (__v8si) _mm256_undefined_si256 (),
6964 (__mmask8) -1);
6965}
6966
6967static __inline__ __m256i __DEFAULT_FN_ATTRS512
6968_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6969{
6970 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6971 (__v8si) __O, __M);
6972}
6973
6974static __inline__ __m256i __DEFAULT_FN_ATTRS512
6976{
6977 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
6978 (__v8si) _mm256_setzero_si256 (),
6979 __M);
6980}
6981
6982static __inline__ void __DEFAULT_FN_ATTRS512
6984{
6985 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
6986}
6987
6988static __inline__ __m128i __DEFAULT_FN_ATTRS512
6990{
6991 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
6992 (__v8hi) _mm_undefined_si128 (),
6993 (__mmask8) -1);
6994}
6995
6996static __inline__ __m128i __DEFAULT_FN_ATTRS512
6997_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
6998{
6999 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7000 (__v8hi) __O, __M);
7001}
7002
7003static __inline__ __m128i __DEFAULT_FN_ATTRS512
7005{
7006 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7007 (__v8hi) _mm_setzero_si128 (),
7008 __M);
7009}
7010
7011static __inline__ void __DEFAULT_FN_ATTRS512
7013{
7014 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7015}
7016
/* Extract the 128-bit group of four 32-bit integers selected by imm from A.
 * The unmasked form passes an all-ones mask, so its zero pass-through is never
 * selected. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(), \
      (__mmask8)-1))

/* Merge-masking form: elements whose bit in U is clear come from W. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
      (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)))
7031
/* Extract the 256-bit group of four 64-bit integers selected by imm from A.
 * The unmasked form passes an all-ones mask, so its zero pass-through is never
 * selected. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
      (__v4di)_mm256_setzero_si256(), \
      (__mmask8)-1))

/* Merge-masking form: elements whose bit in U is clear come from W. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
      (__v4di)_mm256_setzero_si256(), \
      (__mmask8)(U)))
7046
/* Return A with the 256-bit group selected by imm replaced by the four doubles
 * of B. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
      (__v4df)(__m256d)(B), (int)(imm)))

/* Merge-masking form: per 64-bit element, take the inserted result where the
 * bit in U is set and W otherwise. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)(__m512d)(W)))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()))
7060
/* Return A with the 256-bit group selected by imm replaced by the four 64-bit
 * integers of B. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
      (__v4di)(__m256i)(B), (int)(imm)))

/* Merge-masking form: per 64-bit element, take the inserted result where the
 * bit in U is set and W otherwise. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
7074
/* Return A with the 128-bit group selected by imm replaced by the four floats
 * of B. */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
      (__v4sf)(__m128)(B), (int)(imm)))

/* Merge-masking form: per 32-bit element, take the inserted result where the
 * bit in U is set and W otherwise. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)(__m512)(W)))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()))
7088
/* Return A with the 128-bit group selected by imm replaced by the four 32-bit
 * integers of B. */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
      (__v4si)(__m128i)(B), (int)(imm)))

/* Merge-masking form: per 32-bit element, take the inserted result where the
 * bit in U is set and W otherwise. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
7102
/* Normalized mantissa of each double in A (VGETMANTPD) with explicit
 * rounding/exception control R. The immediate is built as (C << 2) | B:
 * B is the normalization interval selector and C the sign control, per the
 * Intel definition. The unmasked form passes an all-ones mask. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masking form: elements whose bit in U is clear come from W. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
7120
/* Normalized mantissa of each double in A (VGETMANTPD) using the current
 * rounding direction. The immediate is built as (C << 2) | B: B is the
 * normalization interval selector and C the sign control, per the Intel
 * definition. The unmasked form passes an all-ones mask, so its zero
 * pass-through is never selected. */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

/* Merge-masking form: elements whose bit in U is clear come from W. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
7141
/* Normalized mantissa of each float in A (VGETMANTPS) with explicit
 * rounding/exception control R. The immediate is built as (C << 2) | B:
 * B is the normalization interval selector and C the sign control, per the
 * Intel definition. The unmasked form passes an all-ones mask. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))

/* Merge-masking form: elements whose bit in U is clear come from W. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
7159
/* Normalized mantissa of each float in A (VGETMANTPS) using the current
 * rounding direction. The immediate is built as (C << 2) | B: B is the
 * normalization interval selector and C the sign control, per the Intel
 * definition. The unmasked form passes an all-ones mask. */
#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
      (int)(((C)<<2)|(B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

/* Merge-masking form: elements whose bit in U is clear come from W. */
#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
      (int)(((C)<<2)|(B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
      (int)(((C)<<2)|(B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
7180
/* Exponent of each double in A returned as a double (VGETEXPPD), with explicit
 * rounding/exception control R. The unmasked form passes an all-ones mask. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masking form: elements whose bit in U is clear come from W. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
7195
7196static __inline__ __m512d __DEFAULT_FN_ATTRS512
7197_mm512_getexp_pd (__m512d __A)
7198{
7199 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7200 (__v8df) _mm512_undefined_pd (),
7201 (__mmask8) -1,
7203}
7204
7205static __inline__ __m512d __DEFAULT_FN_ATTRS512
7206_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7207{
7208 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7209 (__v8df) __W,
7210 (__mmask8) __U,
7212}
7213
7214static __inline__ __m512d __DEFAULT_FN_ATTRS512
7216{
7217 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7218 (__v8df) _mm512_setzero_pd (),
7219 (__mmask8) __U,
7221}
7222
/* Exponent of each float in A returned as a float (VGETEXPPS), with explicit
 * rounding/exception control R. The unmasked form passes an all-ones mask. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))

/* Merge-masking form: elements whose bit in U is clear come from W. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

/* Zero-masking form: elements whose bit in U is clear are zero. */
#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
7237
7238static __inline__ __m512 __DEFAULT_FN_ATTRS512
7240{
7241 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7242 (__v16sf) _mm512_undefined_ps (),
7243 (__mmask16) -1,
7245}
7246
7247static __inline__ __m512 __DEFAULT_FN_ATTRS512
7248_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7249{
7250 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7251 (__v16sf) __W,
7252 (__mmask16) __U,
7254}
7255
7256static __inline__ __m512 __DEFAULT_FN_ATTRS512
7258{
7259 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7260 (__v16sf) _mm512_setzero_ps (),
7261 (__mmask16) __U,
7263}
7264
/* Gather eight floats from addr using the eight 64-bit indices in index, each
 * multiplied by scale (a byte factor; 1, 2, 4 or 8 per the Intel definition).
 * The unmasked form passes an all-ones mask. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)-1, \
      (int)(scale)))

/* Masked form: elements whose bit in mask is clear are taken from v1_old
 * instead of being loaded. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), (int)(scale)))
7276
/* Gather eight 32-bit integers from addr using the eight 64-bit indices in
 * index, each multiplied by scale (a byte factor; 1, 2, 4 or 8 per the Intel
 * definition). The unmasked form passes an all-ones mask. */
#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, (int)(scale)))

/* Masked form: elements whose bit in mask is clear are taken from v1_old
 * instead of being loaded. */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), (int)(scale)))
7288
/* Gather eight doubles from addr using the eight 64-bit indices in index, each
 * multiplied by scale (a byte factor; 1, 2, 4 or 8 per the Intel definition).
 * The unmasked form passes an all-ones mask. */
#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)-1, \
      (int)(scale)))

/* Masked form: elements whose bit in mask is clear are taken from v1_old
 * instead of being loaded. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), (int)(scale)))
7300
/* Gather eight 64-bit integers from addr using the eight 64-bit indices in
 * index, each multiplied by scale (a byte factor; 1, 2, 4 or 8 per the Intel
 * definition). The unmasked form passes an all-ones mask. */
#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), (__mmask8)-1, \
      (int)(scale)))

/* Masked form: elements whose bit in mask is clear are taken from v1_old
 * instead of being loaded. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), (int)(scale)))
7312
/* Gather sixteen floats from addr using the sixteen 32-bit indices in index,
 * each multiplied by scale (a byte factor; 1, 2, 4 or 8 per the Intel
 * definition). The unmasked form passes an all-ones mask.
 * index is an integer vector, so it is cast through __m512i like the other
 * 32-bit-index gathers and scatters. */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)-1, (int)(scale)))

/* Masked form: elements whose bit in mask is clear are taken from v1_old
 * instead of being loaded. */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)(mask), (int)(scale)))
7324
/* Gather sixteen 32-bit integers from addr using the sixteen 32-bit indices in
 * index, each multiplied by scale (a byte factor; 1, 2, 4 or 8 per the Intel
 * definition). The unmasked form passes an all-ones mask. */
#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)-1, (int)(scale)))

/* Masked form: elements whose bit in mask is clear are taken from v1_old
 * instead of being loaded. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)(mask), (int)(scale)))
7336
/* Gather eight doubles from addr using the eight 32-bit indices in the 256-bit
 * vector index, each multiplied by scale (a byte factor; 1, 2, 4 or 8 per the
 * Intel definition). The unmasked form passes an all-ones mask. */
#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)-1, \
      (int)(scale)))

/* Masked form: elements whose bit in mask is clear are taken from v1_old
 * instead of being loaded. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))
7348
/* Gather eight 64-bit integers from addr using the eight 32-bit indices in the
 * 256-bit vector index, each multiplied by scale (a byte factor; 1, 2, 4 or 8
 * per the Intel definition). The unmasked form passes an all-ones mask. */
#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)-1, \
      (int)(scale)))

/* Masked form: elements whose bit in mask is clear are taken from v1_old
 * instead of being loaded. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))
7360
/* Scatter the eight floats of v1 to addr using the eight 64-bit indices in
 * index, each multiplied by scale (a byte factor; 1, 2, 4 or 8 per the Intel
 * definition). The unmasked form passes an all-ones mask. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))

/* Masked form: only elements whose bit in mask is set are stored. */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
7370
/* Scatter the eight 32-bit integers of v1 to addr using the eight 64-bit
 * indices in index, each multiplied by scale (a byte factor; 1, 2, 4 or 8 per
 * the Intel definition). The unmasked form passes an all-ones mask. */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))

/* Masked form: only elements whose bit in mask is set are stored. */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
7380
/* Scatter the eight doubles of v1 to addr using the eight 64-bit indices in
 * index, each multiplied by scale (a byte factor; 1, 2, 4 or 8 per the Intel
 * definition). The unmasked form passes an all-ones mask. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))

/* Masked form: only elements whose bit in mask is set are stored. */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))
7390
/* Scatter the eight 64-bit integers of v1 to addr using the eight 64-bit
 * indices in index, each multiplied by scale (a byte factor; 1, 2, 4 or 8 per
 * the Intel definition). The unmasked form passes an all-ones mask. */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), (int)(scale))

/* Masked form: only elements whose bit in mask is set are stored. */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), (int)(scale))
7400
/* Scatter the sixteen floats of v1 to addr using the sixteen 32-bit indices in
 * index, each multiplied by scale (a byte factor; 1, 2, 4 or 8 per the Intel
 * definition). The unmasked form passes an all-ones mask. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), (int)(scale))

/* Masked form: only elements whose bit in mask is set are stored. */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), (int)(scale))
7410
/* Scatter the sixteen 32-bit integers of v1 to addr using the sixteen 32-bit
 * indices in index, each multiplied by scale (a byte factor; 1, 2, 4 or 8 per
 * the Intel definition). The unmasked form passes an all-ones mask. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), (int)(scale))

/* Masked form: only elements whose bit in mask is set are stored. */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), (int)(scale))
7420
/* Scatter the eight doubles of v1 to addr using the eight 32-bit indices in
 * the 256-bit vector index, each multiplied by scale (a byte factor; 1, 2, 4
 * or 8 per the Intel definition). The unmasked form passes an all-ones mask. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))

/* Masked form: only elements whose bit in mask is set are stored. */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))
7430
/* Scatter the eight 64-bit integers of v1 to addr using the eight 32-bit
 * indices in the 256-bit vector index, each multiplied by scale (a byte
 * factor; 1, 2, 4 or 8 per the Intel definition). The unmasked form passes an
 * all-ones mask. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), (int)(scale))

/* Masked form: only elements whose bit in mask is set are stored. */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), (int)(scale))
7440
7441static __inline__ __m128 __DEFAULT_FN_ATTRS128
7442_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7443{
7444 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7445 (__v4sf)__A,
7446 (__v4sf)__B,
7447 (__mmask8)__U,
7449}
7450
7451#define _mm_fmadd_round_ss(A, B, C, R) \
7452 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7453 (__v4sf)(__m128)(B), \
7454 (__v4sf)(__m128)(C), (__mmask8)-1, \
7455 (int)(R)))
7456
7457#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7458 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7459 (__v4sf)(__m128)(A), \
7460 (__v4sf)(__m128)(B), (__mmask8)(U), \
7461 (int)(R)))
7462
7463static __inline__ __m128 __DEFAULT_FN_ATTRS128
7464_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7465{
7466 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7467 (__v4sf)__B,
7468 (__v4sf)__C,
7469 (__mmask8)__U,
7471}
7472
7473#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7474 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7475 (__v4sf)(__m128)(B), \
7476 (__v4sf)(__m128)(C), (__mmask8)(U), \
7477 (int)(R)))
7478
7479static __inline__ __m128 __DEFAULT_FN_ATTRS128
7480_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7481{
7482 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7483 (__v4sf)__X,
7484 (__v4sf)__Y,
7485 (__mmask8)__U,
7487}
7488
7489#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7490 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7491 (__v4sf)(__m128)(X), \
7492 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7493 (int)(R)))
7494
7495static __inline__ __m128 __DEFAULT_FN_ATTRS128
7496_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7497{
7498 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7499 (__v4sf)__A,
7500 -(__v4sf)__B,
7501 (__mmask8)__U,
7503}
7504
7505#define _mm_fmsub_round_ss(A, B, C, R) \
7506 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7507 (__v4sf)(__m128)(B), \
7508 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7509 (int)(R)))
7510
7511#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7512 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7513 (__v4sf)(__m128)(A), \
7514 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7515 (int)(R)))
7516
7517static __inline__ __m128 __DEFAULT_FN_ATTRS128
7518_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7519{
7520 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7521 (__v4sf)__B,
7522 -(__v4sf)__C,
7523 (__mmask8)__U,
7525}
7526
7527#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7528 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7529 (__v4sf)(__m128)(B), \
7530 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7531 (int)(R)))
7532
7533static __inline__ __m128 __DEFAULT_FN_ATTRS128
7534_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7535{
7536 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7537 (__v4sf)__X,
7538 (__v4sf)__Y,
7539 (__mmask8)__U,
7541}
7542
7543#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7544 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7545 (__v4sf)(__m128)(X), \
7546 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7547 (int)(R)))
7548
7549static __inline__ __m128 __DEFAULT_FN_ATTRS128
7550_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7551{
7552 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7553 -(__v4sf)__A,
7554 (__v4sf)__B,
7555 (__mmask8)__U,
7557}
7558
7559#define _mm_fnmadd_round_ss(A, B, C, R) \
7560 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7561 -(__v4sf)(__m128)(B), \
7562 (__v4sf)(__m128)(C), (__mmask8)-1, \
7563 (int)(R)))
7564
7565#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7566 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7567 -(__v4sf)(__m128)(A), \
7568 (__v4sf)(__m128)(B), (__mmask8)(U), \
7569 (int)(R)))
7570
7571static __inline__ __m128 __DEFAULT_FN_ATTRS128
7572_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7573{
7574 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7575 -(__v4sf)__B,
7576 (__v4sf)__C,
7577 (__mmask8)__U,
7579}
7580
7581#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7582 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7583 -(__v4sf)(__m128)(B), \
7584 (__v4sf)(__m128)(C), (__mmask8)(U), \
7585 (int)(R)))
7586
7587static __inline__ __m128 __DEFAULT_FN_ATTRS128
7588_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7589{
7590 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7591 -(__v4sf)__X,
7592 (__v4sf)__Y,
7593 (__mmask8)__U,
7595}
7596
7597#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7598 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7599 -(__v4sf)(__m128)(X), \
7600 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7601 (int)(R)))
7602
7603static __inline__ __m128 __DEFAULT_FN_ATTRS128
7604_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7605{
7606 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7607 -(__v4sf)__A,
7608 -(__v4sf)__B,
7609 (__mmask8)__U,
7611}
7612
7613#define _mm_fnmsub_round_ss(A, B, C, R) \
7614 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7615 -(__v4sf)(__m128)(B), \
7616 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7617 (int)(R)))
7618
7619#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7620 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7621 -(__v4sf)(__m128)(A), \
7622 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7623 (int)(R)))
7624
7625static __inline__ __m128 __DEFAULT_FN_ATTRS128
7626_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7627{
7628 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7629 -(__v4sf)__B,
7630 -(__v4sf)__C,
7631 (__mmask8)__U,
7633}
7634
7635#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7636 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7637 -(__v4sf)(__m128)(B), \
7638 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7639 (int)(R)))
7640
7641static __inline__ __m128 __DEFAULT_FN_ATTRS128
7642_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7643{
7644 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7645 -(__v4sf)__X,
7646 (__v4sf)__Y,
7647 (__mmask8)__U,
7649}
7650
7651#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7652 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7653 -(__v4sf)(__m128)(X), \
7654 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7655 (int)(R)))
7656
7657static __inline__ __m128d __DEFAULT_FN_ATTRS128
7658_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7659{
7660 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7661 (__v2df)__A,
7662 (__v2df)__B,
7663 (__mmask8)__U,
7665}
7666
7667#define _mm_fmadd_round_sd(A, B, C, R) \
7668 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7669 (__v2df)(__m128d)(B), \
7670 (__v2df)(__m128d)(C), (__mmask8)-1, \
7671 (int)(R)))
7672
7673#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
7674 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7675 (__v2df)(__m128d)(A), \
7676 (__v2df)(__m128d)(B), (__mmask8)(U), \
7677 (int)(R)))
7678
7679static __inline__ __m128d __DEFAULT_FN_ATTRS128
7680_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7681{
7682 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7683 (__v2df)__B,
7684 (__v2df)__C,
7685 (__mmask8)__U,
7687}
7688
7689#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
7690 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7691 (__v2df)(__m128d)(B), \
7692 (__v2df)(__m128d)(C), (__mmask8)(U), \
7693 (int)(R)))
7694
7695static __inline__ __m128d __DEFAULT_FN_ATTRS128
7696_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7697{
7698 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7699 (__v2df)__X,
7700 (__v2df)__Y,
7701 (__mmask8)__U,
7703}
7704
7705#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
7706 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7707 (__v2df)(__m128d)(X), \
7708 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7709 (int)(R)))
7710
7711static __inline__ __m128d __DEFAULT_FN_ATTRS128
7712_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7713{
7714 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7715 (__v2df)__A,
7716 -(__v2df)__B,
7717 (__mmask8)__U,
7719}
7720
7721#define _mm_fmsub_round_sd(A, B, C, R) \
7722 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7723 (__v2df)(__m128d)(B), \
7724 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7725 (int)(R)))
7726
7727#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
7728 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7729 (__v2df)(__m128d)(A), \
7730 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7731 (int)(R)))
7732
7733static __inline__ __m128d __DEFAULT_FN_ATTRS128
7734_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7735{
7736 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7737 (__v2df)__B,
7738 -(__v2df)__C,
7739 (__mmask8)__U,
7741}
7742
7743#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
7744 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7745 (__v2df)(__m128d)(B), \
7746 -(__v2df)(__m128d)(C), \
7747 (__mmask8)(U), (int)(R)))
7748
7749static __inline__ __m128d __DEFAULT_FN_ATTRS128
7750_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7751{
7752 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7753 (__v2df)__X,
7754 (__v2df)__Y,
7755 (__mmask8)__U,
7757}
7758
7759#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
7760 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7761 (__v2df)(__m128d)(X), \
7762 (__v2df)(__m128d)(Y), \
7763 (__mmask8)(U), (int)(R)))
7764
7765static __inline__ __m128d __DEFAULT_FN_ATTRS128
7766_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7767{
7768 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7769 -(__v2df)__A,
7770 (__v2df)__B,
7771 (__mmask8)__U,
7773}
7774
7775#define _mm_fnmadd_round_sd(A, B, C, R) \
7776 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7777 -(__v2df)(__m128d)(B), \
7778 (__v2df)(__m128d)(C), (__mmask8)-1, \
7779 (int)(R)))
7780
7781#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
7782 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7783 -(__v2df)(__m128d)(A), \
7784 (__v2df)(__m128d)(B), (__mmask8)(U), \
7785 (int)(R)))
7786
7787static __inline__ __m128d __DEFAULT_FN_ATTRS128
7788_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7789{
7790 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7791 -(__v2df)__B,
7792 (__v2df)__C,
7793 (__mmask8)__U,
7795}
7796
7797#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
7798 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7799 -(__v2df)(__m128d)(B), \
7800 (__v2df)(__m128d)(C), (__mmask8)(U), \
7801 (int)(R)))
7802
7803static __inline__ __m128d __DEFAULT_FN_ATTRS128
7804_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7805{
7806 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
7807 -(__v2df)__X,
7808 (__v2df)__Y,
7809 (__mmask8)__U,
7811}
7812
7813#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
7814 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
7815 -(__v2df)(__m128d)(X), \
7816 (__v2df)(__m128d)(Y), (__mmask8)(U), \
7817 (int)(R)))
7818
7819static __inline__ __m128d __DEFAULT_FN_ATTRS128
7820_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7821{
7822 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7823 -(__v2df)__A,
7824 -(__v2df)__B,
7825 (__mmask8)__U,
7827}
7828
7829#define _mm_fnmsub_round_sd(A, B, C, R) \
7830 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7831 -(__v2df)(__m128d)(B), \
7832 -(__v2df)(__m128d)(C), (__mmask8)-1, \
7833 (int)(R)))
7834
7835#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
7836 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7837 -(__v2df)(__m128d)(A), \
7838 -(__v2df)(__m128d)(B), (__mmask8)(U), \
7839 (int)(R)))
7840
7841static __inline__ __m128d __DEFAULT_FN_ATTRS128
7842_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7843{
7844 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7845 -(__v2df)__B,
7846 -(__v2df)__C,
7847 (__mmask8)__U,
7849}
7850
7851#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
7852 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
7853 -(__v2df)(__m128d)(B), \
7854 -(__v2df)(__m128d)(C), \
7855 (__mmask8)(U), \
7856 (int)(R)))
7857
7858static __inline__ __m128d __DEFAULT_FN_ATTRS128
7859_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
7860{
7861 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
7862 -(__v2df)__X,
7863 (__v2df)__Y,
7864 (__mmask8)__U,
7866}
7867
7868#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
7869 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
7870 -(__v2df)(__m128d)(X), \
7871 (__v2df)(__m128d)(Y), \
7872 (__mmask8)(U), (int)(R)))
7873
/* Immediate-controlled permute of a __m512d via __builtin_ia32_permdf512. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

/* Masked form: the permute result and W go through selectpd_512 under U. */
#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)(__m512d)(W)))

/* maskz form: same select, with a zero vector in place of W. */
#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()))
7886
/* Immediate-controlled permute of a __m512i (as __v8di) via
 * __builtin_ia32_permdi512. */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

/* Masked form: the permute result and W go through selectq_512 under U. */
#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)(__m512i)(W)))

/* maskz form: same select, with a zero vector in place of W. */
#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)_mm512_setzero_si512()))
7899
7900static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7901_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
7902 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
7903}
7904
7905static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7906_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X,
7907 __m512d __Y) {
7908 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7909 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7910 (__v8df)__W);
7911}
7912
7913static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
7914_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
7915 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
7916 (__v8df)_mm512_permutexvar_pd(__X, __Y),
7917 (__v8df)_mm512_setzero_pd());
7918}
7919
7920static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7921_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
7922 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
7923}
7924
7925static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7926_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
7927 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7928 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
7929 (__v8di)_mm512_setzero_si512());
7930}
7931
7932static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7933_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X,
7934 __m512i __Y) {
7935 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7936 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
7937 (__v8di)__W);
7938}
7939
7940static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7941_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
7942 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
7943}
7944
7945static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7946_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
7947 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
7948 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
7949 (__v16sf)__W);
7950}
7951
7952static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
7953_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
7954 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
7955 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
7956 (__v16sf)_mm512_setzero_ps());
7957}
7958
7959static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7960_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
7961 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
7962}
7963
7964#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
7965
7966static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7968 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7969 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
7970 (__v16si)_mm512_setzero_si512());
7971}
7972
7973static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
7974_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X,
7975 __m512i __Y) {
7976 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7977 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
7978 (__v16si)__W);
7979}
7980
7981#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
7982
7983static __inline__ __mmask16
7985 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
7986}
7987
7990 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
7991}
7992
7995 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
7996}
7997
7998static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
8000 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8001}
8002
8003static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
8005 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8006}
8007
8008static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8010 return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8011}
8012
8013static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8015 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8016}
8017
8018static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
8019_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8020 *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8021 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8022}
8023
8026 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8027}
8028
8031 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8032}
8033
8036 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8037}
8038
/* *_mask16 spellings that expand to the _mm512_k* wrappers. */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

/* Mask shifts; the count I is converted to unsigned int before the call. */
#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi( \
      (__mmask16)(A), (unsigned int)(I)))

#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi( \
      (__mmask16)(A), (unsigned int)(I)))
8051
8052static __inline__ unsigned int
8054 return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8055}
8056
8058_cvtu32_mask16(unsigned int __A) {
8059 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8060}
8061
8062static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8064 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8065}
8066
8067static __inline__ void __DEFAULT_FN_ATTRS
8069 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8070}
8071
8072static __inline__ void __DEFAULT_FN_ATTRS512
8073_mm512_stream_si512 (void * __P, __m512i __A)
8074{
8075 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8076 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8077}
8078
8079static __inline__ __m512i __DEFAULT_FN_ATTRS512
8081{
8082 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8083 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8084}
8085
8086static __inline__ void __DEFAULT_FN_ATTRS512
8087_mm512_stream_pd (void *__P, __m512d __A)
8088{
8089 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8090 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8091}
8092
8093static __inline__ void __DEFAULT_FN_ATTRS512
8094_mm512_stream_ps (void *__P, __m512 __A)
8095{
8096 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8097 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8098}
8099
8100static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8101_mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A) {
8102 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8103 (__v8df) __W,
8104 (__mmask8) __U);
8105}
8106
8107static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8109 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8110 (__v8df)
8112 (__mmask8) __U);
8113}
8114
8115static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8116_mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
8117 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8118 (__v8di) __W,
8119 (__mmask8) __U);
8120}
8121
8122static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8124 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8125 (__v8di)
8127 (__mmask8) __U);
8128}
8129
8130static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8131_mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8132 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8133 (__v16sf) __W,
8134 (__mmask16) __U);
8135}
8136
8137static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8139 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8140 (__v16sf)
8142 (__mmask16) __U);
8143}
8144
8145static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8146_mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
8147 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8148 (__v16si) __W,
8149 (__mmask16) __U);
8150}
8151
8152static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8154 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8155 (__v16si)
8157 (__mmask16) __U);
8158}
8159
/* Scalar compare-to-mask macros over __builtin_ia32_cmpss_mask (float) and
 * __builtin_ia32_cmpsd_mask (double). P is the comparison predicate.
 * Unmasked forms pass an all-ones mask; forms without "round" in the name
 * pass _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
8203
8204/* Bit Test */
8205
8207_mm512_test_epi32_mask(__m512i __A, __m512i __B) {
8210}
8211
8213_mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B) {
8214 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8216}
8217
8219_mm512_test_epi64_mask(__m512i __A, __m512i __B) {
8220 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8222}
8223
8225_mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B) {
8226 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8228}
8229
8231_mm512_testn_epi32_mask(__m512i __A, __m512i __B) {
8232 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8234}
8235
8237_mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B) {
8238 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8240}
8241
8243_mm512_testn_epi64_mask(__m512i __A, __m512i __B) {
8244 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8246}
8247
8249_mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B) {
8250 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8252}
8253
8254static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8256{
8257 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8258 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8259}
8260
8261static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8262_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8263 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8264 (__v16sf)_mm512_movehdup_ps(__A),
8265 (__v16sf)__W);
8266}
8267
8268static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8270 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8271 (__v16sf)_mm512_movehdup_ps(__A),
8272 (__v16sf)_mm512_setzero_ps());
8273}
8274
8275static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8277{
8278 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8279 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8280}
8281
8282static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8283_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8284 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8285 (__v16sf)_mm512_moveldup_ps(__A),
8286 (__v16sf)__W);
8287}
8288
8289static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8291 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8292 (__v16sf)_mm512_moveldup_ps(__A),
8293 (__v16sf)_mm512_setzero_ps());
8294}
8295
8296static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8297_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
8298 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8299}
8300
8301static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8302_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
8303 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8304 _mm_setzero_ps());
8305}
8306
8307static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8308_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
8309 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8310}
8311
8312static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8313_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
8314 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8315 _mm_setzero_pd());
8316}
8317
8318static __inline__ void __DEFAULT_FN_ATTRS128
8319_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8320{
8321 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8322}
8323
8324static __inline__ void __DEFAULT_FN_ATTRS128
8325_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8326{
8327 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8328}
8329
8330static __inline__ __m128 __DEFAULT_FN_ATTRS128
8331_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8332{
8333 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8334 (__v4sf)_mm_setzero_ps(),
8335 0, 4, 4, 4);
8336
8337 return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
8338}
8339
8340static __inline__ __m128 __DEFAULT_FN_ATTRS128
8341_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8342{
8343 return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
8344 (__v4sf) _mm_setzero_ps(),
8345 __U & 1);
8346}
8347
8348static __inline__ __m128d __DEFAULT_FN_ATTRS128
8349_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8350{
8351 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8352 (__v2df)_mm_setzero_pd(),
8353 0, 2);
8354
8355 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
8356}
8357
8358static __inline__ __m128d __DEFAULT_FN_ATTRS128
8359_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8360{
8361 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
8362 (__v2df) _mm_setzero_pd(),
8363 __U & 1);
8364}
8365
/* Immediate-controlled 32-bit shuffle via __builtin_ia32_pshufd512. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

/* Masked form: shuffle result and W go through selectd_512 under U. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)(__m512i)(W)))

/* maskz form: same select, with a zero vector in place of W. */
#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)_mm512_setzero_si512()))
8378
8379static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8380_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8381{
8382 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8383 (__v8df) __W,
8384 (__mmask8) __U);
8385}
8386
8387static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8389{
8390 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8391 (__v8df) _mm512_setzero_pd (),
8392 (__mmask8) __U);
8393}
8394
8395static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8396_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8397{
8398 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8399 (__v8di) __W,
8400 (__mmask8) __U);
8401}
8402
8403static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8405{
8406 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8407 (__v8di) _mm512_setzero_si512 (),
8408 (__mmask8) __U);
8409}
8410
8411static __inline__ __m512d __DEFAULT_FN_ATTRS512
8412_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8413{
8414 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8415 (__v8df) __W,
8416 (__mmask8) __U);
8417}
8418
8419static __inline__ __m512d __DEFAULT_FN_ATTRS512
8421{
8422 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8423 (__v8df) _mm512_setzero_pd(),
8424 (__mmask8) __U);
8425}
8426
8427static __inline__ __m512i __DEFAULT_FN_ATTRS512
8428_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8429{
8430 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8431 (__v8di) __W,
8432 (__mmask8) __U);
8433}
8434
8435static __inline__ __m512i __DEFAULT_FN_ATTRS512
8437{
8438 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8439 (__v8di) _mm512_setzero_si512(),
8440 (__mmask8) __U);
8441}
8442
8443static __inline__ __m512 __DEFAULT_FN_ATTRS512
8444_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8445{
8446 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8447 (__v16sf) __W,
8448 (__mmask16) __U);
8449}
8450
8451static __inline__ __m512 __DEFAULT_FN_ATTRS512
8453{
8454 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8455 (__v16sf) _mm512_setzero_ps(),
8456 (__mmask16) __U);
8457}
8458
8459static __inline__ __m512i __DEFAULT_FN_ATTRS512
8460_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8461{
8462 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8463 (__v16si) __W,
8464 (__mmask16) __U);
8465}
8466
8467static __inline__ __m512i __DEFAULT_FN_ATTRS512
8469{
8470 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8471 (__v16si) _mm512_setzero_si512(),
8472 (__mmask16) __U);
8473}
8474
8475static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8476_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8477{
8478 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8479 (__v16sf) __W,
8480 (__mmask16) __U);
8481}
8482
8483static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8485{
8486 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8487 (__v16sf) _mm512_setzero_ps(),
8488 (__mmask16) __U);
8489}
8490
8491static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8492_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8493{
8494 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8495 (__v16si) __W,
8496 (__mmask16) __U);
8497}
8498
8499static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8501{
8502 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8503 (__v16si) _mm512_setzero_si512(),
8504 (__mmask16) __U);
8505}
8506
/* __m256 float -> __m512d conversion with an explicit rounding argument R,
 * via __builtin_ia32_cvtps2pd512_mask. The second builtin operand is an
 * undefined vector (unmasked), W (mask) or a zero vector (maskz). */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
8521
8522static __inline__ __m512d
8524 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8525}
8526
8527static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8528_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) {
8529 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8530 (__v8df)_mm512_cvtps_pd(__A),
8531 (__v8df)__W);
8532}
8533
8534static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8536 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8537 (__v8df)_mm512_cvtps_pd(__A),
8538 (__v8df)_mm512_setzero_pd());
8539}
8540
8541static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8543 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8544}
8545
8546static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8547_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) {
8548 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8549}
8550
8551static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8552_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {
8553 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8554 (__v8df)__W);
8555}
8556
8557static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8559 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8560 (__v8df)_mm512_setzero_pd());
8561}
8562
8563static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8564_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8565 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8566 (__v16sf)__W);
8567}
8568
8569static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8571 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8572 (__v16sf)_mm512_setzero_ps());
8573}
8574
8575static __inline__ void __DEFAULT_FN_ATTRS512
8577{
8578 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8579 (__mmask8) __U);
8580}
8581
8582static __inline__ void __DEFAULT_FN_ATTRS512
8584{
8585 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8586 (__mmask8) __U);
8587}
8588
8589static __inline__ void __DEFAULT_FN_ATTRS512
8591{
8592 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8593 (__mmask16) __U);
8594}
8595
8596static __inline__ void __DEFAULT_FN_ATTRS512
8598{
8599 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8600 (__mmask16) __U);
8601}
8602
8603#define _mm_cvt_roundsd_ss(A, B, R) \
8604 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8605 (__v2df)(__m128d)(B), \
8606 (__v4sf)_mm_undefined_ps(), \
8607 (__mmask8)-1, (int)(R)))
8608
8609#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
8610 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8611 (__v2df)(__m128d)(B), \
8612 (__v4sf)(__m128)(W), \
8613 (__mmask8)(U), (int)(R)))
8614
8615#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
8616 ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
8617 (__v2df)(__m128d)(B), \
8618 (__v4sf)_mm_setzero_ps(), \
8619 (__mmask8)(U), (int)(R)))
8620
8621static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8622_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) {
8623 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8624 (__v2df)__B,
8625 (__v4sf)__W,
8627}
8628
8629static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
8630_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
8631 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8632 (__v2df)__B,
8633 (__v4sf)_mm_setzero_ps(),
8635}
8636
/* "i32"/"i64" spellings that expand to the existing si32/si64 scalar
 * conversion intrinsics. The 64-bit names exist only on x86-64. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

/* Signed integer -> scalar double/float conversions with an explicit rounding
 * argument R. Each operation is offered under two names ("i" and "si") with
 * identical expansions. */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), (long long)(B), (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), (long long)(B), (int)(R)))
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), (long long)(B), (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), (long long)(B), (int)(R)))
#endif
8673
8674#define _mm_cvt_roundss_sd(A, B, R) \
8675 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8676 (__v4sf)(__m128)(B), \
8677 (__v2df)_mm_undefined_pd(), \
8678 (__mmask8)-1, (int)(R)))
8679
8680#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
8681 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8682 (__v4sf)(__m128)(B), \
8683 (__v2df)(__m128d)(W), \
8684 (__mmask8)(U), (int)(R)))
8685
8686#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
8687 ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
8688 (__v4sf)(__m128)(B), \
8689 (__v2df)_mm_setzero_pd(), \
8690 (__mmask8)(U), (int)(R)))
8691
8692static __inline__ __m128d __DEFAULT_FN_ATTRS128
8693_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
8694{
8695 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8696 (__v4sf)__B,
8697 (__v2df)__W,
8699}
8700
8701static __inline__ __m128d __DEFAULT_FN_ATTRS128
8702_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
8703{
8704 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
8705 (__v4sf)__B,
8706 (__v2df)_mm_setzero_pd(),
8708}
8709
8710static __inline__ __m128d __DEFAULT_FN_ATTRS128
8711_mm_cvtu32_sd (__m128d __A, unsigned __B)
8712{
8713 __A[0] = __B;
8714 return __A;
8715}
8716
8717#ifdef __x86_64__
8718#define _mm_cvt_roundu64_sd(A, B, R) \
8719 ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
8720 (unsigned long long)(B), (int)(R)))
8721
8722static __inline__ __m128d __DEFAULT_FN_ATTRS128
8723_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
8724{
8725 __A[0] = __B;
8726 return __A;
8727}
8728#endif
8729
8730#define _mm_cvt_roundu32_ss(A, B, R) \
8731 ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
8732 (int)(R)))
8733
8734static __inline__ __m128 __DEFAULT_FN_ATTRS128
8735_mm_cvtu32_ss (__m128 __A, unsigned __B)
8736{
8737 __A[0] = __B;
8738 return __A;
8739}
8740
8741#ifdef __x86_64__
8742#define _mm_cvt_roundu64_ss(A, B, R) \
8743 ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
8744 (unsigned long long)(B), (int)(R)))
8745
8746static __inline__ __m128 __DEFAULT_FN_ATTRS128
8747_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
8748{
8749 __A[0] = __B;
8750 return __A;
8751}
8752#endif
8753
8754static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8755_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) {
8756 return (__m512i) __builtin_ia32_selectd_512(__M,
8757 (__v16si) _mm512_set1_epi32(__A),
8758 (__v16si) __O);
8759}
8760
8761static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8762_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) {
8763 return (__m512i) __builtin_ia32_selectq_512(__M,
8764 (__v8di) _mm512_set1_epi64(__A),
8765 (__v8di) __O);
8766}
8767
8769 char __e63, char __e62, char __e61, char __e60, char __e59, char __e58,
8770 char __e57, char __e56, char __e55, char __e54, char __e53, char __e52,
8771 char __e51, char __e50, char __e49, char __e48, char __e47, char __e46,
8772 char __e45, char __e44, char __e43, char __e42, char __e41, char __e40,
8773 char __e39, char __e38, char __e37, char __e36, char __e35, char __e34,
8774 char __e33, char __e32, char __e31, char __e30, char __e29, char __e28,
8775 char __e27, char __e26, char __e25, char __e24, char __e23, char __e22,
8776 char __e21, char __e20, char __e19, char __e18, char __e17, char __e16,
8777 char __e15, char __e14, char __e13, char __e12, char __e11, char __e10,
8778 char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3,
8779 char __e2, char __e1, char __e0) {
8780
8781 return __extension__ (__m512i)(__v64qi)
8782 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8783 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8784 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8785 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
8786 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
8787 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
8788 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
8789 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
8790}
8791
8793 short __e31, short __e30, short __e29, short __e28, short __e27,
8794 short __e26, short __e25, short __e24, short __e23, short __e22,
8795 short __e21, short __e20, short __e19, short __e18, short __e17,
8796 short __e16, short __e15, short __e14, short __e13, short __e12,
8797 short __e11, short __e10, short __e9, short __e8, short __e7, short __e6,
8798 short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) {
8799 return __extension__ (__m512i)(__v32hi)
8800 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
8801 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
8802 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
8803 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
8804}
8805
8807 int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H,
8808 int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) {
8809 return __extension__ (__m512i)(__v16si)
8810 { __P, __O, __N, __M, __L, __K, __J, __I,
8811 __H, __G, __F, __E, __D, __C, __B, __A };
8812}
8813
8815 int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8,
8816 int e9, int e10, int e11, int e12, int e13, int e14, int e15) {
8817 return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,
8818 e3, e2, e1, e0);
8819}
8820
8821static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8822_mm512_set_epi64(long long __A, long long __B, long long __C, long long __D,
8823 long long __E, long long __F, long long __G, long long __H) {
8824 return __extension__ (__m512i) (__v8di)
8825 { __H, __G, __F, __E, __D, __C, __B, __A };
8826}
8827
8828static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
8829_mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3,
8830 long long e4, long long e5, long long e6, long long e7) {
8831 return _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
8832}
8833
8834static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8835_mm512_set_pd(double __A, double __B, double __C, double __D, double __E,
8836 double __F, double __G, double __H) {
8837 return __extension__ (__m512d)
8838 { __H, __G, __F, __E, __D, __C, __B, __A };
8839}
8840
8841static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8842_mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5,
8843 double e6, double e7) {
8844 return _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0);
8845}
8846
8847static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8848_mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F,
8849 float __G, float __H, float __I, float __J, float __K, float __L,
8850 float __M, float __N, float __O, float __P) {
8851 return __extension__ (__m512)
8852 { __P, __O, __N, __M, __L, __K, __J, __I,
8853 __H, __G, __F, __E, __D, __C, __B, __A };
8854}
8855
8856static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8857_mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5,
8858 float e6, float e7, float e8, float e9, float e10, float e11,
8859 float e12, float e13, float e14, float e15) {
8860 return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
8861 e2, e1, e0);
8862}
8863
8864static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8865_mm512_abs_ps(__m512 __A) {
8866 return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
8867}
8868
8869static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8870_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) {
8871 return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
8872}
8873
8874static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8875_mm512_abs_pd(__m512d __A) {
8876 return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
8877}
8878
8879static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8880_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) {
8881 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
8882}
8883
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
 * outputs. This class of vector operation forms the basis of many scientific
 * computations. In vector-reduction arithmetic, the evaluation order is
 * independent of the order of the input elements of V.
 *
 * For floating-point intrinsics:
 * 1. When using fadd/fmul intrinsics, the order of operations within the
 *    vector is unspecified (associative math).
 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
 *    produce unspecified results.
 *
 * The bisection method is used: at each step, the vector from the previous
 * step is partitioned in half and the operation is performed on its two
 * halves. This takes log2(n) steps, where n is the number of elements in the
 * vector.
 */
8899
8900static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8902 return __builtin_reduce_add((__v8di)__W);
8903}
8904
8905static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8907 return __builtin_reduce_mul((__v8di)__W);
8908}
8909
8910static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8912 return __builtin_reduce_and((__v8di)__W);
8913}
8914
8915static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
8917 return __builtin_reduce_or((__v8di)__W);
8918}
8919
8920static __inline__ long long __DEFAULT_FN_ATTRS512
8922 __W = _mm512_maskz_mov_epi64(__M, __W);
8923 return __builtin_reduce_add((__v8di)__W);
8924}
8925
8926static __inline__ long long __DEFAULT_FN_ATTRS512
8928 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
8929 return __builtin_reduce_mul((__v8di)__W);
8930}
8931
8932static __inline__ long long __DEFAULT_FN_ATTRS512
8934 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
8935 return __builtin_reduce_and((__v8di)__W);
8936}
8937
8938static __inline__ long long __DEFAULT_FN_ATTRS512
8940 __W = _mm512_maskz_mov_epi64(__M, __W);
8941 return __builtin_reduce_or((__v8di)__W);
8942}
8943
8944// -0.0 is used to ignore the start value since it is the neutral value of
8945// floating point addition. For more information, please refer to
8946// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
8947static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
8948 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8949}
8950
8951static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
8952 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
8953}
8954
8955static __inline__ double __DEFAULT_FN_ATTRS512
8957 __W = _mm512_maskz_mov_pd(__M, __W);
8958 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
8959}
8960
8961static __inline__ double __DEFAULT_FN_ATTRS512
8963 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
8964 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
8965}
8966
8967static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
8969 return __builtin_reduce_add((__v16si)__W);
8970}
8971
8972static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
8974 return __builtin_reduce_mul((__v16si)__W);
8975}
8976
8977static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
8979 return __builtin_reduce_and((__v16si)__W);
8980}
8981
8982static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
8984 return __builtin_reduce_or((__v16si)__W);
8985}
8986
8987static __inline__ int __DEFAULT_FN_ATTRS512
8989 __W = _mm512_maskz_mov_epi32(__M, __W);
8990 return __builtin_reduce_add((__v16si)__W);
8991}
8992
8993static __inline__ int __DEFAULT_FN_ATTRS512
8995 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
8996 return __builtin_reduce_mul((__v16si)__W);
8997}
8998
8999static __inline__ int __DEFAULT_FN_ATTRS512
9001 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
9002 return __builtin_reduce_and((__v16si)__W);
9003}
9004
9005static __inline__ int __DEFAULT_FN_ATTRS512
9007 __W = _mm512_maskz_mov_epi32(__M, __W);
9008 return __builtin_reduce_or((__v16si)__W);
9009}
9010
9011static __inline__ float __DEFAULT_FN_ATTRS512
9013 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9014}
9015
9016static __inline__ float __DEFAULT_FN_ATTRS512
9018 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9019}
9020
9021static __inline__ float __DEFAULT_FN_ATTRS512
9023 __W = _mm512_maskz_mov_ps(__M, __W);
9024 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9025}
9026
9027static __inline__ float __DEFAULT_FN_ATTRS512
9029 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9030 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9031}
9032
9033static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9035 return __builtin_reduce_max((__v8di)__V);
9036}
9037
9038static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9040 return __builtin_reduce_max((__v8du)__V);
9041}
9042
9043static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9045 return __builtin_reduce_min((__v8di)__V);
9046}
9047
9048static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9050 return __builtin_reduce_min((__v8du)__V);
9051}
9052
9053static __inline__ long long __DEFAULT_FN_ATTRS512
9055 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9056 return __builtin_reduce_max((__v8di)__V);
9057}
9058
9059static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9061 __V = _mm512_maskz_mov_epi64(__M, __V);
9062 return __builtin_reduce_max((__v8du)__V);
9063}
9064
9065static __inline__ long long __DEFAULT_FN_ATTRS512
9067 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9068 return __builtin_reduce_min((__v8di)__V);
9069}
9070
9071static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9073 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
9074 return __builtin_reduce_min((__v8du)__V);
9075}
9076static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9078 return __builtin_reduce_max((__v16si)__V);
9079}
9080
9081static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9083 return __builtin_reduce_max((__v16su)__V);
9084}
9085
9086static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9088 return __builtin_reduce_min((__v16si)__V);
9089}
9090
9091static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9093 return __builtin_reduce_min((__v16su)__V);
9094}
9095
9096static __inline__ int __DEFAULT_FN_ATTRS512
9098 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9099 return __builtin_reduce_max((__v16si)__V);
9100}
9101
9102static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9104 __V = _mm512_maskz_mov_epi32(__M, __V);
9105 return __builtin_reduce_max((__v16su)__V);
9106}
9107
9108static __inline__ int __DEFAULT_FN_ATTRS512
9110 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9111 return __builtin_reduce_min((__v16si)__V);
9112}
9113
9114static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9116 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
9117 return __builtin_reduce_min((__v16su)__V);
9118}
9119
9120static __inline__ double __DEFAULT_FN_ATTRS512
9122 return __builtin_ia32_reduce_fmax_pd512(__V);
9123}
9124
9125static __inline__ double __DEFAULT_FN_ATTRS512
9127 return __builtin_ia32_reduce_fmin_pd512(__V);
9128}
9129
9130static __inline__ double __DEFAULT_FN_ATTRS512
9132 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9133 return __builtin_ia32_reduce_fmax_pd512(__V);
9134}
9135
9136static __inline__ double __DEFAULT_FN_ATTRS512
9138 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9139 return __builtin_ia32_reduce_fmin_pd512(__V);
9140}
9141
9142static __inline__ float __DEFAULT_FN_ATTRS512
9144 return __builtin_ia32_reduce_fmax_ps512(__V);
9145}
9146
9147static __inline__ float __DEFAULT_FN_ATTRS512
9149 return __builtin_ia32_reduce_fmin_ps512(__V);
9150}
9151
9152static __inline__ float __DEFAULT_FN_ATTRS512
9154 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9155 return __builtin_ia32_reduce_fmax_ps512(__V);
9156}
9157
9158static __inline__ float __DEFAULT_FN_ATTRS512
9160 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9161 return __builtin_ia32_reduce_fmin_ps512(__V);
9162}
9163
9164/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9165/// 32-bit signed integer value.
9166///
9167/// \headerfile <x86intrin.h>
9168///
9169/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9170///
9171/// \param __A
9172/// A vector of [16 x i32]. The least significant 32 bits are moved to the
9173/// destination.
9174/// \returns A 32-bit signed integer containing the moved value.
9175static __inline__ int __DEFAULT_FN_ATTRS512
9177 __v16si __b = (__v16si)__A;
9178 return __b[0];
9179}
9180
/// Loads 8 double-precision (64-bit) floating-point elements from memory
///    starting at location \a base_addr, at packed 32-bit integer indices
///    stored in the lower half of \a vindex scaled by \a scale, and stores
///    them in dst.
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_pd(vindex, base_addr, scale)                      \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr),           \
                      (scale))
9198
/// Loads 8 double-precision (64-bit) floating-point elements from memory
///    starting at location \a base_addr, at packed 32-bit integer indices
///    stored in the lower half of \a vindex scaled by \a scale, into dst using
///    writemask \a mask (elements are copied from \a src when the
///    corresponding mask bit is not set).
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)      \
  _mm512_mask_i32gather_pd((src), (mask),                                    \
                           _mm512_castsi512_si256(vindex), (base_addr),      \
                           (scale))
9223
/// Loads 8 64-bit integer elements from memory starting at location
///    \a base_addr, at packed 32-bit integer indices stored in the lower half
///    of \a vindex scaled by \a scale, and stores them in dst.
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_epi64(vindex, base_addr, scale)                   \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr),        \
                         (scale))
9241
/// Loads 8 64-bit integer elements from memory starting at location
///    \a base_addr, at packed 32-bit integer indices stored in the lower half
///    of \a vindex scaled by \a scale, and stores them in dst using writemask
///    \a mask (elements are copied from \a src when the corresponding mask bit
///    is not set).
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale)   \
  _mm512_mask_i32gather_epi64((src), (mask),                                 \
                              _mm512_castsi512_si256(vindex), (base_addr),   \
                              (scale))
9265
/// Stores 8 packed double-precision (64-bit) floating-point elements from
///    \a v1 to memory locations starting at location \a base_addr, at packed
///    32-bit integer indices stored in \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale)                 \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1),    \
                       (scale))
9282
/// Stores 8 packed double-precision (64-bit) floating-point elements from
///    \a v1 to memory locations starting at location \a base_addr, at packed
///    32-bit integer indices stored in \a vindex scaled by \a scale. Only
///    those elements whose corresponding mask bit is set in writemask \a mask
///    are written to memory.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale)      \
  _mm512_mask_i32scatter_pd((base_addr), (mask),                             \
                            _mm512_castsi512_si256(vindex), (v1), (scale))
9304
/// Stores 8 packed 64-bit integer elements from \a v1 to memory locations
///    starting at location \a base_addr, at packed 32-bit integer indices
///    stored in \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale)              \
  _mm512_i32scatter_epi64((base_addr), _mm512_castsi512_si256(vindex),       \
                          (v1), (scale))
9322
/// Stores 8 packed 64-bit integer elements from \a v1 to memory locations
///    starting at location \a base_addr, at packed 32-bit integer indices
///    stored in \a vindex scaled by \a scale, using writemask \a mask
///    (elements whose corresponding mask bit is not set are not written to
///    memory).
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale)   \
  _mm512_mask_i32scatter_epi64((base_addr), (mask),                          \
                               _mm512_castsi512_si256(vindex), (v1),         \
                               (scale))
9343
/* Remove the attribute helper macros used by the definitions in this header
 * so they are no longer defined past this point. */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
9350
9351#endif /* __AVX512FINTRIN_H */
#define __L(__X)
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
Definition altivec.h:578
static __inline__ uint32_t volatile uint32_t * __p
Definition arm_acle.h:57
return __v
Definition arm_acle.h:88
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
Definition avx2intrin.h:30
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _cvtu32_mask16(unsigned int __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd128_pd512(__m128d __a)
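Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2 x double]. The lower 128 bits contain the value of the source vector; the upper 384 bits are set to zero.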
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8, int e9, int e10, int e11, int e12, int e13, int e14, int e15)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextps128_ps512(__m128 __a)
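Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4 x float]. The lower 128 bits contain the value of the source vector; the upper 384 bits are set to zero.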
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si256(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7, float e8, float e9, float e10, float e11, float e12, float e13, float e14, float e15)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
_MM_MANTISSA_NORM_ENUM
@ _MM_MANT_NORM_p5_1
@ _MM_MANT_NORM_p5_2
@ _MM_MANT_NORM_1_2
@ _MM_MANT_NORM_p75_1p5
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m128 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps128(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5, double e6, double e7)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps512_ps256(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3, long long e4, long long e5, long long e6, long long e7)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m128i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_si128(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_int2mask(int __a)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m256d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd256(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_test_epi64_mask(__m512i __A, __m512i __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
_MM_CMPINT_ENUM
@ _MM_CMPINT_NE
@ _MM_CMPINT_NLT
@ _MM_CMPINT_LE
@ _MM_CMPINT_EQ
@ _MM_CMPINT_LT
@ _MM_CMPINT_UNUSED
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_knot(__mmask16 __M)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
unsigned char __mmask8
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vectors A, B, and C.
@ _MM_TERNLOG_A
@ _MM_TERNLOG_B
@ _MM_TERNLOG_C
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
_MM_MANTISSA_SIGN_ENUM
@ _MM_MANT_SIGN_zero
@ _MM_MANT_SIGN_src
@ _MM_MANT_SIGN_nan
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4 x double].
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi32(int e0, int e1, int e2, int e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32(__m512i __A, __m512i __B)
_MM_PERM_ENUM
@ _MM_PERM_BBCA
@ _MM_PERM_BCBD
@ _MM_PERM_DAAB
@ _MM_PERM_CBBD
@ _MM_PERM_DCCC
@ _MM_PERM_CDBB
@ _MM_PERM_DDDD
@ _MM_PERM_CCCC
@ _MM_PERM_CADA
@ _MM_PERM_BACD
@ _MM_PERM_CCAD
@ _MM_PERM_ABDB
@ _MM_PERM_BBBD
@ _MM_PERM_DCAB
@ _MM_PERM_BABC
@ _MM_PERM_AACD
@ _MM_PERM_BBAB
@ _MM_PERM_DCDB
@ _MM_PERM_BACC
@ _MM_PERM_ABDA
@ _MM_PERM_ACCC
@ _MM_PERM_ADAC
@ _MM_PERM_CCCD
@ _MM_PERM_CADD
@ _MM_PERM_ACCB
@ _MM_PERM_BBDB
@ _MM_PERM_ABBB
@ _MM_PERM_BACB
@ _MM_PERM_CDCA
@ _MM_PERM_ACBC
@ _MM_PERM_ADCB
@ _MM_PERM_BBBC
@ _MM_PERM_DBBA
@ _MM_PERM_BBBB
@ _MM_PERM_DDDB
@ _MM_PERM_CAAA
@ _MM_PERM_ADBB
@ _MM_PERM_ACDB
@ _MM_PERM_DCAD
@ _MM_PERM_DBBC
@ _MM_PERM_BAAB
@ _MM_PERM_BDDD
@ _MM_PERM_BBAD
@ _MM_PERM_DDBA
@ _MM_PERM_CDCD
@ _MM_PERM_CCCA
@ _MM_PERM_DBBB
@ _MM_PERM_DAAD
@ _MM_PERM_DCBA
@ _MM_PERM_CCBC
@ _MM_PERM_ADDD
@ _MM_PERM_DBAC
@ _MM_PERM_ABAB
@ _MM_PERM_CBDB
@ _MM_PERM_CDBC
@ _MM_PERM_AABC
@ _MM_PERM_DABD
@ _MM_PERM_CBBA
@ _MM_PERM_CBAA
@ _MM_PERM_BDDB
@ _MM_PERM_CABC
@ _MM_PERM_BDBD
@ _MM_PERM_BCAD
@ _MM_PERM_ACBA
@ _MM_PERM_ADBA
@ _MM_PERM_ADBC
@ _MM_PERM_DBCB
@ _MM_PERM_CBDC
@ _MM_PERM_CBAD
@ _MM_PERM_ABCC
@ _MM_PERM_AAAD
@ _MM_PERM_CBAC
@ _MM_PERM_CCDA
@ _MM_PERM_CDAC
@ _MM_PERM_BADD
@ _MM_PERM_DAAC
@ _MM_PERM_BCCC
@ _MM_PERM_DBBD
@ _MM_PERM_DDAC
@ _MM_PERM_DACD
@ _MM_PERM_BAAC
@ _MM_PERM_ACCA
@ _MM_PERM_ABDD
@ _MM_PERM_BBCC
@ _MM_PERM_DAAA
@ _MM_PERM_CAAB
@ _MM_PERM_BCDB
@ _MM_PERM_ACBB
@ _MM_PERM_CDAB
@ _MM_PERM_DBDB
@ _MM_PERM_AABB
@ _MM_PERM_DBDA
@ _MM_PERM_BCBA
@ _MM_PERM_CBAB
@ _MM_PERM_DCDC
@ _MM_PERM_BBCB
@ _MM_PERM_CDCB
@ _MM_PERM_AACA
@ _MM_PERM_ACBD
@ _MM_PERM_AAAB
@ _MM_PERM_DCCB
@ _MM_PERM_ADDB
@ _MM_PERM_AAAA
@ _MM_PERM_AACC
@ _MM_PERM_BDDC
@ _MM_PERM_CBBC
@ _MM_PERM_DDCC
@ _MM_PERM_CABD
@ _MM_PERM_AADC
@ _MM_PERM_BCBC
@ _MM_PERM_BCCA
@ _MM_PERM_CCBD
@ _MM_PERM_CBBB
@ _MM_PERM_CDBA
@ _MM_PERM_CACD
@ _MM_PERM_BDAD
@ _MM_PERM_ADCA
@ _MM_PERM_AAAC
@ _MM_PERM_BDDA
@ _MM_PERM_CCAC
@ _MM_PERM_ACDC
@ _MM_PERM_DBCA
@ _MM_PERM_DBAA
@ _MM_PERM_AABD
@ _MM_PERM_CDCC
@ _MM_PERM_DCAA
@ _MM_PERM_DDDC
@ _MM_PERM_CDDB
@ _MM_PERM_AABA
@ _MM_PERM_DDBB
@ _MM_PERM_CDDA
@ _MM_PERM_AADD
@ _MM_PERM_BADC
@ _MM_PERM_BDBA
@ _MM_PERM_DBDD
@ _MM_PERM_BDAC
@ _MM_PERM_DBDC
@ _MM_PERM_BBBA
@ _MM_PERM_DDBC
@ _MM_PERM_BAAA
@ _MM_PERM_BDCC
@ _MM_PERM_DDAB
@ _MM_PERM_BCCB
@ _MM_PERM_BCCD
@ _MM_PERM_ADBD
@ _MM_PERM_ADCC
@ _MM_PERM_CCBB
@ _MM_PERM_CDAA
@ _MM_PERM_BBDA
@ _MM_PERM_CACC
@ _MM_PERM_DCBB
@ _MM_PERM_DABA
@ _MM_PERM_BADB
@ _MM_PERM_ABCA
@ _MM_PERM_CBCC
@ _MM_PERM_ABAD
@ _MM_PERM_BDBC
@ _MM_PERM_DDDA
@ _MM_PERM_ADAB
@ _MM_PERM_CADB
@ _MM_PERM_ADAA
@ _MM_PERM_ACAC
@ _MM_PERM_DADD
@ _MM_PERM_BABD
@ _MM_PERM_ACCD
@ _MM_PERM_CCAA
@ _MM_PERM_AADA
@ _MM_PERM_BDCA
@ _MM_PERM_CDDD
@ _MM_PERM_ABBD
@ _MM_PERM_ACAA
@ _MM_PERM_ACDD
@ _MM_PERM_DABB
@ _MM_PERM_CCCB
@ _MM_PERM_AADB
@ _MM_PERM_DBAD
@ _MM_PERM_BBDD
@ _MM_PERM_BCDC
@ _MM_PERM_CABA
@ _MM_PERM_BBAA
@ _MM_PERM_ADAD
@ _MM_PERM_BADA
@ _MM_PERM_DCDA
@ _MM_PERM_ABBA
@ _MM_PERM_ACAB
@ _MM_PERM_CCDD
@ _MM_PERM_CADC
@ _MM_PERM_DDCB
@ _MM_PERM_BABB
@ _MM_PERM_CCDB
@ _MM_PERM_DDAD
@ _MM_PERM_DBCC
@ _MM_PERM_BCBB
@ _MM_PERM_ADDC
@ _MM_PERM_CCBA
@ _MM_PERM_ABCD
@ _MM_PERM_BCAB
@ _MM_PERM_DCBC
@ _MM_PERM_BCDD
@ _MM_PERM_CCDC
@ _MM_PERM_ABAC
@ _MM_PERM_CBCB
@ _MM_PERM_CCAB
@ _MM_PERM_DDCD
@ _MM_PERM_DACA
@ _MM_PERM_ACAD
@ _MM_PERM_BABA
@ _MM_PERM_CBCD
@ _MM_PERM_CAAD
@ _MM_PERM_DCDD
@ _MM_PERM_BDBB
@ _MM_PERM_BCAA
@ _MM_PERM_ABDC
@ _MM_PERM_BBCD
@ _MM_PERM_CAAC
@ _MM_PERM_BBAC
@ _MM_PERM_CBCA
@ _MM_PERM_DCAC
@ _MM_PERM_ABAA
@ _MM_PERM_CACB
@ _MM_PERM_BBDC
@ _MM_PERM_CDAD
@ _MM_PERM_ADCD
@ _MM_PERM_DADB
@ _MM_PERM_DBCD
@ _MM_PERM_DACC
@ _MM_PERM_DACB
@ _MM_PERM_DCBD
@ _MM_PERM_CACA
@ _MM_PERM_ABBC
@ _MM_PERM_DCCA
@ _MM_PERM_DABC
@ _MM_PERM_CBDD
@ _MM_PERM_DDBD
@ _MM_PERM_DDCA
@ _MM_PERM_BDCD
@ _MM_PERM_CDBD
@ _MM_PERM_ABCB
@ _MM_PERM_CDDC
@ _MM_PERM_AACB
@ _MM_PERM_DDAA
@ _MM_PERM_ADDA
@ _MM_PERM_DADA
@ _MM_PERM_BCDA
@ _MM_PERM_BDAB
@ _MM_PERM_BAAD
@ _MM_PERM_DBAB
@ _MM_PERM_DCCD
@ _MM_PERM_CABB
@ _MM_PERM_BDAA
@ _MM_PERM_BDCB
@ _MM_PERM_ACDA
@ _MM_PERM_DADC
@ _MM_PERM_CBDA
@ _MM_PERM_BCAC
@ _MM_PERM_BACA
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_pd(double e0, double e1, double e2, double e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m128d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd512_pd128(__m512d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kmov(__mmask16 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_si512(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_ps(float e0, float e1, float e2, float e3)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_ps(__m512d __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3616
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
Definition avxintrin.h:4287
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
Definition avxintrin.h:4275
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4299
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision value of the first operand and returns the difference in the lower 64 bits of the result.
Definition emmintrin.h:120
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of the second operand and returns the quotient in the lower 64 bits of the result.
Definition emmintrin.h:199
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the result.
Definition emmintrin.h:80
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3878
static __inline__ void int __a
Definition emmintrin.h:4077
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3493
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:1887
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1867
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bits of the result.
Definition emmintrin.h:159
static __inline__ void short __D
Definition immintrin.h:342
static __inline__ void const void * __src
__inline unsigned int unsigned int unsigned int * __P
Definition bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition bmi2intrin.h:19
#define _MM_FROUND_FLOOR
Definition smmintrin.h:41
#define _MM_FROUND_CEIL
Definition smmintrin.h:42
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition xmmintrin.h:2797
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:160
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding value in the first operand.
Definition xmmintrin.h:119
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:79
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:2014
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the second operand.
Definition xmmintrin.h:200