clang 22.0.0git
avx512fintrin.h
Go to the documentation of this file.
1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
16typedef char __v64qi __attribute__((__vector_size__(64)));
17typedef short __v32hi __attribute__((__vector_size__(64)));
18typedef double __v8df __attribute__((__vector_size__(64)));
19typedef float __v16sf __attribute__((__vector_size__(64)));
20typedef long long __v8di __attribute__((__vector_size__(64)));
21typedef int __v16si __attribute__((__vector_size__(64)));
22
23/* Unsigned types */
24typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
25typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
26typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
27typedef unsigned int __v16su __attribute__((__vector_size__(64)));
28
29/* We need an explicitly signed variant for char. Note that this shouldn't
30 * appear in the interface though. */
31typedef signed char __v64qs __attribute__((__vector_size__(64)));
32
33typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
34typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
35typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
36
37typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
38typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
39typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
40
41typedef unsigned char __mmask8;
42typedef unsigned short __mmask16;
43
44/* Rounding mode macros. */
45#define _MM_FROUND_TO_NEAREST_INT 0x00
46#define _MM_FROUND_TO_NEG_INF 0x01
47#define _MM_FROUND_TO_POS_INF 0x02
48#define _MM_FROUND_TO_ZERO 0x03
49#define _MM_FROUND_CUR_DIRECTION 0x04
50
51/* Constants for integer comparison predicates */
typedef enum {
  _MM_CMPINT_EQ,     /* Equal */
  _MM_CMPINT_LT,     /* Less than */
  _MM_CMPINT_LE,     /* Less than or Equal */
  _MM_CMPINT_UNUSED, /* No corresponding instruction; keeps _MM_CMPINT_NE == 4 */
  _MM_CMPINT_NE,     /* Not Equal */
  _MM_CMPINT_NLT,    /* Not Less than */
#define _MM_CMPINT_GE _MM_CMPINT_NLT /* Greater than or Equal */
  _MM_CMPINT_NLE     /* Not Less than or Equal */
#define _MM_CMPINT_GT _MM_CMPINT_NLE /* Greater than */
} _MM_CMPINT_ENUM;
63
64typedef enum
65{
151 _MM_PERM_DDDD = 0xFF
153
/* Mantissa normalization intervals for getmant intrinsics. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Sign control for getmant intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,  /* sign = sign(SRC) */
  _MM_MANT_SIGN_zero, /* sign = 0 */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512f,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512f,no-evex512")))

/* In C++11 and later the constant-evaluatable intrinsics are constexpr. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
187
188/* Create vectors with repeated elements */
189
190static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
192 return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
193}
194
195#define _mm512_setzero_epi32 _mm512_setzero_si512
196
197static __inline__ __m512d __DEFAULT_FN_ATTRS512
199{
200 return (__m512d)__builtin_ia32_undef512();
201}
202
203static __inline__ __m512 __DEFAULT_FN_ATTRS512
205{
206 return (__m512)__builtin_ia32_undef512();
207}
208
209static __inline__ __m512 __DEFAULT_FN_ATTRS512
211{
212 return (__m512)__builtin_ia32_undef512();
213}
214
215static __inline__ __m512i __DEFAULT_FN_ATTRS512
217{
218 return (__m512i)__builtin_ia32_undef512();
219}
220
221static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
223 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
225}
226
227static __inline__ __m512i __DEFAULT_FN_ATTRS512
228_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
229{
230 return (__m512i)__builtin_ia32_selectd_512(__M,
231 (__v16si) _mm512_broadcastd_epi32(__A),
232 (__v16si) __O);
233}
234
235static __inline__ __m512i __DEFAULT_FN_ATTRS512
237{
238 return (__m512i)__builtin_ia32_selectd_512(__M,
239 (__v16si) _mm512_broadcastd_epi32(__A),
240 (__v16si) _mm512_setzero_si512());
241}
242
243static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
245 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
246 0, 0, 0, 0, 0, 0, 0, 0);
247}
248
249static __inline__ __m512i __DEFAULT_FN_ATTRS512
250_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
251{
252 return (__m512i)__builtin_ia32_selectq_512(__M,
253 (__v8di) _mm512_broadcastq_epi64(__A),
254 (__v8di) __O);
255
256}
257
258static __inline__ __m512i __DEFAULT_FN_ATTRS512
260{
261 return (__m512i)__builtin_ia32_selectq_512(__M,
262 (__v8di) _mm512_broadcastq_epi64(__A),
263 (__v8di) _mm512_setzero_si512());
264}
265
267 return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
268 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
269}
270
271#define _mm512_setzero _mm512_setzero_ps
272
273static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
275 return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
276}
277
278static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
280{
281 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
282 __w, __w, __w, __w, __w, __w, __w, __w };
283}
284
285static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
286_mm512_set1_pd(double __w)
287{
288 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
289}
290
291static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
293{
294 return __extension__ (__m512i)(__v64qi){
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w,
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w,
301 __w, __w, __w, __w, __w, __w, __w, __w,
302 __w, __w, __w, __w, __w, __w, __w, __w };
303}
304
305static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
307{
308 return __extension__ (__m512i)(__v32hi){
309 __w, __w, __w, __w, __w, __w, __w, __w,
310 __w, __w, __w, __w, __w, __w, __w, __w,
311 __w, __w, __w, __w, __w, __w, __w, __w,
312 __w, __w, __w, __w, __w, __w, __w, __w };
313}
314
315static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
317{
318 return __extension__ (__m512i)(__v16si){
319 __s, __s, __s, __s, __s, __s, __s, __s,
320 __s, __s, __s, __s, __s, __s, __s, __s };
321}
322
323static __inline __m512i __DEFAULT_FN_ATTRS512
325{
326 return (__m512i)__builtin_ia32_selectd_512(__M,
327 (__v16si)_mm512_set1_epi32(__A),
328 (__v16si)_mm512_setzero_si512());
329}
330
331static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
332_mm512_set1_epi64(long long __d)
333{
334 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
335}
336
337static __inline __m512i __DEFAULT_FN_ATTRS512
339{
340 return (__m512i)__builtin_ia32_selectq_512(__M,
341 (__v8di)_mm512_set1_epi64(__A),
342 (__v8di)_mm512_setzero_si512());
343}
344
345static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
347 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
349}
350
351static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
352_mm512_set4_epi32(int __A, int __B, int __C, int __D) {
353 return __extension__ (__m512i)(__v16si)
354 { __D, __C, __B, __A, __D, __C, __B, __A,
355 __D, __C, __B, __A, __D, __C, __B, __A };
356}
357
358static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
359_mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D) {
360 return __extension__ (__m512i) (__v8di)
361 { __D, __C, __B, __A, __D, __C, __B, __A };
362}
363
364static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
365_mm512_set4_pd(double __A, double __B, double __C, double __D) {
366 return __extension__ (__m512d)
367 { __D, __C, __B, __A, __D, __C, __B, __A };
368}
369
370static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
371_mm512_set4_ps(float __A, float __B, float __C, float __D) {
372 return __extension__ (__m512)
373 { __D, __C, __B, __A, __D, __C, __B, __A,
374 __D, __C, __B, __A, __D, __C, __B, __A };
375}
376
377#define _mm512_setr4_epi32(e0,e1,e2,e3) \
378 _mm512_set4_epi32((e3),(e2),(e1),(e0))
379
380#define _mm512_setr4_epi64(e0,e1,e2,e3) \
381 _mm512_set4_epi64((e3),(e2),(e1),(e0))
382
383#define _mm512_setr4_pd(e0,e1,e2,e3) \
384 _mm512_set4_pd((e3),(e2),(e1),(e0))
385
386#define _mm512_setr4_ps(e0,e1,e2,e3) \
387 _mm512_set4_ps((e3),(e2),(e1),(e0))
388
389static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
391 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
392 0, 0, 0, 0, 0, 0, 0, 0);
393}
394
395/* Cast between vector types */
396
397static __inline __m512d __DEFAULT_FN_ATTRS512
399{
400 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
401 1, 2, 3, 4, 5, 6, 7);
402}
403
404static __inline __m512 __DEFAULT_FN_ATTRS512
406{
407 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
408 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
409}
410
411static __inline __m128d __DEFAULT_FN_ATTRS512
413{
414 return __builtin_shufflevector(__a, __a, 0, 1);
415}
416
417static __inline __m256d __DEFAULT_FN_ATTRS512
419{
420 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
421}
422
423static __inline __m128 __DEFAULT_FN_ATTRS512
425{
426 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
427}
428
429static __inline __m256 __DEFAULT_FN_ATTRS512
431{
432 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
433}
434
435static __inline __m512 __DEFAULT_FN_ATTRS512
436_mm512_castpd_ps (__m512d __A)
437{
438 return (__m512) (__A);
439}
440
441static __inline __m512i __DEFAULT_FN_ATTRS512
443{
444 return (__m512i) (__A);
445}
446
447static __inline__ __m512d __DEFAULT_FN_ATTRS512
449{
450 __m256d __B = __builtin_nondeterministic_value(__B);
451 return __builtin_shufflevector(
452 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
453 __B, 0, 1, 2, 3, 4, 5, 6, 7);
454}
455
456static __inline __m512d __DEFAULT_FN_ATTRS512
458{
459 return (__m512d) (__A);
460}
461
462static __inline __m512i __DEFAULT_FN_ATTRS512
464{
465 return (__m512i) (__A);
466}
467
468static __inline__ __m512 __DEFAULT_FN_ATTRS512
470{
471 __m256 __B = __builtin_nondeterministic_value(__B);
472 return __builtin_shufflevector(
473 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
474 __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
475}
476
477static __inline__ __m512i __DEFAULT_FN_ATTRS512
479{
480 __m256i __B = __builtin_nondeterministic_value(__B);
481 return __builtin_shufflevector(
482 __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
483 __B, 0, 1, 2, 3, 4, 5, 6, 7);
484}
485
486static __inline__ __m512i __DEFAULT_FN_ATTRS512
488{
489 return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
490}
491
492static __inline __m512 __DEFAULT_FN_ATTRS512
494{
495 return (__m512) (__A);
496}
497
498static __inline __m512d __DEFAULT_FN_ATTRS512
500{
501 return (__m512d) (__A);
502}
503
504static __inline __m128i __DEFAULT_FN_ATTRS512
506{
507 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
508}
509
510static __inline __m256i __DEFAULT_FN_ATTRS512
512{
513 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
514}
515
516static __inline__ __mmask16 __DEFAULT_FN_ATTRS
518{
519 return (__mmask16)__a;
520}
521
522static __inline__ int __DEFAULT_FN_ATTRS
524{
525 return (int)__a;
526}
527
528/// Constructs a 512-bit floating-point vector of [8 x double] from a
529/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
530/// contain the value of the source vector. The upper 384 bits are set
531/// to zero.
532///
533/// \headerfile <x86intrin.h>
534///
535/// This intrinsic has no corresponding instruction.
536///
537/// \param __a
538/// A 128-bit vector of [2 x double].
539/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
540/// contain the value of the parameter. The upper 384 bits are set to zero.
541static __inline __m512d __DEFAULT_FN_ATTRS512
543{
544 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
545}
546
547/// Constructs a 512-bit floating-point vector of [8 x double] from a
548/// 256-bit floating-point vector of [4 x double]. The lower 256 bits
549/// contain the value of the source vector. The upper 256 bits are set
550/// to zero.
551///
552/// \headerfile <x86intrin.h>
553///
554/// This intrinsic has no corresponding instruction.
555///
556/// \param __a
557/// A 256-bit vector of [4 x double].
558/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
559/// contain the value of the parameter. The upper 256 bits are set to zero.
560static __inline __m512d __DEFAULT_FN_ATTRS512
562{
563 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
564}
565
566/// Constructs a 512-bit floating-point vector of [16 x float] from a
567/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
568/// the value of the source vector. The upper 384 bits are set to zero.
569///
570/// \headerfile <x86intrin.h>
571///
572/// This intrinsic has no corresponding instruction.
573///
574/// \param __a
575/// A 128-bit vector of [4 x float].
576/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
577/// contain the value of the parameter. The upper 384 bits are set to zero.
578static __inline __m512 __DEFAULT_FN_ATTRS512
580{
581 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
582}
583
584/// Constructs a 512-bit floating-point vector of [16 x float] from a
585/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
586/// the value of the source vector. The upper 256 bits are set to zero.
587///
588/// \headerfile <x86intrin.h>
589///
590/// This intrinsic has no corresponding instruction.
591///
592/// \param __a
593/// A 256-bit vector of [8 x float].
594/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
595/// contain the value of the parameter. The upper 256 bits are set to zero.
596static __inline __m512 __DEFAULT_FN_ATTRS512
598{
599 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
600}
601
602/// Constructs a 512-bit integer vector from a 128-bit integer vector.
603/// The lower 128 bits contain the value of the source vector. The upper
604/// 384 bits are set to zero.
605///
606/// \headerfile <x86intrin.h>
607///
608/// This intrinsic has no corresponding instruction.
609///
610/// \param __a
611/// A 128-bit integer vector.
612/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
613/// the parameter. The upper 384 bits are set to zero.
614static __inline __m512i __DEFAULT_FN_ATTRS512
616{
617 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
618}
619
620/// Constructs a 512-bit integer vector from a 256-bit integer vector.
621/// The lower 256 bits contain the value of the source vector. The upper
622/// 256 bits are set to zero.
623///
624/// \headerfile <x86intrin.h>
625///
626/// This intrinsic has no corresponding instruction.
627///
628/// \param __a
629/// A 256-bit integer vector.
630/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
631/// the parameter. The upper 256 bits are set to zero.
632static __inline __m512i __DEFAULT_FN_ATTRS512
634{
635 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
636}
637
638/* Bitwise operators */
639static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
640_mm512_and_epi32(__m512i __a, __m512i __b)
641{
642 return (__m512i)((__v16su)__a & (__v16su)__b);
643}
644
645static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
646_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) {
647 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
648 (__v16si) _mm512_and_epi32(__a, __b),
649 (__v16si) __src);
650}
651
652static __inline__ __m512i __DEFAULT_FN_ATTRS512
654{
656 __k, __a, __b);
657}
658
659static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
660_mm512_and_epi64(__m512i __a, __m512i __b)
661{
662 return (__m512i)((__v8du)__a & (__v8du)__b);
663}
664
665static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
666_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) {
667 return (__m512i)__builtin_ia32_selectq_512(
668 (__mmask8)__k, (__v8di)_mm512_and_epi64(__a, __b), (__v8di)__src);
669}
670
671static __inline__ __m512i __DEFAULT_FN_ATTRS512
673{
675 __k, __a, __b);
676}
677
678static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
679_mm512_andnot_si512 (__m512i __A, __m512i __B)
680{
681 return (__m512i)(~(__v8du)__A & (__v8du)__B);
682}
683
684static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
685_mm512_andnot_epi32 (__m512i __A, __m512i __B)
686{
687 return (__m512i)(~(__v16su)__A & (__v16su)__B);
688}
689
690static __inline__ __m512i __DEFAULT_FN_ATTRS512
691_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
692{
693 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
694 (__v16si)_mm512_andnot_epi32(__A, __B),
695 (__v16si)__W);
696}
697
698static __inline__ __m512i __DEFAULT_FN_ATTRS512
699_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
700{
702 __U, __A, __B);
703}
704
705static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
706_mm512_andnot_epi64(__m512i __A, __m512i __B)
707{
708 return (__m512i)(~(__v8du)__A & (__v8du)__B);
709}
710
711static __inline__ __m512i __DEFAULT_FN_ATTRS512
712_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
713{
714 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
715 (__v8di)_mm512_andnot_epi64(__A, __B),
716 (__v8di)__W);
717}
718
719static __inline__ __m512i __DEFAULT_FN_ATTRS512
720_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
721{
723 __U, __A, __B);
724}
725
726static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
727_mm512_or_epi32(__m512i __a, __m512i __b)
728{
729 return (__m512i)((__v16su)__a | (__v16su)__b);
730}
731
732static __inline__ __m512i __DEFAULT_FN_ATTRS512
733_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
734{
735 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
736 (__v16si)_mm512_or_epi32(__a, __b),
737 (__v16si)__src);
738}
739
740static __inline__ __m512i __DEFAULT_FN_ATTRS512
742{
743 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
744}
745
746static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
747_mm512_or_epi64(__m512i __a, __m512i __b)
748{
749 return (__m512i)((__v8du)__a | (__v8du)__b);
750}
751
752static __inline__ __m512i __DEFAULT_FN_ATTRS512
753_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
754{
755 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
756 (__v8di)_mm512_or_epi64(__a, __b),
757 (__v8di)__src);
758}
759
760static __inline__ __m512i __DEFAULT_FN_ATTRS512
761_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
762{
763 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
764}
765
766static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
767_mm512_xor_epi32(__m512i __a, __m512i __b)
768{
769 return (__m512i)((__v16su)__a ^ (__v16su)__b);
770}
771
772static __inline__ __m512i __DEFAULT_FN_ATTRS512
773_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
774{
775 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
776 (__v16si)_mm512_xor_epi32(__a, __b),
777 (__v16si)__src);
778}
779
780static __inline__ __m512i __DEFAULT_FN_ATTRS512
782{
783 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
784}
785
786static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
787_mm512_xor_epi64(__m512i __a, __m512i __b)
788{
789 return (__m512i)((__v8du)__a ^ (__v8du)__b);
790}
791
792static __inline__ __m512i __DEFAULT_FN_ATTRS512
793_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
794{
795 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
796 (__v8di)_mm512_xor_epi64(__a, __b),
797 (__v8di)__src);
798}
799
800static __inline__ __m512i __DEFAULT_FN_ATTRS512
802{
803 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
804}
805
806static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
807_mm512_and_si512(__m512i __a, __m512i __b)
808{
809 return (__m512i)((__v8du)__a & (__v8du)__b);
810}
811
812static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
813_mm512_or_si512(__m512i __a, __m512i __b)
814{
815 return (__m512i)((__v8du)__a | (__v8du)__b);
816}
817
818static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
819_mm512_xor_si512(__m512i __a, __m512i __b)
820{
821 return (__m512i)((__v8du)__a ^ (__v8du)__b);
822}
823
824/* Arithmetic */
825
826static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
827_mm512_add_pd(__m512d __a, __m512d __b) {
828 return (__m512d)((__v8df)__a + (__v8df)__b);
829}
830
831static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
832_mm512_add_ps(__m512 __a, __m512 __b) {
833 return (__m512)((__v16sf)__a + (__v16sf)__b);
834}
835
836static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
837_mm512_mul_pd(__m512d __a, __m512d __b) {
838 return (__m512d)((__v8df)__a * (__v8df)__b);
839}
840
841static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
842_mm512_mul_ps(__m512 __a, __m512 __b) {
843 return (__m512)((__v16sf)__a * (__v16sf)__b);
844}
845
846static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
847_mm512_sub_pd(__m512d __a, __m512d __b) {
848 return (__m512d)((__v8df)__a - (__v8df)__b);
849}
850
851static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
852_mm512_sub_ps(__m512 __a, __m512 __b) {
853 return (__m512)((__v16sf)__a - (__v16sf)__b);
854}
855
856static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
857_mm512_add_epi64(__m512i __A, __m512i __B) {
858 return (__m512i) ((__v8du) __A + (__v8du) __B);
859}
860
861static __inline__ __m512i __DEFAULT_FN_ATTRS512
862_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
863{
864 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
865 (__v8di)_mm512_add_epi64(__A, __B),
866 (__v8di)__W);
867}
868
869static __inline__ __m512i __DEFAULT_FN_ATTRS512
870_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
871{
872 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
873 (__v8di)_mm512_add_epi64(__A, __B),
874 (__v8di)_mm512_setzero_si512());
875}
876
877static __inline__ __m512i __DEFAULT_FN_ATTRS512
878_mm512_sub_epi64 (__m512i __A, __m512i __B)
879{
880 return (__m512i) ((__v8du) __A - (__v8du) __B);
881}
882
883static __inline__ __m512i __DEFAULT_FN_ATTRS512
884_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
885{
886 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
887 (__v8di)_mm512_sub_epi64(__A, __B),
888 (__v8di)__W);
889}
890
891static __inline__ __m512i __DEFAULT_FN_ATTRS512
892_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
893{
894 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
895 (__v8di)_mm512_sub_epi64(__A, __B),
896 (__v8di)_mm512_setzero_si512());
897}
898
899static __inline__ __m512i __DEFAULT_FN_ATTRS512
900_mm512_add_epi32 (__m512i __A, __m512i __B)
901{
902 return (__m512i) ((__v16su) __A + (__v16su) __B);
903}
904
905static __inline__ __m512i __DEFAULT_FN_ATTRS512
906_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
907{
908 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
909 (__v16si)_mm512_add_epi32(__A, __B),
910 (__v16si)__W);
911}
912
913static __inline__ __m512i __DEFAULT_FN_ATTRS512
914_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
915{
916 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
917 (__v16si)_mm512_add_epi32(__A, __B),
918 (__v16si)_mm512_setzero_si512());
919}
920
921static __inline__ __m512i __DEFAULT_FN_ATTRS512
922_mm512_sub_epi32 (__m512i __A, __m512i __B)
923{
924 return (__m512i) ((__v16su) __A - (__v16su) __B);
925}
926
927static __inline__ __m512i __DEFAULT_FN_ATTRS512
928_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
929{
930 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
931 (__v16si)_mm512_sub_epi32(__A, __B),
932 (__v16si)__W);
933}
934
935static __inline__ __m512i __DEFAULT_FN_ATTRS512
936_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
937{
938 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
939 (__v16si)_mm512_sub_epi32(__A, __B),
940 (__v16si)_mm512_setzero_si512());
941}
942
943#define _mm512_max_round_pd(A, B, R) \
944 ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
945 (__v8df)(__m512d)(B), (int)(R)))
946
947#define _mm512_mask_max_round_pd(W, U, A, B, R) \
948 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
949 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
950 (__v8df)(W)))
951
952#define _mm512_maskz_max_round_pd(U, A, B, R) \
953 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
954 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
955 (__v8df)_mm512_setzero_pd()))
956
957static __inline__ __m512d __DEFAULT_FN_ATTRS512
958_mm512_max_pd(__m512d __A, __m512d __B)
959{
960 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
962}
963
964static __inline__ __m512d __DEFAULT_FN_ATTRS512
965_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
966{
967 return (__m512d)__builtin_ia32_selectpd_512(__U,
968 (__v8df)_mm512_max_pd(__A, __B),
969 (__v8df)__W);
970}
971
972static __inline__ __m512d __DEFAULT_FN_ATTRS512
973_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
974{
975 return (__m512d)__builtin_ia32_selectpd_512(__U,
976 (__v8df)_mm512_max_pd(__A, __B),
977 (__v8df)_mm512_setzero_pd());
978}
979
980#define _mm512_max_round_ps(A, B, R) \
981 ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
982 (__v16sf)(__m512)(B), (int)(R)))
983
984#define _mm512_mask_max_round_ps(W, U, A, B, R) \
985 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
986 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
987 (__v16sf)(W)))
988
989#define _mm512_maskz_max_round_ps(U, A, B, R) \
990 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
991 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
992 (__v16sf)_mm512_setzero_ps()))
993
994static __inline__ __m512 __DEFAULT_FN_ATTRS512
995_mm512_max_ps(__m512 __A, __m512 __B)
996{
997 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
999}
1000
1001static __inline__ __m512 __DEFAULT_FN_ATTRS512
1002_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1003{
1004 return (__m512)__builtin_ia32_selectps_512(__U,
1005 (__v16sf)_mm512_max_ps(__A, __B),
1006 (__v16sf)__W);
1007}
1008
1009static __inline__ __m512 __DEFAULT_FN_ATTRS512
1010_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1011{
1012 return (__m512)__builtin_ia32_selectps_512(__U,
1013 (__v16sf)_mm512_max_ps(__A, __B),
1014 (__v16sf)_mm512_setzero_ps());
1015}
1016
1017static __inline__ __m128 __DEFAULT_FN_ATTRS128
1018_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1019 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1020 (__v4sf) __B,
1021 (__v4sf) __W,
1022 (__mmask8) __U,
1024}
1025
1026static __inline__ __m128 __DEFAULT_FN_ATTRS128
1027_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1028 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1029 (__v4sf) __B,
1030 (__v4sf) _mm_setzero_ps (),
1031 (__mmask8) __U,
1033}
1034
1035#define _mm_max_round_ss(A, B, R) \
1036 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1037 (__v4sf)(__m128)(B), \
1038 (__v4sf)_mm_setzero_ps(), \
1039 (__mmask8)-1, (int)(R)))
1040
1041#define _mm_mask_max_round_ss(W, U, A, B, R) \
1042 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1043 (__v4sf)(__m128)(B), \
1044 (__v4sf)(__m128)(W), (__mmask8)(U), \
1045 (int)(R)))
1046
1047#define _mm_maskz_max_round_ss(U, A, B, R) \
1048 ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1049 (__v4sf)(__m128)(B), \
1050 (__v4sf)_mm_setzero_ps(), \
1051 (__mmask8)(U), (int)(R)))
1052
1053static __inline__ __m128d __DEFAULT_FN_ATTRS128
1054_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1055 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1056 (__v2df) __B,
1057 (__v2df) __W,
1058 (__mmask8) __U,
1060}
1061
1062static __inline__ __m128d __DEFAULT_FN_ATTRS128
1063_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1064 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1065 (__v2df) __B,
1066 (__v2df) _mm_setzero_pd (),
1067 (__mmask8) __U,
1069}
1070
1071#define _mm_max_round_sd(A, B, R) \
1072 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1073 (__v2df)(__m128d)(B), \
1074 (__v2df)_mm_setzero_pd(), \
1075 (__mmask8)-1, (int)(R)))
1076
1077#define _mm_mask_max_round_sd(W, U, A, B, R) \
1078 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1079 (__v2df)(__m128d)(B), \
1080 (__v2df)(__m128d)(W), \
1081 (__mmask8)(U), (int)(R)))
1082
1083#define _mm_maskz_max_round_sd(U, A, B, R) \
1084 ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1085 (__v2df)(__m128d)(B), \
1086 (__v2df)_mm_setzero_pd(), \
1087 (__mmask8)(U), (int)(R)))
1088
1089static __inline __m512i
1091_mm512_max_epi32(__m512i __A, __m512i __B)
1092{
1093 return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
1094}
1095
1096static __inline__ __m512i __DEFAULT_FN_ATTRS512
1097_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1098{
1099 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1100 (__v16si)_mm512_max_epi32(__A, __B),
1101 (__v16si)__W);
1102}
1103
1104static __inline__ __m512i __DEFAULT_FN_ATTRS512
1105_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1106{
1107 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1108 (__v16si)_mm512_max_epi32(__A, __B),
1109 (__v16si)_mm512_setzero_si512());
1110}
1111
1112static __inline __m512i __DEFAULT_FN_ATTRS512
1113_mm512_max_epu32(__m512i __A, __m512i __B)
1114{
1115 return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
1116}
1117
1118static __inline__ __m512i __DEFAULT_FN_ATTRS512
1119_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1120{
1121 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1122 (__v16si)_mm512_max_epu32(__A, __B),
1123 (__v16si)__W);
1124}
1125
1126static __inline__ __m512i __DEFAULT_FN_ATTRS512
1127_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1128{
1129 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1130 (__v16si)_mm512_max_epu32(__A, __B),
1131 (__v16si)_mm512_setzero_si512());
1132}
1133
1134static __inline __m512i __DEFAULT_FN_ATTRS512
1135_mm512_max_epi64(__m512i __A, __m512i __B)
1136{
1137 return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
1138}
1139
1140static __inline__ __m512i __DEFAULT_FN_ATTRS512
1141_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1142{
1143 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1144 (__v8di)_mm512_max_epi64(__A, __B),
1145 (__v8di)__W);
1146}
1147
1148static __inline__ __m512i __DEFAULT_FN_ATTRS512
1149_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1150{
1151 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1152 (__v8di)_mm512_max_epi64(__A, __B),
1153 (__v8di)_mm512_setzero_si512());
1154}
1155
1156static __inline __m512i __DEFAULT_FN_ATTRS512
1157_mm512_max_epu64(__m512i __A, __m512i __B)
1158{
1159 return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
1160}
1161
1162static __inline__ __m512i __DEFAULT_FN_ATTRS512
1163_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1164{
1165 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1166 (__v8di)_mm512_max_epu64(__A, __B),
1167 (__v8di)__W);
1168}
1169
1170static __inline__ __m512i __DEFAULT_FN_ATTRS512
1171_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1172{
1173 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1174 (__v8di)_mm512_max_epu64(__A, __B),
1175 (__v8di)_mm512_setzero_si512());
1176}
1177
1178#define _mm512_min_round_pd(A, B, R) \
1179 ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1180 (__v8df)(__m512d)(B), (int)(R)))
1181
1182#define _mm512_mask_min_round_pd(W, U, A, B, R) \
1183 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1184 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1185 (__v8df)(W)))
1186
1187#define _mm512_maskz_min_round_pd(U, A, B, R) \
1188 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1189 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1190 (__v8df)_mm512_setzero_pd()))
1191
1192static __inline__ __m512d __DEFAULT_FN_ATTRS512
1193_mm512_min_pd(__m512d __A, __m512d __B)
1194{
1195 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1197}
1198
1199static __inline__ __m512d __DEFAULT_FN_ATTRS512
1200_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1201{
1202 return (__m512d)__builtin_ia32_selectpd_512(__U,
1203 (__v8df)_mm512_min_pd(__A, __B),
1204 (__v8df)__W);
1205}
1206
1207static __inline__ __m512d __DEFAULT_FN_ATTRS512
1208_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1209{
1210 return (__m512d)__builtin_ia32_selectpd_512(__U,
1211 (__v8df)_mm512_min_pd(__A, __B),
1212 (__v8df)_mm512_setzero_pd());
1213}
1214
1215#define _mm512_min_round_ps(A, B, R) \
1216 ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1217 (__v16sf)(__m512)(B), (int)(R)))
1218
1219#define _mm512_mask_min_round_ps(W, U, A, B, R) \
1220 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1221 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1222 (__v16sf)(W)))
1223
1224#define _mm512_maskz_min_round_ps(U, A, B, R) \
1225 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1226 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1227 (__v16sf)_mm512_setzero_ps()))
1228
1229static __inline__ __m512 __DEFAULT_FN_ATTRS512
1230_mm512_min_ps(__m512 __A, __m512 __B)
1231{
1232 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1234}
1235
1236static __inline__ __m512 __DEFAULT_FN_ATTRS512
1237_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1238{
1239 return (__m512)__builtin_ia32_selectps_512(__U,
1240 (__v16sf)_mm512_min_ps(__A, __B),
1241 (__v16sf)__W);
1242}
1243
1244static __inline__ __m512 __DEFAULT_FN_ATTRS512
1245_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1246{
1247 return (__m512)__builtin_ia32_selectps_512(__U,
1248 (__v16sf)_mm512_min_ps(__A, __B),
1249 (__v16sf)_mm512_setzero_ps());
1250}
1251
1252static __inline__ __m128 __DEFAULT_FN_ATTRS128
1253_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1254 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1255 (__v4sf) __B,
1256 (__v4sf) __W,
1257 (__mmask8) __U,
1259}
1260
1261static __inline__ __m128 __DEFAULT_FN_ATTRS128
1262_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1263 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1264 (__v4sf) __B,
1265 (__v4sf) _mm_setzero_ps (),
1266 (__mmask8) __U,
1268}
1269
1270#define _mm_min_round_ss(A, B, R) \
1271 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1272 (__v4sf)(__m128)(B), \
1273 (__v4sf)_mm_setzero_ps(), \
1274 (__mmask8)-1, (int)(R)))
1275
1276#define _mm_mask_min_round_ss(W, U, A, B, R) \
1277 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1278 (__v4sf)(__m128)(B), \
1279 (__v4sf)(__m128)(W), (__mmask8)(U), \
1280 (int)(R)))
1281
1282#define _mm_maskz_min_round_ss(U, A, B, R) \
1283 ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1284 (__v4sf)(__m128)(B), \
1285 (__v4sf)_mm_setzero_ps(), \
1286 (__mmask8)(U), (int)(R)))
1287
1288static __inline__ __m128d __DEFAULT_FN_ATTRS128
1289_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1290 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1291 (__v2df) __B,
1292 (__v2df) __W,
1293 (__mmask8) __U,
1295}
1296
1297static __inline__ __m128d __DEFAULT_FN_ATTRS128
1298_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1299 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1300 (__v2df) __B,
1301 (__v2df) _mm_setzero_pd (),
1302 (__mmask8) __U,
1304}
1305
1306#define _mm_min_round_sd(A, B, R) \
1307 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1308 (__v2df)(__m128d)(B), \
1309 (__v2df)_mm_setzero_pd(), \
1310 (__mmask8)-1, (int)(R)))
1311
1312#define _mm_mask_min_round_sd(W, U, A, B, R) \
1313 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1314 (__v2df)(__m128d)(B), \
1315 (__v2df)(__m128d)(W), \
1316 (__mmask8)(U), (int)(R)))
1317
1318#define _mm_maskz_min_round_sd(U, A, B, R) \
1319 ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1320 (__v2df)(__m128d)(B), \
1321 (__v2df)_mm_setzero_pd(), \
1322 (__mmask8)(U), (int)(R)))
1323
1324static __inline __m512i
1326_mm512_min_epi32(__m512i __A, __m512i __B)
1327{
1328 return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
1329}
1330
1331static __inline__ __m512i __DEFAULT_FN_ATTRS512
1332_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1333{
1334 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1335 (__v16si)_mm512_min_epi32(__A, __B),
1336 (__v16si)__W);
1337}
1338
1339static __inline__ __m512i __DEFAULT_FN_ATTRS512
1340_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1341{
1342 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1343 (__v16si)_mm512_min_epi32(__A, __B),
1344 (__v16si)_mm512_setzero_si512());
1345}
1346
1347static __inline __m512i __DEFAULT_FN_ATTRS512
1348_mm512_min_epu32(__m512i __A, __m512i __B)
1349{
1350 return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
1351}
1352
1353static __inline__ __m512i __DEFAULT_FN_ATTRS512
1354_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1355{
1356 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1357 (__v16si)_mm512_min_epu32(__A, __B),
1358 (__v16si)__W);
1359}
1360
1361static __inline__ __m512i __DEFAULT_FN_ATTRS512
1362_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1363{
1364 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1365 (__v16si)_mm512_min_epu32(__A, __B),
1366 (__v16si)_mm512_setzero_si512());
1367}
1368
1369static __inline __m512i __DEFAULT_FN_ATTRS512
1370_mm512_min_epi64(__m512i __A, __m512i __B)
1371{
1372 return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
1373}
1374
1375static __inline__ __m512i __DEFAULT_FN_ATTRS512
1376_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1377{
1378 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1379 (__v8di)_mm512_min_epi64(__A, __B),
1380 (__v8di)__W);
1381}
1382
1383static __inline__ __m512i __DEFAULT_FN_ATTRS512
1384_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1385{
1386 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1387 (__v8di)_mm512_min_epi64(__A, __B),
1388 (__v8di)_mm512_setzero_si512());
1389}
1390
1391static __inline __m512i __DEFAULT_FN_ATTRS512
1392_mm512_min_epu64(__m512i __A, __m512i __B)
1393{
1394 return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
1395}
1396
1397static __inline__ __m512i __DEFAULT_FN_ATTRS512
1398_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1399{
1400 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1401 (__v8di)_mm512_min_epu64(__A, __B),
1402 (__v8di)__W);
1403}
1404
1405static __inline__ __m512i __DEFAULT_FN_ATTRS512
1406_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1407{
1408 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1409 (__v8di)_mm512_min_epu64(__A, __B),
1410 (__v8di)_mm512_setzero_si512());
1411}
1412
1413static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1414_mm512_mul_epi32(__m512i __X, __m512i __Y) {
1415 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1416}
1417
1418static __inline __m512i __DEFAULT_FN_ATTRS512
1419_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1420{
1421 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1422 (__v8di)_mm512_mul_epi32(__X, __Y),
1423 (__v8di)__W);
1424}
1425
1426static __inline __m512i __DEFAULT_FN_ATTRS512
1427_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1428{
1429 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1430 (__v8di)_mm512_mul_epi32(__X, __Y),
1431 (__v8di)_mm512_setzero_si512 ());
1432}
1433
1434static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1435_mm512_mul_epu32(__m512i __X, __m512i __Y) {
1436 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1437}
1438
1439static __inline __m512i __DEFAULT_FN_ATTRS512
1440_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1441{
1442 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1443 (__v8di)_mm512_mul_epu32(__X, __Y),
1444 (__v8di)__W);
1445}
1446
1447static __inline __m512i __DEFAULT_FN_ATTRS512
1448_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1449{
1450 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1451 (__v8di)_mm512_mul_epu32(__X, __Y),
1452 (__v8di)_mm512_setzero_si512 ());
1453}
1454
1455static __inline __m512i __DEFAULT_FN_ATTRS512
1456_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1457{
1458 return (__m512i) ((__v16su) __A * (__v16su) __B);
1459}
1460
1461static __inline __m512i __DEFAULT_FN_ATTRS512
1462_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1463{
1464 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1465 (__v16si)_mm512_mullo_epi32(__A, __B),
1466 (__v16si)_mm512_setzero_si512());
1467}
1468
1469static __inline __m512i __DEFAULT_FN_ATTRS512
1470_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1471{
1472 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1473 (__v16si)_mm512_mullo_epi32(__A, __B),
1474 (__v16si)__W);
1475}
1476
1477static __inline__ __m512i __DEFAULT_FN_ATTRS512
1478_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1479 return (__m512i) ((__v8du) __A * (__v8du) __B);
1480}
1481
1482static __inline__ __m512i __DEFAULT_FN_ATTRS512
1483_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1484 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1485 (__v8di)_mm512_mullox_epi64(__A, __B),
1486 (__v8di)__W);
1487}
1488
1489#define _mm512_sqrt_round_pd(A, R) \
1490 ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1491
1492#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1493 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1494 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1495 (__v8df)(__m512d)(W)))
1496
1497#define _mm512_maskz_sqrt_round_pd(U, A, R) \
1498 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1499 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1500 (__v8df)_mm512_setzero_pd()))
1501
1502static __inline__ __m512d __DEFAULT_FN_ATTRS512
1503_mm512_sqrt_pd(__m512d __A)
1504{
1505 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1507}
1508
1509static __inline__ __m512d __DEFAULT_FN_ATTRS512
1510_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1511{
1512 return (__m512d)__builtin_ia32_selectpd_512(__U,
1513 (__v8df)_mm512_sqrt_pd(__A),
1514 (__v8df)__W);
1515}
1516
1517static __inline__ __m512d __DEFAULT_FN_ATTRS512
1519{
1520 return (__m512d)__builtin_ia32_selectpd_512(__U,
1521 (__v8df)_mm512_sqrt_pd(__A),
1522 (__v8df)_mm512_setzero_pd());
1523}
1524
1525#define _mm512_sqrt_round_ps(A, R) \
1526 ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1527
1528#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1529 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1530 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1531 (__v16sf)(__m512)(W)))
1532
1533#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1534 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1535 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1536 (__v16sf)_mm512_setzero_ps()))
1537
1538static __inline__ __m512 __DEFAULT_FN_ATTRS512
1540{
1541 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1543}
1544
1545static __inline__ __m512 __DEFAULT_FN_ATTRS512
1546_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1547{
1548 return (__m512)__builtin_ia32_selectps_512(__U,
1549 (__v16sf)_mm512_sqrt_ps(__A),
1550 (__v16sf)__W);
1551}
1552
1553static __inline__ __m512 __DEFAULT_FN_ATTRS512
1555{
1556 return (__m512)__builtin_ia32_selectps_512(__U,
1557 (__v16sf)_mm512_sqrt_ps(__A),
1558 (__v16sf)_mm512_setzero_ps());
1559}
1560
1561static __inline__ __m512d __DEFAULT_FN_ATTRS512
1563{
1564 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1565 (__v8df)
1567 (__mmask8) -1);}
1568
1569static __inline__ __m512d __DEFAULT_FN_ATTRS512
1570_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1571{
1572 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1573 (__v8df) __W,
1574 (__mmask8) __U);
1575}
1576
1577static __inline__ __m512d __DEFAULT_FN_ATTRS512
1579{
1580 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1581 (__v8df)
1583 (__mmask8) __U);
1584}
1585
1586static __inline__ __m512 __DEFAULT_FN_ATTRS512
1588{
1589 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1590 (__v16sf)
1592 (__mmask16) -1);
1593}
1594
1595static __inline__ __m512 __DEFAULT_FN_ATTRS512
1596_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1597{
1598 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1599 (__v16sf) __W,
1600 (__mmask16) __U);
1601}
1602
1603static __inline__ __m512 __DEFAULT_FN_ATTRS512
1605{
1606 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1607 (__v16sf)
1609 (__mmask16) __U);
1610}
1611
1612static __inline__ __m128 __DEFAULT_FN_ATTRS128
1613_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1614{
1615 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1616 (__v4sf) __B,
1617 (__v4sf)
1618 _mm_setzero_ps (),
1619 (__mmask8) -1);
1620}
1621
1622static __inline__ __m128 __DEFAULT_FN_ATTRS128
1623_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1624{
1625 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1626 (__v4sf) __B,
1627 (__v4sf) __W,
1628 (__mmask8) __U);
1629}
1630
1631static __inline__ __m128 __DEFAULT_FN_ATTRS128
1632_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1633{
1634 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1635 (__v4sf) __B,
1636 (__v4sf) _mm_setzero_ps (),
1637 (__mmask8) __U);
1638}
1639
1640static __inline__ __m128d __DEFAULT_FN_ATTRS128
1641_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1642{
1643 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1644 (__v2df) __B,
1645 (__v2df)
1646 _mm_setzero_pd (),
1647 (__mmask8) -1);
1648}
1649
1650static __inline__ __m128d __DEFAULT_FN_ATTRS128
1651_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1652{
1653 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1654 (__v2df) __B,
1655 (__v2df) __W,
1656 (__mmask8) __U);
1657}
1658
1659static __inline__ __m128d __DEFAULT_FN_ATTRS128
1660_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1661{
1662 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1663 (__v2df) __B,
1664 (__v2df) _mm_setzero_pd (),
1665 (__mmask8) __U);
1666}
1667
1668static __inline__ __m512d __DEFAULT_FN_ATTRS512
1670{
1671 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1672 (__v8df)
1674 (__mmask8) -1);
1675}
1676
1677static __inline__ __m512d __DEFAULT_FN_ATTRS512
1678_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1679{
1680 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1681 (__v8df) __W,
1682 (__mmask8) __U);
1683}
1684
1685static __inline__ __m512d __DEFAULT_FN_ATTRS512
1687{
1688 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1689 (__v8df)
1691 (__mmask8) __U);
1692}
1693
1694static __inline__ __m512 __DEFAULT_FN_ATTRS512
1696{
1697 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1698 (__v16sf)
1700 (__mmask16) -1);
1701}
1702
1703static __inline__ __m512 __DEFAULT_FN_ATTRS512
1704_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1705{
1706 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1707 (__v16sf) __W,
1708 (__mmask16) __U);
1709}
1710
1711static __inline__ __m512 __DEFAULT_FN_ATTRS512
1713{
1714 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1715 (__v16sf)
1717 (__mmask16) __U);
1718}
1719
1720static __inline__ __m128 __DEFAULT_FN_ATTRS128
1721_mm_rcp14_ss(__m128 __A, __m128 __B)
1722{
1723 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1724 (__v4sf) __B,
1725 (__v4sf)
1726 _mm_setzero_ps (),
1727 (__mmask8) -1);
1728}
1729
1730static __inline__ __m128 __DEFAULT_FN_ATTRS128
1731_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1732{
1733 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1734 (__v4sf) __B,
1735 (__v4sf) __W,
1736 (__mmask8) __U);
1737}
1738
1739static __inline__ __m128 __DEFAULT_FN_ATTRS128
1740_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1741{
1742 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1743 (__v4sf) __B,
1744 (__v4sf) _mm_setzero_ps (),
1745 (__mmask8) __U);
1746}
1747
1748static __inline__ __m128d __DEFAULT_FN_ATTRS128
1749_mm_rcp14_sd(__m128d __A, __m128d __B)
1750{
1751 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1752 (__v2df) __B,
1753 (__v2df)
1754 _mm_setzero_pd (),
1755 (__mmask8) -1);
1756}
1757
1758static __inline__ __m128d __DEFAULT_FN_ATTRS128
1759_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1760{
1761 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1762 (__v2df) __B,
1763 (__v2df) __W,
1764 (__mmask8) __U);
1765}
1766
1767static __inline__ __m128d __DEFAULT_FN_ATTRS128
1768_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1769{
1770 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1771 (__v2df) __B,
1772 (__v2df) _mm_setzero_pd (),
1773 (__mmask8) __U);
1774}
1775
1776static __inline __m512 __DEFAULT_FN_ATTRS512
1778{
1779 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1781 (__v16sf) __A, (unsigned short)-1,
1783}
1784
1785static __inline__ __m512 __DEFAULT_FN_ATTRS512
1786_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1787{
1788 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1790 (__v16sf) __W, __U,
1792}
1793
1794static __inline __m512d __DEFAULT_FN_ATTRS512
1796{
1797 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1799 (__v8df) __A, (unsigned char)-1,
1801}
1802
1803static __inline__ __m512d __DEFAULT_FN_ATTRS512
1804_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1805{
1806 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1808 (__v8df) __W, __U,
1810}
1811
1812static __inline__ __m512 __DEFAULT_FN_ATTRS512
1813_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1814{
1815 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1817 (__v16sf) __W, __U,
1819}
1820
1821static __inline __m512 __DEFAULT_FN_ATTRS512
1823{
1824 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1826 (__v16sf) __A, (unsigned short)-1,
1828}
1829
1830static __inline __m512d __DEFAULT_FN_ATTRS512
1831_mm512_ceil_pd(__m512d __A)
1832{
1833 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1835 (__v8df) __A, (unsigned char)-1,
1837}
1838
1839static __inline__ __m512d __DEFAULT_FN_ATTRS512
1840_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1841{
1842 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1844 (__v8df) __W, __U,
1846}
1847
1848static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1849_mm512_abs_epi64(__m512i __A) {
1850 return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1851}
1852
1853static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1854_mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
1855 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1856 (__v8di)_mm512_abs_epi64(__A),
1857 (__v8di)__W);
1858}
1859
1860static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1862 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1863 (__v8di)_mm512_abs_epi64(__A),
1864 (__v8di)_mm512_setzero_si512());
1865}
1866
1867static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1868_mm512_abs_epi32(__m512i __A) {
1869 return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1870}
1871
1872static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1873_mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
1874 return (__m512i)__builtin_ia32_selectd_512(__U,
1875 (__v16si)_mm512_abs_epi32(__A),
1876 (__v16si)__W);
1877}
1878
1879static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1881 return (__m512i)__builtin_ia32_selectd_512(__U,
1882 (__v16si)_mm512_abs_epi32(__A),
1883 (__v16si)_mm512_setzero_si512());
1884}
1885
1886static __inline__ __m128 __DEFAULT_FN_ATTRS128
1887_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1888 __A = _mm_add_ss(__A, __B);
1889 return __builtin_ia32_selectss_128(__U, __A, __W);
1890}
1891
1892static __inline__ __m128 __DEFAULT_FN_ATTRS128
1893_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1894 __A = _mm_add_ss(__A, __B);
1895 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1896}
1897
1898#define _mm_add_round_ss(A, B, R) \
1899 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1900 (__v4sf)(__m128)(B), \
1901 (__v4sf)_mm_setzero_ps(), \
1902 (__mmask8)-1, (int)(R)))
1903
1904#define _mm_mask_add_round_ss(W, U, A, B, R) \
1905 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1906 (__v4sf)(__m128)(B), \
1907 (__v4sf)(__m128)(W), (__mmask8)(U), \
1908 (int)(R)))
1909
1910#define _mm_maskz_add_round_ss(U, A, B, R) \
1911 ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1912 (__v4sf)(__m128)(B), \
1913 (__v4sf)_mm_setzero_ps(), \
1914 (__mmask8)(U), (int)(R)))
1915
1916static __inline__ __m128d __DEFAULT_FN_ATTRS128
1917_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1918 __A = _mm_add_sd(__A, __B);
1919 return __builtin_ia32_selectsd_128(__U, __A, __W);
1920}
1921
1922static __inline__ __m128d __DEFAULT_FN_ATTRS128
1923_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1924 __A = _mm_add_sd(__A, __B);
1925 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1926}
1927#define _mm_add_round_sd(A, B, R) \
1928 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1929 (__v2df)(__m128d)(B), \
1930 (__v2df)_mm_setzero_pd(), \
1931 (__mmask8)-1, (int)(R)))
1932
1933#define _mm_mask_add_round_sd(W, U, A, B, R) \
1934 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1935 (__v2df)(__m128d)(B), \
1936 (__v2df)(__m128d)(W), \
1937 (__mmask8)(U), (int)(R)))
1938
1939#define _mm_maskz_add_round_sd(U, A, B, R) \
1940 ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1941 (__v2df)(__m128d)(B), \
1942 (__v2df)_mm_setzero_pd(), \
1943 (__mmask8)(U), (int)(R)))
1944
1945static __inline__ __m512d __DEFAULT_FN_ATTRS512
1946_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1947 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1948 (__v8df)_mm512_add_pd(__A, __B),
1949 (__v8df)__W);
1950}
1951
1952static __inline__ __m512d __DEFAULT_FN_ATTRS512
1953_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1954 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1955 (__v8df)_mm512_add_pd(__A, __B),
1956 (__v8df)_mm512_setzero_pd());
1957}
1958
1959static __inline__ __m512 __DEFAULT_FN_ATTRS512
1960_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1961 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1962 (__v16sf)_mm512_add_ps(__A, __B),
1963 (__v16sf)__W);
1964}
1965
1966static __inline__ __m512 __DEFAULT_FN_ATTRS512
1967_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1968 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1969 (__v16sf)_mm512_add_ps(__A, __B),
1970 (__v16sf)_mm512_setzero_ps());
1971}
1972
1973#define _mm512_add_round_pd(A, B, R) \
1974 ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1975 (__v8df)(__m512d)(B), (int)(R)))
1976
1977#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1978 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1979 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1980 (__v8df)(__m512d)(W)))
1981
1982#define _mm512_maskz_add_round_pd(U, A, B, R) \
1983 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1984 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1985 (__v8df)_mm512_setzero_pd()))
1986
1987#define _mm512_add_round_ps(A, B, R) \
1988 ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
1989 (__v16sf)(__m512)(B), (int)(R)))
1990
1991#define _mm512_mask_add_round_ps(W, U, A, B, R) \
1992 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1993 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1994 (__v16sf)(__m512)(W)))
1995
1996#define _mm512_maskz_add_round_ps(U, A, B, R) \
1997 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1998 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1999 (__v16sf)_mm512_setzero_ps()))
2000
2001static __inline__ __m128 __DEFAULT_FN_ATTRS128
2002_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2003 __A = _mm_sub_ss(__A, __B);
2004 return __builtin_ia32_selectss_128(__U, __A, __W);
2005}
2006
2007static __inline__ __m128 __DEFAULT_FN_ATTRS128
2008_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2009 __A = _mm_sub_ss(__A, __B);
2010 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2011}
2012#define _mm_sub_round_ss(A, B, R) \
2013 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2014 (__v4sf)(__m128)(B), \
2015 (__v4sf)_mm_setzero_ps(), \
2016 (__mmask8)-1, (int)(R)))
2017
2018#define _mm_mask_sub_round_ss(W, U, A, B, R) \
2019 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2020 (__v4sf)(__m128)(B), \
2021 (__v4sf)(__m128)(W), (__mmask8)(U), \
2022 (int)(R)))
2023
2024#define _mm_maskz_sub_round_ss(U, A, B, R) \
2025 ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2026 (__v4sf)(__m128)(B), \
2027 (__v4sf)_mm_setzero_ps(), \
2028 (__mmask8)(U), (int)(R)))
2029
2030static __inline__ __m128d __DEFAULT_FN_ATTRS128
2031_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2032 __A = _mm_sub_sd(__A, __B);
2033 return __builtin_ia32_selectsd_128(__U, __A, __W);
2034}
2035
2036static __inline__ __m128d __DEFAULT_FN_ATTRS128
2037_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2038 __A = _mm_sub_sd(__A, __B);
2039 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2040}
2041
2042#define _mm_sub_round_sd(A, B, R) \
2043 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2044 (__v2df)(__m128d)(B), \
2045 (__v2df)_mm_setzero_pd(), \
2046 (__mmask8)-1, (int)(R)))
2047
2048#define _mm_mask_sub_round_sd(W, U, A, B, R) \
2049 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2050 (__v2df)(__m128d)(B), \
2051 (__v2df)(__m128d)(W), \
2052 (__mmask8)(U), (int)(R)))
2053
2054#define _mm_maskz_sub_round_sd(U, A, B, R) \
2055 ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2056 (__v2df)(__m128d)(B), \
2057 (__v2df)_mm_setzero_pd(), \
2058 (__mmask8)(U), (int)(R)))
2059
2060static __inline__ __m512d __DEFAULT_FN_ATTRS512
2061_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2062 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2063 (__v8df)_mm512_sub_pd(__A, __B),
2064 (__v8df)__W);
2065}
2066
2067static __inline__ __m512d __DEFAULT_FN_ATTRS512
2068_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2069 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2070 (__v8df)_mm512_sub_pd(__A, __B),
2071 (__v8df)_mm512_setzero_pd());
2072}
2073
2074static __inline__ __m512 __DEFAULT_FN_ATTRS512
2075_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2076 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2077 (__v16sf)_mm512_sub_ps(__A, __B),
2078 (__v16sf)__W);
2079}
2080
2081static __inline__ __m512 __DEFAULT_FN_ATTRS512
2082_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2083 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2084 (__v16sf)_mm512_sub_ps(__A, __B),
2085 (__v16sf)_mm512_setzero_ps());
2086}
2087
2088#define _mm512_sub_round_pd(A, B, R) \
2089 ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
2090 (__v8df)(__m512d)(B), (int)(R)))
2091
2092#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
2093 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2094 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2095 (__v8df)(__m512d)(W)))
2096
2097#define _mm512_maskz_sub_round_pd(U, A, B, R) \
2098 ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2099 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2100 (__v8df)_mm512_setzero_pd()))
2101
2102#define _mm512_sub_round_ps(A, B, R) \
2103 ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
2104 (__v16sf)(__m512)(B), (int)(R)))
2105
2106#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
2107 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2108 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2109 (__v16sf)(__m512)(W)))
2110
2111#define _mm512_maskz_sub_round_ps(U, A, B, R) \
2112 ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2113 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2114 (__v16sf)_mm512_setzero_ps()))
2115
2116static __inline__ __m128 __DEFAULT_FN_ATTRS128
2117_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2118 __A = _mm_mul_ss(__A, __B);
2119 return __builtin_ia32_selectss_128(__U, __A, __W);
2120}
2121
2122static __inline__ __m128 __DEFAULT_FN_ATTRS128
2123_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2124 __A = _mm_mul_ss(__A, __B);
2125 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2126}
/* Scalar single-precision multiply with explicit rounding control R;
   the no-mask form passes an all-ones mask and a zero source vector. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masking form: W supplies the passthrough value. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masking form. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2144
2145static __inline__ __m128d __DEFAULT_FN_ATTRS128
2146_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2147 __A = _mm_mul_sd(__A, __B);
2148 return __builtin_ia32_selectsd_128(__U, __A, __W);
2149}
2150
2151static __inline__ __m128d __DEFAULT_FN_ATTRS128
2152_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2153 __A = _mm_mul_sd(__A, __B);
2154 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2155}
2156
/* Scalar double-precision multiply with explicit rounding control R. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masking form: W supplies the passthrough value. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masking form. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2174
2175static __inline__ __m512d __DEFAULT_FN_ATTRS512
2176_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2177 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2178 (__v8df)_mm512_mul_pd(__A, __B),
2179 (__v8df)__W);
2180}
2181
2182static __inline__ __m512d __DEFAULT_FN_ATTRS512
2183_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2184 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2185 (__v8df)_mm512_mul_pd(__A, __B),
2186 (__v8df)_mm512_setzero_pd());
2187}
2188
2189static __inline__ __m512 __DEFAULT_FN_ATTRS512
2190_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2191 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2192 (__v16sf)_mm512_mul_ps(__A, __B),
2193 (__v16sf)__W);
2194}
2195
2196static __inline__ __m512 __DEFAULT_FN_ATTRS512
2197_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2198 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2199 (__v16sf)_mm512_mul_ps(__A, __B),
2200 (__v16sf)_mm512_setzero_ps());
2201}
2202
/* Packed multiply with explicit rounding control R. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masking: unselected lanes take the corresponding lane of W. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masking: unselected lanes are zeroed. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

/* Single-precision counterparts of the three macros above. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2230
2231static __inline__ __m128 __DEFAULT_FN_ATTRS128
2232_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2233 __A = _mm_div_ss(__A, __B);
2234 return __builtin_ia32_selectss_128(__U, __A, __W);
2235}
2236
2237static __inline__ __m128 __DEFAULT_FN_ATTRS128
2238_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2239 __A = _mm_div_ss(__A, __B);
2240 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2241}
2242
/* Scalar single-precision divide with explicit rounding control R. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masking form: W supplies the passthrough value. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masking form. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2260
2261static __inline__ __m128d __DEFAULT_FN_ATTRS128
2262_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2263 __A = _mm_div_sd(__A, __B);
2264 return __builtin_ia32_selectsd_128(__U, __A, __W);
2265}
2266
2267static __inline__ __m128d __DEFAULT_FN_ATTRS128
2268_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2269 __A = _mm_div_sd(__A, __B);
2270 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2271}
2272
/* Scalar double-precision divide with explicit rounding control R. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masking form: W supplies the passthrough value. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masking form. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2290
2291static __inline __m512d
2293 return (__m512d)((__v8df)__a/(__v8df)__b);
2294}
2295
2296static __inline__ __m512d __DEFAULT_FN_ATTRS512
2297_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2298 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2299 (__v8df)_mm512_div_pd(__A, __B),
2300 (__v8df)__W);
2301}
2302
2303static __inline__ __m512d __DEFAULT_FN_ATTRS512
2304_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2305 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2306 (__v8df)_mm512_div_pd(__A, __B),
2307 (__v8df)_mm512_setzero_pd());
2308}
2309
/* Divide packed single-precision (32-bit) elements in __a by __b. */
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_div_ps(__m512 __a, __m512 __b) {
  return (__m512)((__v16sf)__a/(__v16sf)__b);
}
2314
2315static __inline__ __m512 __DEFAULT_FN_ATTRS512
2316_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2317 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2318 (__v16sf)_mm512_div_ps(__A, __B),
2319 (__v16sf)__W);
2320}
2321
2322static __inline__ __m512 __DEFAULT_FN_ATTRS512
2323_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2324 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2325 (__v16sf)_mm512_div_ps(__A, __B),
2326 (__v16sf)_mm512_setzero_ps());
2327}
2328
/* Packed divide with explicit rounding control R. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masking: unselected lanes take the corresponding lane of W. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masking: unselected lanes are zeroed. */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                        (__v8df)_mm512_setzero_pd()))

/* Single-precision counterparts of the three macros above. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
2356
/* Round packed single-precision elements to the precision selected by the
   immediate B; non-*_round_* forms use the current MXCSR rounding mode. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

/* Merge-masking form: A is the passthrough, B the mask, C the source. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

/* Zero-masking form: A is the mask, B the source. */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

/* *_round_* forms take an explicit rounding/suppress-exceptions word R. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))

/* Double-precision counterparts of the six macros above. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2420
/* Double-precision FMA family with explicit rounding control R.
   Sign manipulation of the operands selects the variant:
     fmadd  =  (A*B) + C      fmsub  =  (A*B) - C
     fnmadd = -(A*B) + C      fnmsub = -(A*B) - C
   _mask / _mask3 / _maskz builtins differ in which source provides the
   unselected lanes. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2503
2504
2505static __inline__ __m512d __DEFAULT_FN_ATTRS512
2506_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2507{
2508 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2509 (__v8df) __B,
2510 (__v8df) __C,
2511 (__mmask8) -1,
2513}
2514
2515static __inline__ __m512d __DEFAULT_FN_ATTRS512
2516_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2517{
2518 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2519 (__v8df) __B,
2520 (__v8df) __C,
2521 (__mmask8) __U,
2523}
2524
2525static __inline__ __m512d __DEFAULT_FN_ATTRS512
2526_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2527{
2528 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2529 (__v8df) __B,
2530 (__v8df) __C,
2531 (__mmask8) __U,
2533}
2534
2535static __inline__ __m512d __DEFAULT_FN_ATTRS512
2536_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2537{
2538 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2539 (__v8df) __B,
2540 (__v8df) __C,
2541 (__mmask8) __U,
2543}
2544
2545static __inline__ __m512d __DEFAULT_FN_ATTRS512
2546_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2547{
2548 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2549 (__v8df) __B,
2550 -(__v8df) __C,
2551 (__mmask8) -1,
2553}
2554
2555static __inline__ __m512d __DEFAULT_FN_ATTRS512
2556_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2557{
2558 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2559 (__v8df) __B,
2560 -(__v8df) __C,
2561 (__mmask8) __U,
2563}
2564
2565static __inline__ __m512d __DEFAULT_FN_ATTRS512
2566_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2567{
2568 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2569 (__v8df) __B,
2570 -(__v8df) __C,
2571 (__mmask8) __U,
2573}
2574
2575static __inline__ __m512d __DEFAULT_FN_ATTRS512
2576_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2577{
2578 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2579 -(__v8df) __B,
2580 (__v8df) __C,
2581 (__mmask8) -1,
2583}
2584
2585static __inline__ __m512d __DEFAULT_FN_ATTRS512
2586_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2587{
2588 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2589 (__v8df) __B,
2590 (__v8df) __C,
2591 (__mmask8) __U,
2593}
2594
2595static __inline__ __m512d __DEFAULT_FN_ATTRS512
2596_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2597{
2598 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2599 (__v8df) __B,
2600 (__v8df) __C,
2601 (__mmask8) __U,
2603}
2604
2605static __inline__ __m512d __DEFAULT_FN_ATTRS512
2606_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2607{
2608 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2609 -(__v8df) __B,
2610 -(__v8df) __C,
2611 (__mmask8) -1,
2613}
2614
2615static __inline__ __m512d __DEFAULT_FN_ATTRS512
2616_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2617{
2618 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2619 (__v8df) __B,
2620 -(__v8df) __C,
2621 (__mmask8) __U,
2623}
2624
/* Single-precision FMA family with explicit rounding control R; operand
   negation selects fmadd/fmsub/fnmadd/fnmsub as in the _pd macros above. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2707
2708
2709static __inline__ __m512 __DEFAULT_FN_ATTRS512
2710_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2711{
2712 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2713 (__v16sf) __B,
2714 (__v16sf) __C,
2715 (__mmask16) -1,
2717}
2718
2719static __inline__ __m512 __DEFAULT_FN_ATTRS512
2720_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2721{
2722 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2723 (__v16sf) __B,
2724 (__v16sf) __C,
2725 (__mmask16) __U,
2727}
2728
2729static __inline__ __m512 __DEFAULT_FN_ATTRS512
2730_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2731{
2732 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2733 (__v16sf) __B,
2734 (__v16sf) __C,
2735 (__mmask16) __U,
2737}
2738
2739static __inline__ __m512 __DEFAULT_FN_ATTRS512
2740_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2741{
2742 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2743 (__v16sf) __B,
2744 (__v16sf) __C,
2745 (__mmask16) __U,
2747}
2748
2749static __inline__ __m512 __DEFAULT_FN_ATTRS512
2750_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2751{
2752 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2753 (__v16sf) __B,
2754 -(__v16sf) __C,
2755 (__mmask16) -1,
2757}
2758
2759static __inline__ __m512 __DEFAULT_FN_ATTRS512
2760_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2761{
2762 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2763 (__v16sf) __B,
2764 -(__v16sf) __C,
2765 (__mmask16) __U,
2767}
2768
2769static __inline__ __m512 __DEFAULT_FN_ATTRS512
2770_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2771{
2772 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2773 (__v16sf) __B,
2774 -(__v16sf) __C,
2775 (__mmask16) __U,
2777}
2778
2779static __inline__ __m512 __DEFAULT_FN_ATTRS512
2780_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2781{
2782 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2783 -(__v16sf) __B,
2784 (__v16sf) __C,
2785 (__mmask16) -1,
2787}
2788
2789static __inline__ __m512 __DEFAULT_FN_ATTRS512
2790_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2791{
2792 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2793 (__v16sf) __B,
2794 (__v16sf) __C,
2795 (__mmask16) __U,
2797}
2798
2799static __inline__ __m512 __DEFAULT_FN_ATTRS512
2800_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2801{
2802 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2803 (__v16sf) __B,
2804 (__v16sf) __C,
2805 (__mmask16) __U,
2807}
2808
2809static __inline__ __m512 __DEFAULT_FN_ATTRS512
2810_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2811{
2812 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2813 -(__v16sf) __B,
2814 -(__v16sf) __C,
2815 (__mmask16) -1,
2817}
2818
2819static __inline__ __m512 __DEFAULT_FN_ATTRS512
2820_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2821{
2822 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2823 (__v16sf) __B,
2824 -(__v16sf) __C,
2825 (__mmask16) __U,
2827}
2828
/* Double-precision fused multiply with alternating add/subtract lanes,
   explicit rounding control R; fmsubadd is expressed by negating C. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2876
2877
2878static __inline__ __m512d __DEFAULT_FN_ATTRS512
2879_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2880{
2881 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2882 (__v8df) __B,
2883 (__v8df) __C,
2884 (__mmask8) -1,
2886}
2887
2888static __inline__ __m512d __DEFAULT_FN_ATTRS512
2889_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2890{
2891 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2892 (__v8df) __B,
2893 (__v8df) __C,
2894 (__mmask8) __U,
2896}
2897
2898static __inline__ __m512d __DEFAULT_FN_ATTRS512
2899_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2900{
2901 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2902 (__v8df) __B,
2903 (__v8df) __C,
2904 (__mmask8) __U,
2906}
2907
2908static __inline__ __m512d __DEFAULT_FN_ATTRS512
2909_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2910{
2911 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2912 (__v8df) __B,
2913 (__v8df) __C,
2914 (__mmask8) __U,
2916}
2917
2918static __inline__ __m512d __DEFAULT_FN_ATTRS512
2919_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2920{
2921 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2922 (__v8df) __B,
2923 -(__v8df) __C,
2924 (__mmask8) -1,
2926}
2927
2928static __inline__ __m512d __DEFAULT_FN_ATTRS512
2929_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2930{
2931 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2932 (__v8df) __B,
2933 -(__v8df) __C,
2934 (__mmask8) __U,
2936}
2937
2938static __inline__ __m512d __DEFAULT_FN_ATTRS512
2939_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2940{
2941 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2942 (__v8df) __B,
2943 -(__v8df) __C,
2944 (__mmask8) __U,
2946}
2947
/* Single-precision counterparts of the fmaddsub/fmsubadd round macros. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
2995
2996
2997static __inline__ __m512 __DEFAULT_FN_ATTRS512
2998_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
2999{
3000 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3001 (__v16sf) __B,
3002 (__v16sf) __C,
3003 (__mmask16) -1,
3005}
3006
3007static __inline__ __m512 __DEFAULT_FN_ATTRS512
3008_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3009{
3010 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3011 (__v16sf) __B,
3012 (__v16sf) __C,
3013 (__mmask16) __U,
3015}
3016
3017static __inline__ __m512 __DEFAULT_FN_ATTRS512
3018_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3019{
3020 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3021 (__v16sf) __B,
3022 (__v16sf) __C,
3023 (__mmask16) __U,
3025}
3026
3027static __inline__ __m512 __DEFAULT_FN_ATTRS512
3028_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3029{
3030 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3031 (__v16sf) __B,
3032 (__v16sf) __C,
3033 (__mmask16) __U,
3035}
3036
3037static __inline__ __m512 __DEFAULT_FN_ATTRS512
3038_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3039{
3040 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3041 (__v16sf) __B,
3042 -(__v16sf) __C,
3043 (__mmask16) -1,
3045}
3046
3047static __inline__ __m512 __DEFAULT_FN_ATTRS512
3048_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3049{
3050 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3051 (__v16sf) __B,
3052 -(__v16sf) __C,
3053 (__mmask16) __U,
3055}
3056
3057static __inline__ __m512 __DEFAULT_FN_ATTRS512
3058_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3059{
3060 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3061 (__v16sf) __B,
3062 -(__v16sf) __C,
3063 (__mmask16) __U,
3065}
3066
3067#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3068 ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3069 (__v8df)(__m512d)(B), \
3070 (__v8df)(__m512d)(C), \
3071 (__mmask8)(U), (int)(R)))
3072
3073
3074static __inline__ __m512d __DEFAULT_FN_ATTRS512
3075_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3076{
3077 return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3078 (__v8df) __B,
3079 (__v8df) __C,
3080 (__mmask8) __U,
3082}
3083
3084#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3085 ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3086 (__v16sf)(__m512)(B), \
3087 (__v16sf)(__m512)(C), \
3088 (__mmask16)(U), (int)(R)))
3089
3090static __inline__ __m512 __DEFAULT_FN_ATTRS512
3091_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3092{
3093 return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3094 (__v16sf) __B,
3095 (__v16sf) __C,
3096 (__mmask16) __U,
3098}
3099
3100#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3101 ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3102 (__v8df)(__m512d)(B), \
3103 (__v8df)(__m512d)(C), \
3104 (__mmask8)(U), (int)(R)))
3105
3106
3107static __inline__ __m512d __DEFAULT_FN_ATTRS512
3108_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3109{
3110 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3111 (__v8df) __B,
3112 (__v8df) __C,
3113 (__mmask8) __U,
3115}
3116
3117#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3118 ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3119 (__v16sf)(__m512)(B), \
3120 (__v16sf)(__m512)(C), \
3121 (__mmask16)(U), (int)(R)))
3122
3123
3124static __inline__ __m512 __DEFAULT_FN_ATTRS512
3125_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3126{
3127 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3128 (__v16sf) __B,
3129 (__v16sf) __C,
3130 (__mmask16) __U,
3132}
3133
3134#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3135 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3136 -(__v8df)(__m512d)(B), \
3137 (__v8df)(__m512d)(C), \
3138 (__mmask8)(U), (int)(R)))
3139
3140
3141static __inline__ __m512d __DEFAULT_FN_ATTRS512
3142_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3143{
3144 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3145 -(__v8df) __B,
3146 (__v8df) __C,
3147 (__mmask8) __U,
3149}
3150
3151#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3152 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3153 -(__v16sf)(__m512)(B), \
3154 (__v16sf)(__m512)(C), \
3155 (__mmask16)(U), (int)(R)))
3156
3157
3158static __inline__ __m512 __DEFAULT_FN_ATTRS512
3159_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3160{
3161 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3162 -(__v16sf) __B,
3163 (__v16sf) __C,
3164 (__mmask16) __U,
3166}
3167
3168#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3169 ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3170 -(__v8df)(__m512d)(B), \
3171 -(__v8df)(__m512d)(C), \
3172 (__mmask8)(U), (int)(R)))
3173
3174
3175#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3176 ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
3177 (__v8df)(__m512d)(B), \
3178 (__v8df)(__m512d)(C), \
3179 (__mmask8)(U), (int)(R)))
3180
3181
3182static __inline__ __m512d __DEFAULT_FN_ATTRS512
3183_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3184{
3185 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3186 -(__v8df) __B,
3187 -(__v8df) __C,
3188 (__mmask8) __U,
3190}
3191
3192static __inline__ __m512d __DEFAULT_FN_ATTRS512
3193_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3194{
3195 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3196 (__v8df) __B,
3197 (__v8df) __C,
3198 (__mmask8) __U,
3200}
3201
3202#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3203 ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3204 -(__v16sf)(__m512)(B), \
3205 -(__v16sf)(__m512)(C), \
3206 (__mmask16)(U), (int)(R)))
3207
3208
3209#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3210 ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3211 (__v16sf)(__m512)(B), \
3212 (__v16sf)(__m512)(C), \
3213 (__mmask16)(U), (int)(R)))
3214
3215
3216static __inline__ __m512 __DEFAULT_FN_ATTRS512
3217_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3218{
3219 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3220 -(__v16sf) __B,
3221 -(__v16sf) __C,
3222 (__mmask16) __U,
3224}
3225
3226static __inline__ __m512 __DEFAULT_FN_ATTRS512
3227_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3228{
3229 return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3230 (__v16sf) __B,
3231 (__v16sf) __C,
3232 (__mmask16) __U,
3234}
3235
3236
3237
3238/* Vector permutations */
3239
/* Two-source full permute: each result element is selected from the
 * concatenation of __A and __B by the corresponding index in __I
 * (vpermi2d/vpermi2q).  The mask form merges into __A, the mask2 form
 * merges into the index vector __I, and the maskz form zeroes inactive
 * lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
                                                (__v16si) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
                               __m512i __B)
{
  /* Inactive lanes keep the value from __A. */
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
                                __m512i __B)
{
  /* Inactive lanes keep the value from the index operand __I. */
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)__I);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
                                __m512i __B)
{
  /* Inactive lanes are zeroed. */
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
                                                (__v8di) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
                               __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                              (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                              (__v8di)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
                                __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                              (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                              (__v8di)__I);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
                                __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                              (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                              (__v8di)_mm512_setzero_si512());
}
3307
/* Concatenate B:A and shift right by I elements (valignq/valignd).  The
 * immediate I must be a compile-time constant. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()))
/* Vector Extract */

/* Extract a 256-bit (I selects low/high half) or 128-bit (I in 0..3) lane.
 * Unmasked forms pass an undefined pass-through vector and an all-ones mask. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_undefined_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_undefined_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3366
3367/* Vector Blend */
3368
/* Mask blend: for each lane, pick from __W where the mask bit is set,
 * otherwise from __A.  Note the operand order: __W is the "true" source. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
                                                (__v8df) __W,
                                                (__v8df) __A);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
                                               (__v16sf) __W,
                                               (__v16sf) __A);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                                               (__v8di) __W,
                                               (__v8di) __A);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                                               (__v16si) __W,
                                               (__v16si) __A);
}
3400
3401/* Compare */
3402
/* Floating-point compares producing a k-mask.  P is one of the _CMP_*
 * predicates; R is a rounding/exception-suppression immediate.  The named
 * convenience forms expand to _mm512_cmp_*_mask with a fixed predicate
 * (OQ/OS = ordered quiet/signaling, UQ/US = unordered). */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3512
3513/* Conversion */
3514
3515#define _mm512_cvtt_roundps_epu32(A, R) \
3516 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3517 (__v16si)_mm512_undefined_epi32(), \
3518 (__mmask16)-1, (int)(R)))
3519
3520#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3521 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3522 (__v16si)(__m512i)(W), \
3523 (__mmask16)(U), (int)(R)))
3524
3525#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3526 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3527 (__v16si)_mm512_setzero_si512(), \
3528 (__mmask16)(U), (int)(R)))
3529
3530
3531static __inline __m512i __DEFAULT_FN_ATTRS512
3533{
3534 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3535 (__v16si)
3537 (__mmask16) -1,
3539}
3540
3541static __inline__ __m512i __DEFAULT_FN_ATTRS512
3542_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3543{
3544 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3545 (__v16si) __W,
3546 (__mmask16) __U,
3548}
3549
3550static __inline__ __m512i __DEFAULT_FN_ATTRS512
3552{
3553 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3554 (__v16si) _mm512_setzero_si512 (),
3555 (__mmask16) __U,
3557}
3558
3559#define _mm512_cvt_roundepi32_ps(A, R) \
3560 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3561 (__v16sf)_mm512_setzero_ps(), \
3562 (__mmask16)-1, (int)(R)))
3563
3564#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
3565 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3566 (__v16sf)(__m512)(W), \
3567 (__mmask16)(U), (int)(R)))
3568
3569#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
3570 ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3571 (__v16sf)_mm512_setzero_ps(), \
3572 (__mmask16)(U), (int)(R)))
3573
3574#define _mm512_cvt_roundepu32_ps(A, R) \
3575 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3576 (__v16sf)_mm512_setzero_ps(), \
3577 (__mmask16)-1, (int)(R)))
3578
3579#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
3580 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3581 (__v16sf)(__m512)(W), \
3582 (__mmask16)(U), (int)(R)))
3583
3584#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
3585 ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3586 (__v16sf)_mm512_setzero_ps(), \
3587 (__mmask16)(U), (int)(R)))
3588
3589static __inline__ __m512 __DEFAULT_FN_ATTRS512
3591{
3592 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3593}
3594
3595static __inline__ __m512 __DEFAULT_FN_ATTRS512
3596_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3597{
3598 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3599 (__v16sf)_mm512_cvtepu32_ps(__A),
3600 (__v16sf)__W);
3601}
3602
3603static __inline__ __m512 __DEFAULT_FN_ATTRS512
3605{
3606 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3607 (__v16sf)_mm512_cvtepu32_ps(__A),
3608 (__v16sf)_mm512_setzero_ps());
3609}
3610
3611static __inline __m512d __DEFAULT_FN_ATTRS512
3613{
3614 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3615}
3616
3617static __inline__ __m512d __DEFAULT_FN_ATTRS512
3618_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3619{
3620 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3621 (__v8df)_mm512_cvtepi32_pd(__A),
3622 (__v8df)__W);
3623}
3624
3625static __inline__ __m512d __DEFAULT_FN_ATTRS512
3627{
3628 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3629 (__v8df)_mm512_cvtepi32_pd(__A),
3630 (__v8df)_mm512_setzero_pd());
3631}
3632
3633static __inline__ __m512d __DEFAULT_FN_ATTRS512
3635{
3636 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3637}
3638
3639static __inline__ __m512d __DEFAULT_FN_ATTRS512
3640_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3641{
3642 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3643}
3644
3645static __inline__ __m512 __DEFAULT_FN_ATTRS512
3647{
3648 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3649}
3650
3651static __inline__ __m512 __DEFAULT_FN_ATTRS512
3652_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3653{
3654 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3655 (__v16sf)_mm512_cvtepi32_ps(__A),
3656 (__v16sf)__W);
3657}
3658
3659static __inline__ __m512 __DEFAULT_FN_ATTRS512
3661{
3662 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3663 (__v16sf)_mm512_cvtepi32_ps(__A),
3664 (__v16sf)_mm512_setzero_ps());
3665}
3666
3667static __inline __m512d __DEFAULT_FN_ATTRS512
3669{
3670 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3671}
3672
3673static __inline__ __m512d __DEFAULT_FN_ATTRS512
3674_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3675{
3676 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3677 (__v8df)_mm512_cvtepu32_pd(__A),
3678 (__v8df)__W);
3679}
3680
3681static __inline__ __m512d __DEFAULT_FN_ATTRS512
3683{
3684 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3685 (__v8df)_mm512_cvtepu32_pd(__A),
3686 (__v8df)_mm512_setzero_pd());
3687}
3688
3689static __inline__ __m512d __DEFAULT_FN_ATTRS512
3691{
3692 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3693}
3694
3695static __inline__ __m512d __DEFAULT_FN_ATTRS512
3696_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3697{
3698 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3699}
3700
3701#define _mm512_cvt_roundpd_ps(A, R) \
3702 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3703 (__v8sf)_mm256_setzero_ps(), \
3704 (__mmask8)-1, (int)(R)))
3705
3706#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3707 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3708 (__v8sf)(__m256)(W), (__mmask8)(U), \
3709 (int)(R)))
3710
3711#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3712 ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3713 (__v8sf)_mm256_setzero_ps(), \
3714 (__mmask8)(U), (int)(R)))
3715
3716static __inline__ __m256 __DEFAULT_FN_ATTRS512
3717_mm512_cvtpd_ps (__m512d __A)
3718{
3719 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3720 (__v8sf) _mm256_undefined_ps (),
3721 (__mmask8) -1,
3723}
3724
3725static __inline__ __m256 __DEFAULT_FN_ATTRS512
3726_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3727{
3728 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3729 (__v8sf) __W,
3730 (__mmask8) __U,
3732}
3733
3734static __inline__ __m256 __DEFAULT_FN_ATTRS512
3736{
3737 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3738 (__v8sf) _mm256_setzero_ps (),
3739 (__mmask8) __U,
3741}
3742
3743static __inline__ __m512 __DEFAULT_FN_ATTRS512
3745{
3746 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3747 (__v8sf) _mm256_setzero_ps (),
3748 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3749}
3750
3751static __inline__ __m512 __DEFAULT_FN_ATTRS512
3752_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3753{
3754 return (__m512) __builtin_shufflevector (
3756 __U, __A),
3757 (__v8sf) _mm256_setzero_ps (),
3758 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3759}
3760
3761#define _mm512_cvt_roundps_ph(A, I) \
3762 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3763 (__v16hi)_mm256_undefined_si256(), \
3764 (__mmask16)-1))
3765
3766#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
3767 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3768 (__v16hi)(__m256i)(U), \
3769 (__mmask16)(W)))
3770
3771#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
3772 ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3773 (__v16hi)_mm256_setzero_si256(), \
3774 (__mmask16)(W)))
3775
3776#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
3777#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
3778#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3779
3780#define _mm512_cvt_roundph_ps(A, R) \
3781 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3782 (__v16sf)_mm512_undefined_ps(), \
3783 (__mmask16)-1, (int)(R)))
3784
3785#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
3786 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3787 (__v16sf)(__m512)(W), \
3788 (__mmask16)(U), (int)(R)))
3789
3790#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
3791 ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3792 (__v16sf)_mm512_setzero_ps(), \
3793 (__mmask16)(U), (int)(R)))
3794
3795
3796static __inline __m512 __DEFAULT_FN_ATTRS512
3798{
3799 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3800 (__v16sf)
3802 (__mmask16) -1,
3804}
3805
3806static __inline__ __m512 __DEFAULT_FN_ATTRS512
3807_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3808{
3809 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3810 (__v16sf) __W,
3811 (__mmask16) __U,
3813}
3814
3815static __inline__ __m512 __DEFAULT_FN_ATTRS512
3817{
3818 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3819 (__v16sf) _mm512_setzero_ps (),
3820 (__mmask16) __U,
3822}
3823
3824#define _mm512_cvtt_roundpd_epi32(A, R) \
3825 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3826 (__v8si)_mm256_setzero_si256(), \
3827 (__mmask8)-1, (int)(R)))
3828
3829#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3830 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3831 (__v8si)(__m256i)(W), \
3832 (__mmask8)(U), (int)(R)))
3833
3834#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3835 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3836 (__v8si)_mm256_setzero_si256(), \
3837 (__mmask8)(U), (int)(R)))
3838
3839static __inline __m256i __DEFAULT_FN_ATTRS512
3841{
3842 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3843 (__v8si)_mm256_setzero_si256(),
3844 (__mmask8) -1,
3846}
3847
3848static __inline__ __m256i __DEFAULT_FN_ATTRS512
3849_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3850{
3851 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3852 (__v8si) __W,
3853 (__mmask8) __U,
3855}
3856
3857static __inline__ __m256i __DEFAULT_FN_ATTRS512
3859{
3860 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3861 (__v8si) _mm256_setzero_si256 (),
3862 (__mmask8) __U,
3864}
3865
3866#define _mm512_cvtt_roundps_epi32(A, R) \
3867 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3868 (__v16si)_mm512_setzero_si512(), \
3869 (__mmask16)-1, (int)(R)))
3870
3871#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3872 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3873 (__v16si)(__m512i)(W), \
3874 (__mmask16)(U), (int)(R)))
3875
3876#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3877 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3878 (__v16si)_mm512_setzero_si512(), \
3879 (__mmask16)(U), (int)(R)))
3880
3881static __inline __m512i __DEFAULT_FN_ATTRS512
3883{
3884 return (__m512i)
3885 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3886 (__v16si) _mm512_setzero_si512 (),
3888}
3889
3890static __inline__ __m512i __DEFAULT_FN_ATTRS512
3891_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3892{
3893 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3894 (__v16si) __W,
3895 (__mmask16) __U,
3897}
3898
3899static __inline__ __m512i __DEFAULT_FN_ATTRS512
3901{
3902 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3903 (__v16si) _mm512_setzero_si512 (),
3904 (__mmask16) __U,
3906}
3907
3908#define _mm512_cvt_roundps_epi32(A, R) \
3909 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3910 (__v16si)_mm512_setzero_si512(), \
3911 (__mmask16)-1, (int)(R)))
3912
3913#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
3914 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3915 (__v16si)(__m512i)(W), \
3916 (__mmask16)(U), (int)(R)))
3917
3918#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
3919 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3920 (__v16si)_mm512_setzero_si512(), \
3921 (__mmask16)(U), (int)(R)))
3922
3923static __inline__ __m512i __DEFAULT_FN_ATTRS512
3925{
3926 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3927 (__v16si) _mm512_undefined_epi32 (),
3928 (__mmask16) -1,
3930}
3931
3932static __inline__ __m512i __DEFAULT_FN_ATTRS512
3933_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3934{
3935 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3936 (__v16si) __W,
3937 (__mmask16) __U,
3939}
3940
3941static __inline__ __m512i __DEFAULT_FN_ATTRS512
3943{
3944 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3945 (__v16si)
3947 (__mmask16) __U,
3949}
3950
3951#define _mm512_cvt_roundpd_epi32(A, R) \
3952 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3953 (__v8si)_mm256_setzero_si256(), \
3954 (__mmask8)-1, (int)(R)))
3955
3956#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
3957 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3958 (__v8si)(__m256i)(W), \
3959 (__mmask8)(U), (int)(R)))
3960
3961#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
3962 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3963 (__v8si)_mm256_setzero_si256(), \
3964 (__mmask8)(U), (int)(R)))
3965
3966static __inline__ __m256i __DEFAULT_FN_ATTRS512
3968{
3969 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3970 (__v8si)
3972 (__mmask8) -1,
3974}
3975
3976static __inline__ __m256i __DEFAULT_FN_ATTRS512
3977_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3978{
3979 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3980 (__v8si) __W,
3981 (__mmask8) __U,
3983}
3984
3985static __inline__ __m256i __DEFAULT_FN_ATTRS512
3987{
3988 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3989 (__v8si)
3991 (__mmask8) __U,
3993}
3994
3995#define _mm512_cvt_roundps_epu32(A, R) \
3996 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
3997 (__v16si)_mm512_setzero_si512(), \
3998 (__mmask16)-1, (int)(R)))
3999
4000#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
4001 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4002 (__v16si)(__m512i)(W), \
4003 (__mmask16)(U), (int)(R)))
4004
4005#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
4006 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4007 (__v16si)_mm512_setzero_si512(), \
4008 (__mmask16)(U), (int)(R)))
4009
4010static __inline__ __m512i __DEFAULT_FN_ATTRS512
4012{
4013 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4014 (__v16si)\
4016 (__mmask16) -1,\
4018}
4019
4020static __inline__ __m512i __DEFAULT_FN_ATTRS512
4021_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4022{
4023 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4024 (__v16si) __W,
4025 (__mmask16) __U,
4027}
4028
4029static __inline__ __m512i __DEFAULT_FN_ATTRS512
4031{
4032 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4033 (__v16si)
4035 (__mmask16) __U ,
4037}
4038
4039#define _mm512_cvt_roundpd_epu32(A, R) \
4040 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4041 (__v8si)_mm256_setzero_si256(), \
4042 (__mmask8)-1, (int)(R)))
4043
4044#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
4045 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4046 (__v8si)(__m256i)(W), \
4047 (__mmask8)(U), (int)(R)))
4048
4049#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
4050 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4051 (__v8si)_mm256_setzero_si256(), \
4052 (__mmask8)(U), (int)(R)))
4053
4054static __inline__ __m256i __DEFAULT_FN_ATTRS512
4056{
4057 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4058 (__v8si)
4060 (__mmask8) -1,
4062}
4063
4064static __inline__ __m256i __DEFAULT_FN_ATTRS512
4065_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4066{
4067 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4068 (__v8si) __W,
4069 (__mmask8) __U,
4071}
4072
4073static __inline__ __m256i __DEFAULT_FN_ATTRS512
4075{
4076 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4077 (__v8si)
4079 (__mmask8) __U,
4081}
4082
4083static __inline__ double __DEFAULT_FN_ATTRS512
4085{
4086 return __a[0];
4087}
4088
4089static __inline__ float __DEFAULT_FN_ATTRS512
4091{
4092 return __a[0];
4093}
4094
4095/* Unpack and Interleave */
4096
/* Interleave (unpack) high/low element pairs of each 128-bit lane of the two
 * sources.  Index arithmetic like "1+2, 9+2" spells out the per-lane base
 * offsets; indices >= element count select from __b. */
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
                                           (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
                                           (__v8df)_mm512_setzero_pd());
}

static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
                                           (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
                                           (__v8df)_mm512_setzero_pd());
}

static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         2,    18,    3,    19,
                                         2+4,  18+4,  3+4,  19+4,
                                         2+8,  18+8,  3+8,  19+8,
                                         2+12, 18+12, 3+12, 19+12);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}

static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         0,    16,    1,    17,
                                         0+4,  16+4,  1+4,  17+4,
                                         0+8,  16+8,  1+8,  17+8,
                                         0+12, 16+12, 1+12, 17+12);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
                                          2+8,  18+8,  3+8,  19+8,
                                          2+12, 18+12, 3+12, 19+12);
}
4199
4200static __inline__ __m512i __DEFAULT_FN_ATTRS512
4201_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4202{
4203 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4204 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4205 (__v16si)__W);
4206}
4207
4208static __inline__ __m512i __DEFAULT_FN_ATTRS512
4209_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4210{
4211 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4212 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4213 (__v16si)_mm512_setzero_si512());
4214}
4215
4216static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4217_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
4218 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4219 0, 16, 1, 17,
4220 0+4, 16+4, 1+4, 17+4,
4221 0+8, 16+8, 1+8, 17+8,
4222 0+12, 16+12, 1+12, 17+12);
4223}
4224
4225static __inline__ __m512i __DEFAULT_FN_ATTRS512
4226_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4227{
4228 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4229 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4230 (__v16si)__W);
4231}
4232
4233static __inline__ __m512i __DEFAULT_FN_ATTRS512
4234_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4235{
4236 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4237 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4238 (__v16si)_mm512_setzero_si512());
4239}
4240
4241static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4242_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
4243 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4244 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4245}
4246
4247static __inline__ __m512i __DEFAULT_FN_ATTRS512
4248_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4249{
4250 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4251 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4252 (__v8di)__W);
4253}
4254
4255static __inline__ __m512i __DEFAULT_FN_ATTRS512
4256_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4257{
4258 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4259 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4260 (__v8di)_mm512_setzero_si512());
4261}
4262
4263static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4264_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
4265 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4266 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4267}
4268
4269static __inline__ __m512i __DEFAULT_FN_ATTRS512
4270_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4271{
4272 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4273 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4274 (__v8di)__W);
4275}
4276
4277static __inline__ __m512i __DEFAULT_FN_ATTRS512
4278_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4279{
4280 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4281 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4282 (__v8di)_mm512_setzero_si512());
4283}
4284
4285
4286/* SIMD load ops */
4287
4288static __inline __m512i __DEFAULT_FN_ATTRS512
4290{
4291 struct __loadu_si512 {
4292 __m512i_u __v;
4293 } __attribute__((__packed__, __may_alias__));
4294 return ((const struct __loadu_si512*)__P)->__v;
4295}
4296
4297static __inline __m512i __DEFAULT_FN_ATTRS512
4299{
4300 struct __loadu_epi32 {
4301 __m512i_u __v;
4302 } __attribute__((__packed__, __may_alias__));
4303 return ((const struct __loadu_epi32*)__P)->__v;
4304}
4305
4306static __inline __m512i __DEFAULT_FN_ATTRS512
4307_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4308{
4309 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4310 (__v16si) __W,
4311 (__mmask16) __U);
4312}
4313
4314
4315static __inline __m512i __DEFAULT_FN_ATTRS512
4317{
4318 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4319 (__v16si)
4321 (__mmask16) __U);
4322}
4323
4324static __inline __m512i __DEFAULT_FN_ATTRS512
4326{
4327 struct __loadu_epi64 {
4328 __m512i_u __v;
4329 } __attribute__((__packed__, __may_alias__));
4330 return ((const struct __loadu_epi64*)__P)->__v;
4331}
4332
4333static __inline __m512i __DEFAULT_FN_ATTRS512
4334_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4335{
4336 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4337 (__v8di) __W,
4338 (__mmask8) __U);
4339}
4340
4341static __inline __m512i __DEFAULT_FN_ATTRS512
4343{
4344 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4345 (__v8di)
4347 (__mmask8) __U);
4348}
4349
4350static __inline __m512 __DEFAULT_FN_ATTRS512
4351_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4352{
4353 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4354 (__v16sf) __W,
4355 (__mmask16) __U);
4356}
4357
4358static __inline __m512 __DEFAULT_FN_ATTRS512
4360{
4361 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4362 (__v16sf)
4364 (__mmask16) __U);
4365}
4366
4367static __inline __m512d __DEFAULT_FN_ATTRS512
4368_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4369{
4370 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4371 (__v8df) __W,
4372 (__mmask8) __U);
4373}
4374
4375static __inline __m512d __DEFAULT_FN_ATTRS512
4377{
4378 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4379 (__v8df)
4381 (__mmask8) __U);
4382}
4383
4384static __inline __m512d __DEFAULT_FN_ATTRS512
4386{
4387 struct __loadu_pd {
4388 __m512d_u __v;
4389 } __attribute__((__packed__, __may_alias__));
4390 return ((const struct __loadu_pd*)__p)->__v;
4391}
4392
4393static __inline __m512 __DEFAULT_FN_ATTRS512
4395{
4396 struct __loadu_ps {
4397 __m512_u __v;
4398 } __attribute__((__packed__, __may_alias__));
4399 return ((const struct __loadu_ps*)__p)->__v;
4400}
4401
4402static __inline __m512 __DEFAULT_FN_ATTRS512
4404{
4405 return *(const __m512*)__p;
4406}
4407
4408static __inline __m512 __DEFAULT_FN_ATTRS512
4409_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4410{
4411 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4412 (__v16sf) __W,
4413 (__mmask16) __U);
4414}
4415
4416static __inline __m512 __DEFAULT_FN_ATTRS512
4418{
4419 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4420 (__v16sf)
4422 (__mmask16) __U);
4423}
4424
4425static __inline __m512d __DEFAULT_FN_ATTRS512
4427{
4428 return *(const __m512d*)__p;
4429}
4430
4431static __inline __m512d __DEFAULT_FN_ATTRS512
4432_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4433{
4434 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4435 (__v8df) __W,
4436 (__mmask8) __U);
4437}
4438
4439static __inline __m512d __DEFAULT_FN_ATTRS512
4441{
4442 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4443 (__v8df)
4445 (__mmask8) __U);
4446}
4447
4448static __inline __m512i __DEFAULT_FN_ATTRS512
4450{
4451 return *(const __m512i *) __P;
4452}
4453
4454static __inline __m512i __DEFAULT_FN_ATTRS512
4456{
4457 return *(const __m512i *) __P;
4458}
4459
4460static __inline __m512i __DEFAULT_FN_ATTRS512
4462{
4463 return *(const __m512i *) __P;
4464}
4465
4466/* SIMD store ops */
4467
4468static __inline void __DEFAULT_FN_ATTRS512
4469_mm512_storeu_epi64 (void *__P, __m512i __A)
4470{
4471 struct __storeu_epi64 {
4472 __m512i_u __v;
4473 } __attribute__((__packed__, __may_alias__));
4474 ((struct __storeu_epi64*)__P)->__v = __A;
4475}
4476
4477static __inline void __DEFAULT_FN_ATTRS512
4478_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4479{
4480 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4481 (__mmask8) __U);
4482}
4483
4484static __inline void __DEFAULT_FN_ATTRS512
4485_mm512_storeu_si512 (void *__P, __m512i __A)
4486{
4487 struct __storeu_si512 {
4488 __m512i_u __v;
4489 } __attribute__((__packed__, __may_alias__));
4490 ((struct __storeu_si512*)__P)->__v = __A;
4491}
4492
4493static __inline void __DEFAULT_FN_ATTRS512
4494_mm512_storeu_epi32 (void *__P, __m512i __A)
4495{
4496 struct __storeu_epi32 {
4497 __m512i_u __v;
4498 } __attribute__((__packed__, __may_alias__));
4499 ((struct __storeu_epi32*)__P)->__v = __A;
4500}
4501
4502static __inline void __DEFAULT_FN_ATTRS512
4504{
4505 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4506 (__mmask16) __U);
4507}
4508
4509static __inline void __DEFAULT_FN_ATTRS512
4510_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4511{
4512 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4513}
4514
4515static __inline void __DEFAULT_FN_ATTRS512
4516_mm512_storeu_pd(void *__P, __m512d __A)
4517{
4518 struct __storeu_pd {
4519 __m512d_u __v;
4520 } __attribute__((__packed__, __may_alias__));
4521 ((struct __storeu_pd*)__P)->__v = __A;
4522}
4523
4524static __inline void __DEFAULT_FN_ATTRS512
4525_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4526{
4527 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4528 (__mmask16) __U);
4529}
4530
4531static __inline void __DEFAULT_FN_ATTRS512
4532_mm512_storeu_ps(void *__P, __m512 __A)
4533{
4534 struct __storeu_ps {
4535 __m512_u __v;
4536 } __attribute__((__packed__, __may_alias__));
4537 ((struct __storeu_ps*)__P)->__v = __A;
4538}
4539
4540static __inline void __DEFAULT_FN_ATTRS512
4541_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4542{
4543 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4544}
4545
4546static __inline void __DEFAULT_FN_ATTRS512
4547_mm512_store_pd(void *__P, __m512d __A)
4548{
4549 *(__m512d*)__P = __A;
4550}
4551
4552static __inline void __DEFAULT_FN_ATTRS512
4553_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4554{
4555 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4556 (__mmask16) __U);
4557}
4558
4559static __inline void __DEFAULT_FN_ATTRS512
4560_mm512_store_ps(void *__P, __m512 __A)
4561{
4562 *(__m512*)__P = __A;
4563}
4564
4565static __inline void __DEFAULT_FN_ATTRS512
4566_mm512_store_si512 (void *__P, __m512i __A)
4567{
4568 *(__m512i *) __P = __A;
4569}
4570
4571static __inline void __DEFAULT_FN_ATTRS512
4572_mm512_store_epi32 (void *__P, __m512i __A)
4573{
4574 *(__m512i *) __P = __A;
4575}
4576
4577static __inline void __DEFAULT_FN_ATTRS512
4578_mm512_store_epi64 (void *__P, __m512i __A)
4579{
4580 *(__m512i *) __P = __A;
4581}
4582
4583/* Mask ops */
4584
4585static __inline __mmask16 __DEFAULT_FN_ATTRS
4587{
4588 return __builtin_ia32_knothi(__M);
4589}
4590
/* Integer compare */

/* Convenience wrappers around _mm512_[mask_]cmp_epi32_mask with a fixed
   comparison predicate. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 32-bit comparisons. */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Signed 64-bit comparisons. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 64-bit comparisons. */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4692
4693static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4695 /* This function always performs a signed extension, but __v16qi is a char
4696 which may be signed or unsigned, so use __v16qs. */
4697 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4698}
4699
4700static __inline__ __m512i __DEFAULT_FN_ATTRS512
4701_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4702{
4703 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4704 (__v16si)_mm512_cvtepi8_epi32(__A),
4705 (__v16si)__W);
4706}
4707
4708static __inline__ __m512i __DEFAULT_FN_ATTRS512
4710{
4711 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4712 (__v16si)_mm512_cvtepi8_epi32(__A),
4713 (__v16si)_mm512_setzero_si512());
4714}
4715
4716static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4718 /* This function always performs a signed extension, but __v16qi is a char
4719 which may be signed or unsigned, so use __v16qs. */
4720 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4721}
4722
4723static __inline__ __m512i __DEFAULT_FN_ATTRS512
4724_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4725{
4726 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4727 (__v8di)_mm512_cvtepi8_epi64(__A),
4728 (__v8di)__W);
4729}
4730
4731static __inline__ __m512i __DEFAULT_FN_ATTRS512
4733{
4734 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4735 (__v8di)_mm512_cvtepi8_epi64(__A),
4736 (__v8di)_mm512_setzero_si512 ());
4737}
4738
4739static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4741 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4742}
4743
4744static __inline__ __m512i __DEFAULT_FN_ATTRS512
4745_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4746{
4747 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4748 (__v8di)_mm512_cvtepi32_epi64(__X),
4749 (__v8di)__W);
4750}
4751
4752static __inline__ __m512i __DEFAULT_FN_ATTRS512
4754{
4755 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4756 (__v8di)_mm512_cvtepi32_epi64(__X),
4757 (__v8di)_mm512_setzero_si512());
4758}
4759
4760static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4762 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4763}
4764
4765static __inline__ __m512i __DEFAULT_FN_ATTRS512
4766_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4767{
4768 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4769 (__v16si)_mm512_cvtepi16_epi32(__A),
4770 (__v16si)__W);
4771}
4772
4773static __inline__ __m512i __DEFAULT_FN_ATTRS512
4775{
4776 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4777 (__v16si)_mm512_cvtepi16_epi32(__A),
4778 (__v16si)_mm512_setzero_si512 ());
4779}
4780
4781static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4783 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4784}
4785
4786static __inline__ __m512i __DEFAULT_FN_ATTRS512
4787_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4788{
4789 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4790 (__v8di)_mm512_cvtepi16_epi64(__A),
4791 (__v8di)__W);
4792}
4793
4794static __inline__ __m512i __DEFAULT_FN_ATTRS512
4796{
4797 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4798 (__v8di)_mm512_cvtepi16_epi64(__A),
4799 (__v8di)_mm512_setzero_si512());
4800}
4801
4802static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4804 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4805}
4806
4807static __inline__ __m512i __DEFAULT_FN_ATTRS512
4808_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4809{
4810 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4811 (__v16si)_mm512_cvtepu8_epi32(__A),
4812 (__v16si)__W);
4813}
4814
4815static __inline__ __m512i __DEFAULT_FN_ATTRS512
4817{
4818 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4819 (__v16si)_mm512_cvtepu8_epi32(__A),
4820 (__v16si)_mm512_setzero_si512());
4821}
4822
4823static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4825 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4826}
4827
4828static __inline__ __m512i __DEFAULT_FN_ATTRS512
4829_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4830{
4831 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4832 (__v8di)_mm512_cvtepu8_epi64(__A),
4833 (__v8di)__W);
4834}
4835
4836static __inline__ __m512i __DEFAULT_FN_ATTRS512
4838{
4839 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4840 (__v8di)_mm512_cvtepu8_epi64(__A),
4841 (__v8di)_mm512_setzero_si512());
4842}
4843
4844static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4846 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4847}
4848
4849static __inline__ __m512i __DEFAULT_FN_ATTRS512
4850_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4851{
4852 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4853 (__v8di)_mm512_cvtepu32_epi64(__X),
4854 (__v8di)__W);
4855}
4856
4857static __inline__ __m512i __DEFAULT_FN_ATTRS512
4859{
4860 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4861 (__v8di)_mm512_cvtepu32_epi64(__X),
4862 (__v8di)_mm512_setzero_si512());
4863}
4864
4865static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4867 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4868}
4869
4870static __inline__ __m512i __DEFAULT_FN_ATTRS512
4871_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4872{
4873 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4874 (__v16si)_mm512_cvtepu16_epi32(__A),
4875 (__v16si)__W);
4876}
4877
4878static __inline__ __m512i __DEFAULT_FN_ATTRS512
4880{
4881 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4882 (__v16si)_mm512_cvtepu16_epi32(__A),
4883 (__v16si)_mm512_setzero_si512());
4884}
4885
4886static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4888 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4889}
4890
4891static __inline__ __m512i __DEFAULT_FN_ATTRS512
4892_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4893{
4894 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4895 (__v8di)_mm512_cvtepu16_epi64(__A),
4896 (__v8di)__W);
4897}
4898
4899static __inline__ __m512i __DEFAULT_FN_ATTRS512
4901{
4902 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4903 (__v8di)_mm512_cvtepu16_epi64(__A),
4904 (__v8di)_mm512_setzero_si512());
4905}
4906
4907static __inline__ __m512i __DEFAULT_FN_ATTRS512
4908_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4909{
4910 return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
4911}
4912
4913static __inline__ __m512i __DEFAULT_FN_ATTRS512
4914_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4915{
4916 return (__m512i)__builtin_ia32_selectd_512(__U,
4917 (__v16si)_mm512_rorv_epi32(__A, __B),
4918 (__v16si)__W);
4919}
4920
4921static __inline__ __m512i __DEFAULT_FN_ATTRS512
4922_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4923{
4924 return (__m512i)__builtin_ia32_selectd_512(__U,
4925 (__v16si)_mm512_rorv_epi32(__A, __B),
4926 (__v16si)_mm512_setzero_si512());
4927}
4928
4929static __inline__ __m512i __DEFAULT_FN_ATTRS512
4930_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4931{
4932 return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
4933}
4934
4935static __inline__ __m512i __DEFAULT_FN_ATTRS512
4936_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4937{
4938 return (__m512i)__builtin_ia32_selectq_512(__U,
4939 (__v8di)_mm512_rorv_epi64(__A, __B),
4940 (__v8di)__W);
4941}
4942
4943static __inline__ __m512i __DEFAULT_FN_ATTRS512
4944_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4945{
4946 return (__m512i)__builtin_ia32_selectq_512(__U,
4947 (__v8di)_mm512_rorv_epi64(__A, __B),
4948 (__v8di)_mm512_setzero_si512());
4949}
4950
4951
4952
/* Full comparison intrinsics taking an explicit _MM_CMPINT_* predicate. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))

/* Masked variants: result bits are zero where the mask bit m is clear. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))

/* Rotate left by an immediate count. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
5018
5019static __inline__ __m512i __DEFAULT_FN_ATTRS512
5020_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5021{
5022 return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
5023}
5024
5025static __inline__ __m512i __DEFAULT_FN_ATTRS512
5026_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5027{
5028 return (__m512i)__builtin_ia32_selectd_512(__U,
5029 (__v16si)_mm512_rolv_epi32(__A, __B),
5030 (__v16si)__W);
5031}
5032
5033static __inline__ __m512i __DEFAULT_FN_ATTRS512
5034_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5035{
5036 return (__m512i)__builtin_ia32_selectd_512(__U,
5037 (__v16si)_mm512_rolv_epi32(__A, __B),
5038 (__v16si)_mm512_setzero_si512());
5039}
5040
5041static __inline__ __m512i __DEFAULT_FN_ATTRS512
5042_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5043{
5044 return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
5045}
5046
5047static __inline__ __m512i __DEFAULT_FN_ATTRS512
5048_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5049{
5050 return (__m512i)__builtin_ia32_selectq_512(__U,
5051 (__v8di)_mm512_rolv_epi64(__A, __B),
5052 (__v8di)__W);
5053}
5054
5055static __inline__ __m512i __DEFAULT_FN_ATTRS512
5056_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5057{
5058 return (__m512i)__builtin_ia32_selectq_512(__U,
5059 (__v8di)_mm512_rolv_epi64(__A, __B),
5060 (__v8di)_mm512_setzero_si512());
5061}
5062
/* Rotate right by an immediate count. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
5088
5089static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5090_mm512_slli_epi32(__m512i __A, unsigned int __B) {
5091 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
5092}
5093
5094static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5095_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
5096 unsigned int __B) {
5097 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5098 (__v16si)_mm512_slli_epi32(__A, __B),
5099 (__v16si)__W);
5100}
5101
5102static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5103_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
5104 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5105 (__v16si)_mm512_slli_epi32(__A, __B),
5106 (__v16si)_mm512_setzero_si512());
5107}
5108
5109static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5110_mm512_slli_epi64(__m512i __A, unsigned int __B) {
5111 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
5112}
5113
5114static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5115_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
5116 unsigned int __B) {
5117 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5118 (__v8di)_mm512_slli_epi64(__A, __B),
5119 (__v8di)__W);
5120}
5121
5122static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5123_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
5124 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5125 (__v8di)_mm512_slli_epi64(__A, __B),
5126 (__v8di)_mm512_setzero_si512());
5127}
5128
5129static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5130_mm512_srli_epi32(__m512i __A, unsigned int __B) {
5131 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
5132}
5133
5134static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5135_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
5136 unsigned int __B) {
5137 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5138 (__v16si)_mm512_srli_epi32(__A, __B),
5139 (__v16si)__W);
5140}
5141
5142static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5143_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
5144 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5145 (__v16si)_mm512_srli_epi32(__A, __B),
5146 (__v16si)_mm512_setzero_si512());
5147}
5148
5149static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5150_mm512_srli_epi64(__m512i __A, unsigned int __B) {
5151 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
5152}
5153
5154static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5155_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
5156 unsigned int __B) {
5157 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5158 (__v8di)_mm512_srli_epi64(__A, __B),
5159 (__v8di)__W);
5160}
5161
5162static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
5163_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
5164 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5165 (__v8di)_mm512_srli_epi64(__A, __B),
5166 (__v8di)_mm512_setzero_si512());
5167}
5168
5169static __inline__ __m512i __DEFAULT_FN_ATTRS512
5170_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5171{
5172 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5173 (__v16si) __W,
5174 (__mmask16) __U);
5175}
5176
5177static __inline__ __m512i __DEFAULT_FN_ATTRS512
5179{
5180 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5181 (__v16si)
5183 (__mmask16) __U);
5184}
5185
5186static __inline__ void __DEFAULT_FN_ATTRS512
5187_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5188{
5189 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5190 (__mmask16) __U);
5191}
5192
5193static __inline__ __m512i __DEFAULT_FN_ATTRS512
5194_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5195{
5196 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5197 (__v16si) __A,
5198 (__v16si) __W);
5199}
5200
5201static __inline__ __m512i __DEFAULT_FN_ATTRS512
5203{
5204 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5205 (__v16si) __A,
5206 (__v16si) _mm512_setzero_si512 ());
5207}
5208
5209static __inline__ __m512i __DEFAULT_FN_ATTRS512
5210_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5211{
5212 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5213 (__v8di) __A,
5214 (__v8di) __W);
5215}
5216
5217static __inline__ __m512i __DEFAULT_FN_ATTRS512
5219{
5220 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5221 (__v8di) __A,
5222 (__v8di) _mm512_setzero_si512 ());
5223}
5224
5225static __inline__ __m512i __DEFAULT_FN_ATTRS512
5226_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5227{
5228 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5229 (__v8di) __W,
5230 (__mmask8) __U);
5231}
5232
5233static __inline__ __m512i __DEFAULT_FN_ATTRS512
5235{
5236 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5237 (__v8di)
5239 (__mmask8) __U);
5240}
5241
5242static __inline__ void __DEFAULT_FN_ATTRS512
5243_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5244{
5245 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5246 (__mmask8) __U);
5247}
5248
5249static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
5251{
5252 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5253 0, 0, 2, 2, 4, 4, 6, 6);
5254}
5255
5256static __inline__ __m512d __DEFAULT_FN_ATTRS512
5257_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5258{
5259 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5260 (__v8df)_mm512_movedup_pd(__A),
5261 (__v8df)__W);
5262}
5263
5264static __inline__ __m512d __DEFAULT_FN_ATTRS512
5266{
5267 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5268 (__v8df)_mm512_movedup_pd(__A),
5269 (__v8df)_mm512_setzero_pd());
5270}
5271
/* VFIXUPIMM: fix up special-case values in A per the table encoded in C
   and the immediate. "_round" variants take an explicit SAE/rounding
   argument R; the others use _MM_FROUND_CUR_DIRECTION. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

/* Scalar fixupimm: operate on the low element only; upper elements of
   the result come from A. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

/* VGETEXPSD with explicit SAE control R. */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))
5435
5436
5437static __inline__ __m128d __DEFAULT_FN_ATTRS128
5438_mm_getexp_sd (__m128d __A, __m128d __B)
5439{
5440 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5441 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5442}
5443
5444static __inline__ __m128d __DEFAULT_FN_ATTRS128
5445_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5446{
5447 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5448 (__v2df) __B,
5449 (__v2df) __W,
5450 (__mmask8) __U,
5452}
5453
5454#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
5455 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5456 (__v2df)(__m128d)(B), \
5457 (__v2df)(__m128d)(W), \
5458 (__mmask8)(U), (int)(R)))
5459
5460static __inline__ __m128d __DEFAULT_FN_ATTRS128
5461_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5462{
5463 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5464 (__v2df) __B,
5465 (__v2df) _mm_setzero_pd (),
5466 (__mmask8) __U,
5468}
5469
5470#define _mm_maskz_getexp_round_sd(U, A, B, R) \
5471 ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5472 (__v2df)(__m128d)(B), \
5473 (__v2df)_mm_setzero_pd(), \
5474 (__mmask8)(U), (int)(R)))
5475
5476#define _mm_getexp_round_ss(A, B, R) \
5477 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5478 (__v4sf)(__m128)(B), \
5479 (__v4sf)_mm_setzero_ps(), \
5480 (__mmask8)-1, (int)(R)))
5481
5482static __inline__ __m128 __DEFAULT_FN_ATTRS128
5483_mm_getexp_ss (__m128 __A, __m128 __B)
5484{
5485 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5486 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5487}
5488
5489static __inline__ __m128 __DEFAULT_FN_ATTRS128
5490_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5491{
5492 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5493 (__v4sf) __B,
5494 (__v4sf) __W,
5495 (__mmask8) __U,
5497}
5498
5499#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
5500 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5501 (__v4sf)(__m128)(B), \
5502 (__v4sf)(__m128)(W), \
5503 (__mmask8)(U), (int)(R)))
5504
5505static __inline__ __m128 __DEFAULT_FN_ATTRS128
5506_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5507{
5508 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5509 (__v4sf) __B,
5510 (__v4sf) _mm_setzero_ps (),
5511 (__mmask8) __U,
5513}
5514
5515#define _mm_maskz_getexp_round_ss(U, A, B, R) \
5516 ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5517 (__v4sf)(__m128)(B), \
5518 (__v4sf)_mm_setzero_ps(), \
5519 (__mmask8)(U), (int)(R)))
5520
/* VGETMANTSD/VGETMANTSS: extract the normalized mantissa of the low
   element of B. C selects the interval, D the sign control; they are
   packed into the 4-bit immediate as (D << 2) | C. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5610
5611static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5613{
5614 return __A;
5615}
5616
/* Scalar compare with predicate P and SAE control R (vcomisd/vcomiss). */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5629
5630static __inline__ __m512i __DEFAULT_FN_ATTRS512
5631_mm512_sll_epi32(__m512i __A, __m128i __B)
5632{
5633 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5634}
5635
5636static __inline__ __m512i __DEFAULT_FN_ATTRS512
5637_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5638{
5639 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5640 (__v16si)_mm512_sll_epi32(__A, __B),
5641 (__v16si)__W);
5642}
5643
5644static __inline__ __m512i __DEFAULT_FN_ATTRS512
5645_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5646{
5647 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5648 (__v16si)_mm512_sll_epi32(__A, __B),
5649 (__v16si)_mm512_setzero_si512());
5650}
5651
5652static __inline__ __m512i __DEFAULT_FN_ATTRS512
5653_mm512_sll_epi64(__m512i __A, __m128i __B)
5654{
5655 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5656}
5657
5658static __inline__ __m512i __DEFAULT_FN_ATTRS512
5659_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5660{
5661 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5662 (__v8di)_mm512_sll_epi64(__A, __B),
5663 (__v8di)__W);
5664}
5665
5666static __inline__ __m512i __DEFAULT_FN_ATTRS512
5667_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5668{
5669 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5670 (__v8di)_mm512_sll_epi64(__A, __B),
5671 (__v8di)_mm512_setzero_si512());
5672}
5673
5674static __inline__ __m512i __DEFAULT_FN_ATTRS512
5675_mm512_sllv_epi32(__m512i __X, __m512i __Y)
5676{
5677 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5678}
5679
5680static __inline__ __m512i __DEFAULT_FN_ATTRS512
5681_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5682{
5683 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5684 (__v16si)_mm512_sllv_epi32(__X, __Y),
5685 (__v16si)__W);
5686}
5687
5688static __inline__ __m512i __DEFAULT_FN_ATTRS512
5689_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5690{
5691 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5692 (__v16si)_mm512_sllv_epi32(__X, __Y),
5693 (__v16si)_mm512_setzero_si512());
5694}
5695
5696static __inline__ __m512i __DEFAULT_FN_ATTRS512
5697_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5698{
5699 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5700}
5701
5702static __inline__ __m512i __DEFAULT_FN_ATTRS512
5703_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5704{
5705 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5706 (__v8di)_mm512_sllv_epi64(__X, __Y),
5707 (__v8di)__W);
5708}
5709
5710static __inline__ __m512i __DEFAULT_FN_ATTRS512
5711_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5712{
5713 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5714 (__v8di)_mm512_sllv_epi64(__X, __Y),
5715 (__v8di)_mm512_setzero_si512());
5716}
5717
5718static __inline__ __m512i __DEFAULT_FN_ATTRS512
5719_mm512_sra_epi32(__m512i __A, __m128i __B)
5720{
5721 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5722}
5723
5724static __inline__ __m512i __DEFAULT_FN_ATTRS512
5725_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5726{
5727 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5728 (__v16si)_mm512_sra_epi32(__A, __B),
5729 (__v16si)__W);
5730}
5731
5732static __inline__ __m512i __DEFAULT_FN_ATTRS512
5733_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5734{
5735 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5736 (__v16si)_mm512_sra_epi32(__A, __B),
5737 (__v16si)_mm512_setzero_si512());
5738}
5739
5740static __inline__ __m512i __DEFAULT_FN_ATTRS512
5741_mm512_sra_epi64(__m512i __A, __m128i __B)
5742{
5743 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5744}
5745
5746static __inline__ __m512i __DEFAULT_FN_ATTRS512
5747_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5748{
5749 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5750 (__v8di)_mm512_sra_epi64(__A, __B),
5751 (__v8di)__W);
5752}
5753
5754static __inline__ __m512i __DEFAULT_FN_ATTRS512
5755_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5756{
5757 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5758 (__v8di)_mm512_sra_epi64(__A, __B),
5759 (__v8di)_mm512_setzero_si512());
5760}
5761
5762static __inline__ __m512i __DEFAULT_FN_ATTRS512
5763_mm512_srav_epi32(__m512i __X, __m512i __Y)
5764{
5765 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5766}
5767
5768static __inline__ __m512i __DEFAULT_FN_ATTRS512
5769_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5770{
5771 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5772 (__v16si)_mm512_srav_epi32(__X, __Y),
5773 (__v16si)__W);
5774}
5775
5776static __inline__ __m512i __DEFAULT_FN_ATTRS512
5777_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5778{
5779 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5780 (__v16si)_mm512_srav_epi32(__X, __Y),
5781 (__v16si)_mm512_setzero_si512());
5782}
5783
5784static __inline__ __m512i __DEFAULT_FN_ATTRS512
5785_mm512_srav_epi64(__m512i __X, __m512i __Y)
5786{
5787 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5788}
5789
5790static __inline__ __m512i __DEFAULT_FN_ATTRS512
5791_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5792{
5793 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5794 (__v8di)_mm512_srav_epi64(__X, __Y),
5795 (__v8di)__W);
5796}
5797
5798static __inline__ __m512i __DEFAULT_FN_ATTRS512
5799_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5800{
5801 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5802 (__v8di)_mm512_srav_epi64(__X, __Y),
5803 (__v8di)_mm512_setzero_si512());
5804}
5805
5806static __inline__ __m512i __DEFAULT_FN_ATTRS512
5807_mm512_srl_epi32(__m512i __A, __m128i __B)
5808{
5809 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5810}
5811
5812static __inline__ __m512i __DEFAULT_FN_ATTRS512
5813_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5814{
5815 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5816 (__v16si)_mm512_srl_epi32(__A, __B),
5817 (__v16si)__W);
5818}
5819
5820static __inline__ __m512i __DEFAULT_FN_ATTRS512
5821_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5822{
5823 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5824 (__v16si)_mm512_srl_epi32(__A, __B),
5825 (__v16si)_mm512_setzero_si512());
5826}
5827
5828static __inline__ __m512i __DEFAULT_FN_ATTRS512
5829_mm512_srl_epi64(__m512i __A, __m128i __B)
5830{
5831 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5832}
5833
5834static __inline__ __m512i __DEFAULT_FN_ATTRS512
5835_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5836{
5837 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5838 (__v8di)_mm512_srl_epi64(__A, __B),
5839 (__v8di)__W);
5840}
5841
5842static __inline__ __m512i __DEFAULT_FN_ATTRS512
5843_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5844{
5845 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5846 (__v8di)_mm512_srl_epi64(__A, __B),
5847 (__v8di)_mm512_setzero_si512());
5848}
5849
5850static __inline__ __m512i __DEFAULT_FN_ATTRS512
5851_mm512_srlv_epi32(__m512i __X, __m512i __Y)
5852{
5853 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5854}
5855
5856static __inline__ __m512i __DEFAULT_FN_ATTRS512
5857_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5858{
5859 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5860 (__v16si)_mm512_srlv_epi32(__X, __Y),
5861 (__v16si)__W);
5862}
5863
5864static __inline__ __m512i __DEFAULT_FN_ATTRS512
5865_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5866{
5867 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5868 (__v16si)_mm512_srlv_epi32(__X, __Y),
5869 (__v16si)_mm512_setzero_si512());
5870}
5871
5872static __inline__ __m512i __DEFAULT_FN_ATTRS512
5873_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5874{
5875 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5876}
5877
5878static __inline__ __m512i __DEFAULT_FN_ATTRS512
5879_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5880{
5881 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5882 (__v8di)_mm512_srlv_epi64(__X, __Y),
5883 (__v8di)__W);
5884}
5885
5886static __inline__ __m512i __DEFAULT_FN_ATTRS512
5887_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5888{
5889 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5890 (__v8di)_mm512_srlv_epi64(__X, __Y),
5891 (__v8di)_mm512_setzero_si512());
5892}
5893
/// \enum _MM_TERNLOG_ENUM
/// A helper to represent the ternary logic operations among vector \a A,
/// \a B and \a C. The representation is passed to \a imm.
typedef enum {
  _MM_TERNLOG_A = 0xF0, /* truth-table column contributed by operand A */
  _MM_TERNLOG_B = 0xCC, /* truth-table column contributed by operand B */
  _MM_TERNLOG_C = 0xAA  /* truth-table column contributed by operand C */
} _MM_TERNLOG_ENUM;
/* VPTERNLOG: bitwise ternary logic of A, B, C selected by the 8-bit
   truth table imm (compose with the _MM_TERNLOG_* constants). */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (unsigned char)(imm), (__mmask16)(U)))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (unsigned char)(imm), (__mmask8)(U)))
5932
/* Scalar double -> integer conversions with explicit rounding control R. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif

#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
5946
5947static __inline__ unsigned __DEFAULT_FN_ATTRS128
5948_mm_cvtsd_u32 (__m128d __A)
5949{
5950 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5952}
5953
5954#ifdef __x86_64__
5955#define _mm_cvt_roundsd_u64(A, R) \
5956 ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5957 (int)(R)))
5958
5959static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5960_mm_cvtsd_u64 (__m128d __A)
5961{
5962 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5963 __A,
5965}
5966#endif
5967
/* Scalar float -> integer conversions with explicit rounding control R. */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
#endif

#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
5984
5985static __inline__ unsigned __DEFAULT_FN_ATTRS128
5986_mm_cvtss_u32 (__m128 __A)
5987{
5988 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
5990}
5991
5992#ifdef __x86_64__
5993#define _mm_cvt_roundss_u64(A, R) \
5994 ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
5995 (int)(R)))
5996
5997static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5998_mm_cvtss_u64 (__m128 __A)
5999{
6000 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6001 __A,
6003}
6004#endif
6005
6006#define _mm_cvtt_roundsd_i32(A, R) \
6007 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
6008
6009#define _mm_cvtt_roundsd_si32(A, R) \
6010 ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
6011
6012static __inline__ int __DEFAULT_FN_ATTRS128
6013_mm_cvttsd_i32 (__m128d __A)
6014{
6015 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6017}
6018
6019#ifdef __x86_64__
6020#define _mm_cvtt_roundsd_si64(A, R) \
6021 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
6022
6023#define _mm_cvtt_roundsd_i64(A, R) \
6024 ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
6025
6026static __inline__ long long __DEFAULT_FN_ATTRS128
6027_mm_cvttsd_i64 (__m128d __A)
6028{
6029 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6031}
6032#endif
6033
6034#define _mm_cvtt_roundsd_u32(A, R) \
6035 ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
6036
6037static __inline__ unsigned __DEFAULT_FN_ATTRS128
6038_mm_cvttsd_u32 (__m128d __A)
6039{
6040 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6042}
6043
6044#ifdef __x86_64__
6045#define _mm_cvtt_roundsd_u64(A, R) \
6046 ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6047 (int)(R)))
6048
6049static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6050_mm_cvttsd_u64 (__m128d __A)
6051{
6052 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6053 __A,
6055}
6056#endif
6057
6058#define _mm_cvtt_roundss_i32(A, R) \
6059 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
6060
6061#define _mm_cvtt_roundss_si32(A, R) \
6062 ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
6063
6064static __inline__ int __DEFAULT_FN_ATTRS128
6065_mm_cvttss_i32 (__m128 __A)
6066{
6067 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6069}
6070
6071#ifdef __x86_64__
6072#define _mm_cvtt_roundss_i64(A, R) \
6073 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
6074
6075#define _mm_cvtt_roundss_si64(A, R) \
6076 ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
6077
6078static __inline__ long long __DEFAULT_FN_ATTRS128
6079_mm_cvttss_i64 (__m128 __A)
6080{
6081 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6083}
6084#endif
6085
6086#define _mm_cvtt_roundss_u32(A, R) \
6087 ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
6088
6089static __inline__ unsigned __DEFAULT_FN_ATTRS128
6090_mm_cvttss_u32 (__m128 __A)
6091{
6092 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6094}
6095
6096#ifdef __x86_64__
6097#define _mm_cvtt_roundss_u64(A, R) \
6098 ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6099 (int)(R)))
6100
6101static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6102_mm_cvttss_u64 (__m128 __A)
6103{
6104 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6105 __A,
6107}
6108#endif
6109
/* In-lane permutes of pd/ps elements selected by the immediate C, with
 * merge-masked (W,U) and zero-masked (U) variants built on select. */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
6135
/// Permute pd elements of __A within 128-bit lanes using indices in __C.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

/// Merge-masked variant: lanes with a 0 mask bit come from __W.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                        (__v8df)_mm512_permutevar_pd(__A, __C),
                                        (__v8df)__W);
}

/// Zero-masked variant: lanes with a 0 mask bit are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                        (__v8df)_mm512_permutevar_pd(__A, __C),
                                        (__v8df)_mm512_setzero_pd());
}

/// Permute ps elements of __A within 128-bit lanes using indices in __C.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

/// Merge-masked variant: lanes with a 0 mask bit come from __W.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                       (__v16sf)_mm512_permutevar_ps(__A, __C),
                                       (__v16sf)__W);
}

/// Zero-masked variant: lanes with a 0 mask bit are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                       (__v16sf)_mm512_permutevar_ps(__A, __C),
                                       (__v16sf)_mm512_setzero_ps());
}

/// Select pd elements from the concatenation of __A and __B using __I.
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
                                                 (__v8df)__B);
}

/// Merge-masked variant; unselected lanes keep __A (the first source).
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                 (__v8df)__A);
}

/// mask2 variant; unselected lanes keep the index operand __I (reinterpreted).
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
                             __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                 (__v8df)(__m512d)__I);
}

/// Zero-masked variant; unselected lanes are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
                             __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                 (__v8df)_mm512_setzero_pd());
}

/// Select ps elements from the concatenation of __A and __B using __I.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
                                                (__v16sf) __B);
}

/// Merge-masked variant; unselected lanes keep __A (the first source).
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                (__v16sf)__A);
}

/// mask2 variant; unselected lanes keep the index operand __I (reinterpreted).
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                (__v16sf)(__m512)__I);
}

/// Zero-masked variant; unselected lanes are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                (__v16sf)_mm512_setzero_ps());
}
6243
6244
6245#define _mm512_cvtt_roundpd_epu32(A, R) \
6246 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6247 (__v8si)_mm256_undefined_si256(), \
6248 (__mmask8)-1, (int)(R)))
6249
6250#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
6251 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6252 (__v8si)(__m256i)(W), \
6253 (__mmask8)(U), (int)(R)))
6254
6255#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
6256 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6257 (__v8si)_mm256_setzero_si256(), \
6258 (__mmask8)(U), (int)(R)))
6259
6260static __inline__ __m256i __DEFAULT_FN_ATTRS512
6262{
6263 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6264 (__v8si)
6266 (__mmask8) -1,
6268}
6269
6270static __inline__ __m256i __DEFAULT_FN_ATTRS512
6271_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6272{
6273 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6274 (__v8si) __W,
6275 (__mmask8) __U,
6277}
6278
6279static __inline__ __m256i __DEFAULT_FN_ATTRS512
6281{
6282 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6283 (__v8si)
6285 (__mmask8) __U,
6287}
6288
6289#define _mm_roundscale_round_sd(A, B, imm, R) \
6290 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6291 (__v2df)(__m128d)(B), \
6292 (__v2df)_mm_setzero_pd(), \
6293 (__mmask8)-1, (int)(imm), \
6294 (int)(R)))
6295
6296#define _mm_roundscale_sd(A, B, imm) \
6297 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6298 (__v2df)(__m128d)(B), \
6299 (__v2df)_mm_setzero_pd(), \
6300 (__mmask8)-1, (int)(imm), \
6301 _MM_FROUND_CUR_DIRECTION))
6302
6303#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
6304 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6305 (__v2df)(__m128d)(B), \
6306 (__v2df)(__m128d)(W), \
6307 (__mmask8)(U), (int)(imm), \
6308 _MM_FROUND_CUR_DIRECTION))
6309
6310#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
6311 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6312 (__v2df)(__m128d)(B), \
6313 (__v2df)(__m128d)(W), \
6314 (__mmask8)(U), (int)(I), \
6315 (int)(R)))
6316
6317#define _mm_maskz_roundscale_sd(U, A, B, I) \
6318 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6319 (__v2df)(__m128d)(B), \
6320 (__v2df)_mm_setzero_pd(), \
6321 (__mmask8)(U), (int)(I), \
6322 _MM_FROUND_CUR_DIRECTION))
6323
6324#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
6325 ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6326 (__v2df)(__m128d)(B), \
6327 (__v2df)_mm_setzero_pd(), \
6328 (__mmask8)(U), (int)(I), \
6329 (int)(R)))
6330
6331#define _mm_roundscale_round_ss(A, B, imm, R) \
6332 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6333 (__v4sf)(__m128)(B), \
6334 (__v4sf)_mm_setzero_ps(), \
6335 (__mmask8)-1, (int)(imm), \
6336 (int)(R)))
6337
6338#define _mm_roundscale_ss(A, B, imm) \
6339 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6340 (__v4sf)(__m128)(B), \
6341 (__v4sf)_mm_setzero_ps(), \
6342 (__mmask8)-1, (int)(imm), \
6343 _MM_FROUND_CUR_DIRECTION))
6344
6345#define _mm_mask_roundscale_ss(W, U, A, B, I) \
6346 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6347 (__v4sf)(__m128)(B), \
6348 (__v4sf)(__m128)(W), \
6349 (__mmask8)(U), (int)(I), \
6350 _MM_FROUND_CUR_DIRECTION))
6351
6352#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
6353 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6354 (__v4sf)(__m128)(B), \
6355 (__v4sf)(__m128)(W), \
6356 (__mmask8)(U), (int)(I), \
6357 (int)(R)))
6358
6359#define _mm_maskz_roundscale_ss(U, A, B, I) \
6360 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6361 (__v4sf)(__m128)(B), \
6362 (__v4sf)_mm_setzero_ps(), \
6363 (__mmask8)(U), (int)(I), \
6364 _MM_FROUND_CUR_DIRECTION))
6365
6366#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
6367 ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6368 (__v4sf)(__m128)(B), \
6369 (__v4sf)_mm_setzero_ps(), \
6370 (__mmask8)(U), (int)(I), \
6371 (int)(R)))
6372
6373#define _mm512_scalef_round_pd(A, B, R) \
6374 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6375 (__v8df)(__m512d)(B), \
6376 (__v8df)_mm512_undefined_pd(), \
6377 (__mmask8)-1, (int)(R)))
6378
6379#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6380 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6381 (__v8df)(__m512d)(B), \
6382 (__v8df)(__m512d)(W), \
6383 (__mmask8)(U), (int)(R)))
6384
6385#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6386 ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6387 (__v8df)(__m512d)(B), \
6388 (__v8df)_mm512_setzero_pd(), \
6389 (__mmask8)(U), (int)(R)))
6390
6391static __inline__ __m512d __DEFAULT_FN_ATTRS512
6392_mm512_scalef_pd (__m512d __A, __m512d __B)
6393{
6394 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6395 (__v8df) __B,
6396 (__v8df)
6398 (__mmask8) -1,
6400}
6401
6402static __inline__ __m512d __DEFAULT_FN_ATTRS512
6403_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6404{
6405 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6406 (__v8df) __B,
6407 (__v8df) __W,
6408 (__mmask8) __U,
6410}
6411
6412static __inline__ __m512d __DEFAULT_FN_ATTRS512
6413_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6414{
6415 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6416 (__v8df) __B,
6417 (__v8df)
6419 (__mmask8) __U,
6421}
6422
6423#define _mm512_scalef_round_ps(A, B, R) \
6424 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6425 (__v16sf)(__m512)(B), \
6426 (__v16sf)_mm512_undefined_ps(), \
6427 (__mmask16)-1, (int)(R)))
6428
6429#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6430 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6431 (__v16sf)(__m512)(B), \
6432 (__v16sf)(__m512)(W), \
6433 (__mmask16)(U), (int)(R)))
6434
6435#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6436 ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6437 (__v16sf)(__m512)(B), \
6438 (__v16sf)_mm512_setzero_ps(), \
6439 (__mmask16)(U), (int)(R)))
6440
6441static __inline__ __m512 __DEFAULT_FN_ATTRS512
6442_mm512_scalef_ps (__m512 __A, __m512 __B)
6443{
6444 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6445 (__v16sf) __B,
6446 (__v16sf)
6448 (__mmask16) -1,
6450}
6451
6452static __inline__ __m512 __DEFAULT_FN_ATTRS512
6453_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6454{
6455 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6456 (__v16sf) __B,
6457 (__v16sf) __W,
6458 (__mmask16) __U,
6460}
6461
6462static __inline__ __m512 __DEFAULT_FN_ATTRS512
6463_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6464{
6465 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6466 (__v16sf) __B,
6467 (__v16sf)
6469 (__mmask16) __U,
6471}
6472
6473#define _mm_scalef_round_sd(A, B, R) \
6474 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6475 (__v2df)(__m128d)(B), \
6476 (__v2df)_mm_setzero_pd(), \
6477 (__mmask8)-1, (int)(R)))
6478
6479static __inline__ __m128d __DEFAULT_FN_ATTRS128
6480_mm_scalef_sd (__m128d __A, __m128d __B)
6481{
6482 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6483 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6484 (__mmask8) -1,
6486}
6487
6488static __inline__ __m128d __DEFAULT_FN_ATTRS128
6489_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6490{
6491 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6492 (__v2df) __B,
6493 (__v2df) __W,
6494 (__mmask8) __U,
6496}
6497
6498#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6499 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6500 (__v2df)(__m128d)(B), \
6501 (__v2df)(__m128d)(W), \
6502 (__mmask8)(U), (int)(R)))
6503
6504static __inline__ __m128d __DEFAULT_FN_ATTRS128
6505_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6506{
6507 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6508 (__v2df) __B,
6509 (__v2df) _mm_setzero_pd (),
6510 (__mmask8) __U,
6512}
6513
6514#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6515 ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6516 (__v2df)(__m128d)(B), \
6517 (__v2df)_mm_setzero_pd(), \
6518 (__mmask8)(U), (int)(R)))
6519
6520#define _mm_scalef_round_ss(A, B, R) \
6521 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6522 (__v4sf)(__m128)(B), \
6523 (__v4sf)_mm_setzero_ps(), \
6524 (__mmask8)-1, (int)(R)))
6525
6526static __inline__ __m128 __DEFAULT_FN_ATTRS128
6527_mm_scalef_ss (__m128 __A, __m128 __B)
6528{
6529 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6530 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6531 (__mmask8) -1,
6533}
6534
6535static __inline__ __m128 __DEFAULT_FN_ATTRS128
6536_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6537{
6538 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6539 (__v4sf) __B,
6540 (__v4sf) __W,
6541 (__mmask8) __U,
6543}
6544
6545#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6546 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6547 (__v4sf)(__m128)(B), \
6548 (__v4sf)(__m128)(W), \
6549 (__mmask8)(U), (int)(R)))
6550
6551static __inline__ __m128 __DEFAULT_FN_ATTRS128
6552_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6553{
6554 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6555 (__v4sf) __B,
6556 (__v4sf) _mm_setzero_ps (),
6557 (__mmask8) __U,
6559}
6560
6561#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6562 ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6563 (__v4sf)(__m128)(B), \
6564 (__v4sf)_mm_setzero_ps(), \
6565 (__mmask8)(U), \
6566 (int)(R)))
6567
/// Arithmetic right-shift of each 32-bit element of __A by __B bits.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srai_epi32(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
}

/// Merge-masked variant: elements with a 0 mask bit come from __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srai_epi32(__A, __B),
                                             (__v16si)__W);
}

/// Zero-masked variant: elements with a 0 mask bit are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_srai_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/// Arithmetic right-shift of each 64-bit element of __A by __B bits.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_srai_epi64(__m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
}

/// Merge-masked variant: elements with a 0 mask bit come from __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srai_epi64(__A, __B),
                                             (__v8di)__W);
}

/// Zero-masked variant: elements with a 0 mask bit are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_srai_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
6607
/* 128-bit-lane shuffles (f32x4/f64x2/i32x4/i64x2) and element shuffles
 * (pd/ps) selected by an immediate, each with merge- and zero-masked
 * variants built on select. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
6691
6692#define _mm_sqrt_round_sd(A, B, R) \
6693 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6694 (__v2df)(__m128d)(B), \
6695 (__v2df)_mm_setzero_pd(), \
6696 (__mmask8)-1, (int)(R)))
6697
6698static __inline__ __m128d __DEFAULT_FN_ATTRS128
6699_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6700{
6701 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6702 (__v2df) __B,
6703 (__v2df) __W,
6704 (__mmask8) __U,
6706}
6707
6708#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6709 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6710 (__v2df)(__m128d)(B), \
6711 (__v2df)(__m128d)(W), \
6712 (__mmask8)(U), (int)(R)))
6713
6714static __inline__ __m128d __DEFAULT_FN_ATTRS128
6715_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6716{
6717 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6718 (__v2df) __B,
6719 (__v2df) _mm_setzero_pd (),
6720 (__mmask8) __U,
6722}
6723
6724#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6725 ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6726 (__v2df)(__m128d)(B), \
6727 (__v2df)_mm_setzero_pd(), \
6728 (__mmask8)(U), (int)(R)))
6729
6730#define _mm_sqrt_round_ss(A, B, R) \
6731 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6732 (__v4sf)(__m128)(B), \
6733 (__v4sf)_mm_setzero_ps(), \
6734 (__mmask8)-1, (int)(R)))
6735
6736static __inline__ __m128 __DEFAULT_FN_ATTRS128
6737_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6738{
6739 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6740 (__v4sf) __B,
6741 (__v4sf) __W,
6742 (__mmask8) __U,
6744}
6745
6746#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6747 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6748 (__v4sf)(__m128)(B), \
6749 (__v4sf)(__m128)(W), (__mmask8)(U), \
6750 (int)(R)))
6751
6752static __inline__ __m128 __DEFAULT_FN_ATTRS128
6753_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6754{
6755 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6756 (__v4sf) __B,
6757 (__v4sf) _mm_setzero_ps (),
6758 (__mmask8) __U,
6760}
6761
6762#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6763 ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6764 (__v4sf)(__m128)(B), \
6765 (__v4sf)_mm_setzero_ps(), \
6766 (__mmask8)(U), (int)(R)))
6767
6768static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
6770 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6771 0, 1, 2, 3, 0, 1, 2, 3,
6772 0, 1, 2, 3, 0, 1, 2, 3);
6773}
6774
6775static __inline__ __m512 __DEFAULT_FN_ATTRS512
6776_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
6777{
6778 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6779 (__v16sf)_mm512_broadcast_f32x4(__A),
6780 (__v16sf)__O);
6781}
6782
6783static __inline__ __m512 __DEFAULT_FN_ATTRS512
6785{
6786 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6787 (__v16sf)_mm512_broadcast_f32x4(__A),
6788 (__v16sf)_mm512_setzero_ps());
6789}
6790
6791static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
6793 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6794 0, 1, 2, 3, 0, 1, 2, 3);
6795}
6796
6797static __inline__ __m512d __DEFAULT_FN_ATTRS512
6798_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6799{
6800 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6801 (__v8df)_mm512_broadcast_f64x4(__A),
6802 (__v8df)__O);
6803}
6804
6805static __inline__ __m512d __DEFAULT_FN_ATTRS512
6807{
6808 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6809 (__v8df)_mm512_broadcast_f64x4(__A),
6810 (__v8df)_mm512_setzero_pd());
6811}
6812
6813static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6815 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6816 0, 1, 2, 3, 0, 1, 2, 3,
6817 0, 1, 2, 3, 0, 1, 2, 3);
6818}
6819
6820static __inline__ __m512i __DEFAULT_FN_ATTRS512
6821_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
6822{
6823 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6824 (__v16si)_mm512_broadcast_i32x4(__A),
6825 (__v16si)__O);
6826}
6827
6828static __inline__ __m512i __DEFAULT_FN_ATTRS512
6830{
6831 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6832 (__v16si)_mm512_broadcast_i32x4(__A),
6833 (__v16si)_mm512_setzero_si512());
6834}
6835
6836static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
6838 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6839 0, 1, 2, 3, 0, 1, 2, 3);
6840}
6841
6842static __inline__ __m512i __DEFAULT_FN_ATTRS512
6843_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6844{
6845 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6846 (__v8di)_mm512_broadcast_i64x4(__A),
6847 (__v8di)__O);
6848}
6849
6850static __inline__ __m512i __DEFAULT_FN_ATTRS512
6852{
6853 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6854 (__v8di)_mm512_broadcast_i64x4(__A),
6855 (__v8di)_mm512_setzero_si512());
6856}
6857
6858static __inline__ __m512d __DEFAULT_FN_ATTRS512
6859_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
6860{
6861 return (__m512d)__builtin_ia32_selectpd_512(__M,
6862 (__v8df) _mm512_broadcastsd_pd(__A),
6863 (__v8df) __O);
6864}
6865
6866static __inline__ __m512d __DEFAULT_FN_ATTRS512
6868{
6869 return (__m512d)__builtin_ia32_selectpd_512(__M,
6870 (__v8df) _mm512_broadcastsd_pd(__A),
6871 (__v8df) _mm512_setzero_pd());
6872}
6873
6874static __inline__ __m512 __DEFAULT_FN_ATTRS512
6875_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
6876{
6877 return (__m512)__builtin_ia32_selectps_512(__M,
6878 (__v16sf) _mm512_broadcastss_ps(__A),
6879 (__v16sf) __O);
6880}
6881
6882static __inline__ __m512 __DEFAULT_FN_ATTRS512
6884{
6885 return (__m512)__builtin_ia32_selectps_512(__M,
6886 (__v16sf) _mm512_broadcastss_ps(__A),
6887 (__v16sf) _mm512_setzero_ps());
6888}
6889
6890static __inline__ __m128i __DEFAULT_FN_ATTRS512
6892{
6893 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6894 (__v16qi) _mm_undefined_si128 (),
6895 (__mmask16) -1);
6896}
6897
6898static __inline__ __m128i __DEFAULT_FN_ATTRS512
6899_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6900{
6901 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6902 (__v16qi) __O, __M);
6903}
6904
6905static __inline__ __m128i __DEFAULT_FN_ATTRS512
6907{
6908 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6909 (__v16qi) _mm_setzero_si128 (),
6910 __M);
6911}
6912
6913static __inline__ void __DEFAULT_FN_ATTRS512
6915{
6916 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6917}
6918
6919static __inline__ __m256i __DEFAULT_FN_ATTRS512
6921{
6922 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6923 (__v16hi) _mm256_undefined_si256 (),
6924 (__mmask16) -1);
6925}
6926
6927static __inline__ __m256i __DEFAULT_FN_ATTRS512
6928_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6929{
6930 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6931 (__v16hi) __O, __M);
6932}
6933
6934static __inline__ __m256i __DEFAULT_FN_ATTRS512
6936{
6937 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6938 (__v16hi) _mm256_setzero_si256 (),
6939 __M);
6940}
6941
6942static __inline__ void __DEFAULT_FN_ATTRS512
6944{
6945 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6946}
6947
6948static __inline__ __m128i __DEFAULT_FN_ATTRS512
6950{
6951 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6952 (__v16qi) _mm_undefined_si128 (),
6953 (__mmask8) -1);
6954}
6955
6956static __inline__ __m128i __DEFAULT_FN_ATTRS512
6957_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6958{
6959 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6960 (__v16qi) __O, __M);
6961}
6962
6963static __inline__ __m128i __DEFAULT_FN_ATTRS512
6965{
6966 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6967 (__v16qi) _mm_setzero_si128 (),
6968 __M);
6969}
6970
6971static __inline__ void __DEFAULT_FN_ATTRS512
6973{
6974 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6975}
6976
6977static __inline__ __m256i __DEFAULT_FN_ATTRS512
6979{
6980 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6981 (__v8si) _mm256_undefined_si256 (),
6982 (__mmask8) -1);
6983}
6984
6985static __inline__ __m256i __DEFAULT_FN_ATTRS512
6986_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
6987{
6988 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6989 (__v8si) __O, __M);
6990}
6991
6992static __inline__ __m256i __DEFAULT_FN_ATTRS512
6994{
6995 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
6996 (__v8si) _mm256_setzero_si256 (),
6997 __M);
6998}
6999
7000static __inline__ void __DEFAULT_FN_ATTRS512
7002{
7003 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7004}
7005
7006static __inline__ __m128i __DEFAULT_FN_ATTRS512
7008{
7009 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7010 (__v8hi) _mm_undefined_si128 (),
7011 (__mmask8) -1);
7012}
7013
7014static __inline__ __m128i __DEFAULT_FN_ATTRS512
7015_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7016{
7017 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7018 (__v8hi) __O, __M);
7019}
7020
7021static __inline__ __m128i __DEFAULT_FN_ATTRS512
7023{
7024 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7025 (__v8hi) _mm_setzero_si128 (),
7026 __M);
7027}
7028
7029static __inline__ void __DEFAULT_FN_ATTRS512
7031{
7032 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7033}
7034
7035static __inline__ __m128i __DEFAULT_FN_ATTRS512
7037{
7038 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7039 (__v16qi) _mm_undefined_si128 (),
7040 (__mmask16) -1);
7041}
7042
7043static __inline__ __m128i __DEFAULT_FN_ATTRS512
7044_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7045{
7046 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7047 (__v16qi) __O,
7048 __M);
7049}
7050
7051static __inline__ __m128i __DEFAULT_FN_ATTRS512
7053{
7054 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7055 (__v16qi) _mm_setzero_si128 (),
7056 __M);
7057}
7058
7059static __inline__ void __DEFAULT_FN_ATTRS512
7061{
7062 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7063}
7064
7065static __inline__ __m256i __DEFAULT_FN_ATTRS512
7067{
7068 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7069 (__v16hi) _mm256_undefined_si256 (),
7070 (__mmask16) -1);
7071}
7072
7073static __inline__ __m256i __DEFAULT_FN_ATTRS512
7074_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7075{
7076 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7077 (__v16hi) __O,
7078 __M);
7079}
7080
7081static __inline__ __m256i __DEFAULT_FN_ATTRS512
7083{
7084 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7085 (__v16hi) _mm256_setzero_si256 (),
7086 __M);
7087}
7088
7089static __inline__ void __DEFAULT_FN_ATTRS512
7091{
7092 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7093}
7094
7095static __inline__ __m128i __DEFAULT_FN_ATTRS512
7097{
7098 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7099 (__v16qi) _mm_undefined_si128 (),
7100 (__mmask8) -1);
7101}
7102
7103static __inline__ __m128i __DEFAULT_FN_ATTRS512
7104_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7105{
7106 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7107 (__v16qi) __O,
7108 __M);
7109}
7110
7111static __inline__ __m128i __DEFAULT_FN_ATTRS512
7113{
7114 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7115 (__v16qi) _mm_setzero_si128 (),
7116 __M);
7117}
7118
7119static __inline__ void __DEFAULT_FN_ATTRS512
7121{
7122 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7123}
7124
7125static __inline__ __m256i __DEFAULT_FN_ATTRS512
7127{
7128 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7129 (__v8si) _mm256_undefined_si256 (),
7130 (__mmask8) -1);
7131}
7132
7133static __inline__ __m256i __DEFAULT_FN_ATTRS512
7134_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7135{
7136 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7137 (__v8si) __O, __M);
7138}
7139
7140static __inline__ __m256i __DEFAULT_FN_ATTRS512
7142{
7143 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7144 (__v8si) _mm256_setzero_si256 (),
7145 __M);
7146}
7147
7148static __inline__ void __DEFAULT_FN_ATTRS512
7150{
7151 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7152}
7153
7154static __inline__ __m128i __DEFAULT_FN_ATTRS512
7156{
7157 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7158 (__v8hi) _mm_undefined_si128 (),
7159 (__mmask8) -1);
7160}
7161
7162static __inline__ __m128i __DEFAULT_FN_ATTRS512
7163_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7164{
7165 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7166 (__v8hi) __O, __M);
7167}
7168
7169static __inline__ __m128i __DEFAULT_FN_ATTRS512
7171{
7172 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7173 (__v8hi) _mm_setzero_si128 (),
7174 __M);
7175}
7176
7177static __inline__ void __DEFAULT_FN_ATTRS512
7179{
7180 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7181}
7182
7183static __inline__ __m128i __DEFAULT_FN_ATTRS512
7185{
7186 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7187 (__v16qi) _mm_undefined_si128 (),
7188 (__mmask16) -1);
7189}
7190
7191static __inline__ __m128i __DEFAULT_FN_ATTRS512
7192_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7193{
7194 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7195 (__v16qi) __O, __M);
7196}
7197
7198static __inline__ __m128i __DEFAULT_FN_ATTRS512
7200{
7201 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7202 (__v16qi) _mm_setzero_si128 (),
7203 __M);
7204}
7205
7206static __inline__ void __DEFAULT_FN_ATTRS512
7208{
7209 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7210}
7211
7212static __inline__ __m256i __DEFAULT_FN_ATTRS512
7214{
7215 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7216 (__v16hi) _mm256_undefined_si256 (),
7217 (__mmask16) -1);
7218}
7219
7220static __inline__ __m256i __DEFAULT_FN_ATTRS512
7221_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7222{
7223 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7224 (__v16hi) __O, __M);
7225}
7226
7227static __inline__ __m256i __DEFAULT_FN_ATTRS512
7229{
7230 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7231 (__v16hi) _mm256_setzero_si256 (),
7232 __M);
7233}
7234
7235static __inline__ void __DEFAULT_FN_ATTRS512
7237{
7238 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7239}
7240
7241static __inline__ __m128i __DEFAULT_FN_ATTRS512
7243{
7244 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7245 (__v16qi) _mm_undefined_si128 (),
7246 (__mmask8) -1);
7247}
7248
7249static __inline__ __m128i __DEFAULT_FN_ATTRS512
7250_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7251{
7252 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7253 (__v16qi) __O, __M);
7254}
7255
7256static __inline__ __m128i __DEFAULT_FN_ATTRS512
7258{
7259 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7260 (__v16qi) _mm_setzero_si128 (),
7261 __M);
7262}
7263
7264static __inline__ void __DEFAULT_FN_ATTRS512
7266{
7267 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7268}
7269
7270static __inline__ __m256i __DEFAULT_FN_ATTRS512
7272{
7273 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7274 (__v8si) _mm256_undefined_si256 (),
7275 (__mmask8) -1);
7276}
7277
7278static __inline__ __m256i __DEFAULT_FN_ATTRS512
7279_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7280{
7281 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7282 (__v8si) __O, __M);
7283}
7284
7285static __inline__ __m256i __DEFAULT_FN_ATTRS512
7287{
7288 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7289 (__v8si) _mm256_setzero_si256 (),
7290 __M);
7291}
7292
7293static __inline__ void __DEFAULT_FN_ATTRS512
7295{
7296 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7297}
7298
7299static __inline__ __m128i __DEFAULT_FN_ATTRS512
7301{
7302 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7303 (__v8hi) _mm_undefined_si128 (),
7304 (__mmask8) -1);
7305}
7306
7307static __inline__ __m128i __DEFAULT_FN_ATTRS512
7308_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7309{
7310 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7311 (__v8hi) __O, __M);
7312}
7313
7314static __inline__ __m128i __DEFAULT_FN_ATTRS512
7316{
7317 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7318 (__v8hi) _mm_setzero_si128 (),
7319 __M);
7320}
7321
7322static __inline__ void __DEFAULT_FN_ATTRS512
7324{
7325 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7326}
7327
/* Extract one 128-bit lane (of four 32-bit elements) selected by imm,
 * with unmasked / merge-masked / zero-masked variants.
 * (Fused doxygen line numbers stripped; code otherwise unchanged.) */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_undefined_si128(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

/* Extract one 256-bit half (of four 64-bit elements) selected by imm. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_undefined_si256(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
7357
/* Insert a 256-bit (x4) or 128-bit (x4) vector into the 512-bit source at
 * the position selected by imm; masked forms blend with W / zero via the
 * select builtins.  (Fused doxygen line numbers stripped; code unchanged.) */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)_mm512_setzero_pd()))

#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()))

#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()))

#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
7413
/* VGETMANTPD/PS: extract the normalized mantissa of each element.  B selects
 * the normalization interval and C the sign control; they are packed into a
 * single immediate as (C << 2) | B.  The _round_ forms take an explicit
 * rounding/SAE argument R; the plain forms use the current direction.
 * (Fused doxygen line numbers stripped; code unchanged.) */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7491
/* VGETEXPPD with explicit rounding/SAE argument R.
 * (Fused doxygen line numbers stripped; code unchanged.) */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7506
7507static __inline__ __m512d __DEFAULT_FN_ATTRS512
7508_mm512_getexp_pd (__m512d __A)
7509{
7510 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7511 (__v8df) _mm512_undefined_pd (),
7512 (__mmask8) -1,
7514}
7515
7516static __inline__ __m512d __DEFAULT_FN_ATTRS512
7517_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7518{
7519 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7520 (__v8df) __W,
7521 (__mmask8) __U,
7523}
7524
7525static __inline__ __m512d __DEFAULT_FN_ATTRS512
7527{
7528 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7529 (__v8df) _mm512_setzero_pd (),
7530 (__mmask8) __U,
7532}
7533
/* VGETEXPPS with explicit rounding/SAE argument R.
 * (Fused doxygen line numbers stripped; code unchanged.) */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7548
7549static __inline__ __m512 __DEFAULT_FN_ATTRS512
7551{
7552 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7553 (__v16sf) _mm512_undefined_ps (),
7554 (__mmask16) -1,
7556}
7557
7558static __inline__ __m512 __DEFAULT_FN_ATTRS512
7559_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7560{
7561 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7562 (__v16sf) __W,
7563 (__mmask16) __U,
7565}
7566
7567static __inline__ __m512 __DEFAULT_FN_ATTRS512
7569{
7570 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7571 (__v16sf) _mm512_setzero_ps (),
7572 (__mmask16) __U,
7574}
7575
/* Gather intrinsics: load elements from addr + index[i]*scale; masked forms
 * merge with v1_old where the mask bit is clear.
 * (Fused doxygen line numbers stripped; code unchanged — including the
 * historical (__m512)(index) cast in the i32gather_ps forms.) */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7671
/* Scatter intrinsics: store elements of v1 to addr + index[i]*scale;
 * masked forms write only lanes whose mask bit is set.
 * (Fused doxygen line numbers stripped; code unchanged.) */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7751
7752static __inline__ __m128 __DEFAULT_FN_ATTRS128
7753_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7754{
7755 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7756 (__v4sf)__A,
7757 (__v4sf)__B,
7758 (__mmask8)__U,
7760}
7761
7762#define _mm_fmadd_round_ss(A, B, C, R) \
7763 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7764 (__v4sf)(__m128)(B), \
7765 (__v4sf)(__m128)(C), (__mmask8)-1, \
7766 (int)(R)))
7767
7768#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7769 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7770 (__v4sf)(__m128)(A), \
7771 (__v4sf)(__m128)(B), (__mmask8)(U), \
7772 (int)(R)))
7773
7774static __inline__ __m128 __DEFAULT_FN_ATTRS128
7775_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7776{
7777 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7778 (__v4sf)__B,
7779 (__v4sf)__C,
7780 (__mmask8)__U,
7782}
7783
7784#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7785 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7786 (__v4sf)(__m128)(B), \
7787 (__v4sf)(__m128)(C), (__mmask8)(U), \
7788 (int)(R)))
7789
7790static __inline__ __m128 __DEFAULT_FN_ATTRS128
7791_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7792{
7793 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7794 (__v4sf)__X,
7795 (__v4sf)__Y,
7796 (__mmask8)__U,
7798}
7799
7800#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7801 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7802 (__v4sf)(__m128)(X), \
7803 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7804 (int)(R)))
7805
7806static __inline__ __m128 __DEFAULT_FN_ATTRS128
7807_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7808{
7809 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7810 (__v4sf)__A,
7811 -(__v4sf)__B,
7812 (__mmask8)__U,
7814}
7815
7816#define _mm_fmsub_round_ss(A, B, C, R) \
7817 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7818 (__v4sf)(__m128)(B), \
7819 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7820 (int)(R)))
7821
7822#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7823 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7824 (__v4sf)(__m128)(A), \
7825 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7826 (int)(R)))
7827
7828static __inline__ __m128 __DEFAULT_FN_ATTRS128
7829_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7830{
7831 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7832 (__v4sf)__B,
7833 -(__v4sf)__C,
7834 (__mmask8)__U,
7836}
7837
7838#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7839 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7840 (__v4sf)(__m128)(B), \
7841 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7842 (int)(R)))
7843
7844static __inline__ __m128 __DEFAULT_FN_ATTRS128
7845_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7846{
7847 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7848 (__v4sf)__X,
7849 (__v4sf)__Y,
7850 (__mmask8)__U,
7852}
7853
7854#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7855 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7856 (__v4sf)(__m128)(X), \
7857 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7858 (int)(R)))
7859
7860static __inline__ __m128 __DEFAULT_FN_ATTRS128
7861_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7862{
7863 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7864 -(__v4sf)__A,
7865 (__v4sf)__B,
7866 (__mmask8)__U,
7868}
7869
7870#define _mm_fnmadd_round_ss(A, B, C, R) \
7871 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7872 -(__v4sf)(__m128)(B), \
7873 (__v4sf)(__m128)(C), (__mmask8)-1, \
7874 (int)(R)))
7875
7876#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7877 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7878 -(__v4sf)(__m128)(A), \
7879 (__v4sf)(__m128)(B), (__mmask8)(U), \
7880 (int)(R)))
7881
7882static __inline__ __m128 __DEFAULT_FN_ATTRS128
7883_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7884{
7885 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7886 -(__v4sf)__B,
7887 (__v4sf)__C,
7888 (__mmask8)__U,
7890}
7891
7892#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7893 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7894 -(__v4sf)(__m128)(B), \
7895 (__v4sf)(__m128)(C), (__mmask8)(U), \
7896 (int)(R)))
7897
7898static __inline__ __m128 __DEFAULT_FN_ATTRS128
7899_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7900{
7901 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7902 -(__v4sf)__X,
7903 (__v4sf)__Y,
7904 (__mmask8)__U,
7906}
7907
7908#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7909 ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7910 -(__v4sf)(__m128)(X), \
7911 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7912 (int)(R)))
7913
7914static __inline__ __m128 __DEFAULT_FN_ATTRS128
7915_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7916{
7917 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7918 -(__v4sf)__A,
7919 -(__v4sf)__B,
7920 (__mmask8)__U,
7922}
7923
7924#define _mm_fnmsub_round_ss(A, B, C, R) \
7925 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7926 -(__v4sf)(__m128)(B), \
7927 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7928 (int)(R)))
7929
7930#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7931 ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7932 -(__v4sf)(__m128)(A), \
7933 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7934 (int)(R)))
7935
7936static __inline__ __m128 __DEFAULT_FN_ATTRS128
7937_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7938{
7939 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7940 -(__v4sf)__B,
7941 -(__v4sf)__C,
7942 (__mmask8)__U,
7944}
7945
7946#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7947 ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7948 -(__v4sf)(__m128)(B), \
7949 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7950 (int)(R)))
7951
7952static __inline__ __m128 __DEFAULT_FN_ATTRS128
7953_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7954{
7955 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7956 -(__v4sf)__X,
7957 (__v4sf)__Y,
7958 (__mmask8)__U,
7960}
7961
7962#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
7963 ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7964 -(__v4sf)(__m128)(X), \
7965 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7966 (int)(R)))
7967
7968static __inline__ __m128d __DEFAULT_FN_ATTRS128
7969_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7970{
7971 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7972 (__v2df)__A,
7973 (__v2df)__B,
7974 (__mmask8)__U,
7976}
7977
7978#define _mm_fmadd_round_sd(A, B, C, R) \
7979 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
7980 (__v2df)(__m128d)(B), \
7981 (__v2df)(__m128d)(C), (__mmask8)-1, \
7982 (int)(R)))
7983
7984#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
7985 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
7986 (__v2df)(__m128d)(A), \
7987 (__v2df)(__m128d)(B), (__mmask8)(U), \
7988 (int)(R)))
7989
7990static __inline__ __m128d __DEFAULT_FN_ATTRS128
7991_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
7992{
7993 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
7994 (__v2df)__B,
7995 (__v2df)__C,
7996 (__mmask8)__U,
7998}
7999
8000#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
8001 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8002 (__v2df)(__m128d)(B), \
8003 (__v2df)(__m128d)(C), (__mmask8)(U), \
8004 (int)(R)))
8005
8006static __inline__ __m128d __DEFAULT_FN_ATTRS128
8007_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8008{
8009 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8010 (__v2df)__X,
8011 (__v2df)__Y,
8012 (__mmask8)__U,
8014}
8015
8016#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
8017 ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8018 (__v2df)(__m128d)(X), \
8019 (__v2df)(__m128d)(Y), (__mmask8)(U), \
8020 (int)(R)))
8021
8022static __inline__ __m128d __DEFAULT_FN_ATTRS128
8023_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8024{
8025 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8026 (__v2df)__A,
8027 -(__v2df)__B,
8028 (__mmask8)__U,
8030}
8031
8032#define _mm_fmsub_round_sd(A, B, C, R) \
8033 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8034 (__v2df)(__m128d)(B), \
8035 -(__v2df)(__m128d)(C), (__mmask8)-1, \
8036 (int)(R)))
8037
8038#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
8039 ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8040 (__v2df)(__m128d)(A), \
8041 -(__v2df)(__m128d)(B), (__mmask8)(U), \
8042 (int)(R)))
8043
8044static __inline__ __m128d __DEFAULT_FN_ATTRS128
8045_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8046{
8047 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8048 (__v2df)__B,
8049 -(__v2df)__C,
8050 (__mmask8)__U,
8052}
8053
8054#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
8055 ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8056 (__v2df)(__m128d)(B), \
8057 -(__v2df)(__m128d)(C), \
8058 (__mmask8)(U), (int)(R)))
8059
8060static __inline__ __m128d __DEFAULT_FN_ATTRS128
8061_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8062{
8063 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8064 (__v2df)__X,
8065 (__v2df)__Y,
8066 (__mmask8)__U,
8068}
8069
8070#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
8071 ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8072 (__v2df)(__m128d)(X), \
8073 (__v2df)(__m128d)(Y), \
8074 (__mmask8)(U), (int)(R)))
8075
8076static __inline__ __m128d __DEFAULT_FN_ATTRS128
8077_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8078{
8079 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8080 -(__v2df)__A,
8081 (__v2df)__B,
8082 (__mmask8)__U,
8084}
8085
/* Low-element FNMADD with explicit rounding: r0 = -(A0*B0) + C0; upper lane
   from A. */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

/* Masked form: r0 = -(W0*A0) + B0 if (U & 1) else W0; upper lane from W. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8097
8098static __inline__ __m128d __DEFAULT_FN_ATTRS128
8099_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8100{
8101 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8102 -(__v2df)__B,
8103 (__v2df)__C,
8104 (__mmask8)__U,
8106}
8107
/* Zero-masked FNMADD with explicit rounding: r0 = -(A0*B0) + C0 if (U & 1),
   else 0.0; upper lane from A. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))
8113
8114static __inline__ __m128d __DEFAULT_FN_ATTRS128
8115_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8116{
8117 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8118 -(__v2df)__X,
8119 (__v2df)__Y,
8120 (__mmask8)__U,
8122}
8123
/* mask3 FNMADD with explicit rounding: r0 = -(W0*X0) + Y0 if (U & 1), else
   Y0; upper lane from Y. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
8129
8130static __inline__ __m128d __DEFAULT_FN_ATTRS128
8131_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8132{
8133 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8134 -(__v2df)__A,
8135 -(__v2df)__B,
8136 (__mmask8)__U,
8138}
8139
/* Low-element FNMSUB with explicit rounding: r0 = -(A0*B0) - C0; upper lane
   from A. */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

/* Masked form: r0 = -(W0*A0) - B0 if (U & 1) else W0; upper lane from W. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))
8151
8152static __inline__ __m128d __DEFAULT_FN_ATTRS128
8153_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8154{
8155 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8156 -(__v2df)__B,
8157 -(__v2df)__C,
8158 (__mmask8)__U,
8160}
8161
/* Zero-masked FNMSUB with explicit rounding: r0 = -(A0*B0) - C0 if (U & 1),
   else 0.0; upper lane from A. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), \
                                           (int)(R)))
8168
8169static __inline__ __m128d __DEFAULT_FN_ATTRS128
8170_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8171{
8172 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8173 -(__v2df)__X,
8174 (__v2df)__Y,
8175 (__mmask8)__U,
8177}
8178
/* mask3 FNMSUB with explicit rounding: r0 = -(W0*X0) - Y0 if (U & 1), else
   Y0; upper lane from Y. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
8184
/* Permute doubles within each 256-bit half of X by the 8-bit immediate C
   (2 bits per destination element). */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

/* Merge-masked variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

/* Permute 64-bit integers within each 256-bit half of X by immediate C. */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

/* Merge-masked variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

/* Zero-masked variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
8210
8211static __inline__ __m512d __DEFAULT_FN_ATTRS512
8212_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8213{
8214 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
8215}
8216
8217static __inline__ __m512d __DEFAULT_FN_ATTRS512
8218_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8219{
8220 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8221 (__v8df)_mm512_permutexvar_pd(__X, __Y),
8222 (__v8df)__W);
8223}
8224
8225static __inline__ __m512d __DEFAULT_FN_ATTRS512
8226_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8227{
8228 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8229 (__v8df)_mm512_permutexvar_pd(__X, __Y),
8230 (__v8df)_mm512_setzero_pd());
8231}
8232
8233static __inline__ __m512i __DEFAULT_FN_ATTRS512
8234_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8235{
8236 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8237}
8238
8239static __inline__ __m512i __DEFAULT_FN_ATTRS512
8241{
8242 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8243 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8244 (__v8di)_mm512_setzero_si512());
8245}
8246
8247static __inline__ __m512i __DEFAULT_FN_ATTRS512
8248_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8249 __m512i __Y)
8250{
8251 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8252 (__v8di)_mm512_permutexvar_epi64(__X, __Y),
8253 (__v8di)__W);
8254}
8255
8256static __inline__ __m512 __DEFAULT_FN_ATTRS512
8257_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8258{
8259 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8260}
8261
8262static __inline__ __m512 __DEFAULT_FN_ATTRS512
8263_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8264{
8265 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8266 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8267 (__v16sf)__W);
8268}
8269
8270static __inline__ __m512 __DEFAULT_FN_ATTRS512
8272{
8273 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8274 (__v16sf)_mm512_permutexvar_ps(__X, __Y),
8275 (__v16sf)_mm512_setzero_ps());
8276}
8277
8278static __inline__ __m512i __DEFAULT_FN_ATTRS512
8279_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8280{
8281 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8282}
8283
/* Legacy spelling kept for source compatibility with older headers. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8285
8286static __inline__ __m512i __DEFAULT_FN_ATTRS512
8288{
8289 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8290 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8291 (__v16si)_mm512_setzero_si512());
8292}
8293
8294static __inline__ __m512i __DEFAULT_FN_ATTRS512
8295_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8296 __m512i __Y)
8297{
8298 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8299 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8300 (__v16si)__W);
8301}
8302
/* Legacy spelling kept for source compatibility with older headers. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8304
8305static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8307{
8308 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8309}
8310
8311static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8313{
8314 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8315}
8316
8317static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8319{
8320 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8321}
8322
8323static __inline__ int __DEFAULT_FN_ATTRS
8325{
8326 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8327}
8328
8329static __inline__ int __DEFAULT_FN_ATTRS
8331{
8332 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8333}
8334
8335static __inline__ unsigned char __DEFAULT_FN_ATTRS
8337{
8338 return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8339}
8340
8341static __inline__ unsigned char __DEFAULT_FN_ATTRS
8343{
8344 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8345}
8346
8347static __inline__ unsigned char __DEFAULT_FN_ATTRS
8348_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8349 *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8350 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8351}
8352
8353static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8355{
8356 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8357}
8358
8359static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8361{
8362 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8363}
8364
8365static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8367{
8368 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8369}
8370
/* AVX512DQ-era spellings of the 16-bit mask operations; all alias the
   _mm512_k* functions above. */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

/* Shift a 16-bit mask left by immediate I bits (zero filling). */
#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))

/* Shift a 16-bit mask right by immediate I bits (zero filling). */
#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8383
8384static __inline__ unsigned int __DEFAULT_FN_ATTRS
8386 return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8387}
8388
8389static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8390_cvtu32_mask16(unsigned int __A) {
8391 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8392}
8393
8394static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8396 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8397}
8398
8399static __inline__ void __DEFAULT_FN_ATTRS
8401 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8402}
8403
8404static __inline__ void __DEFAULT_FN_ATTRS512
8405_mm512_stream_si512 (void * __P, __m512i __A)
8406{
8407 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8408 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8409}
8410
8411static __inline__ __m512i __DEFAULT_FN_ATTRS512
8413{
8414 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8415 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8416}
8417
8418static __inline__ void __DEFAULT_FN_ATTRS512
8419_mm512_stream_pd (void *__P, __m512d __A)
8420{
8421 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8422 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8423}
8424
8425static __inline__ void __DEFAULT_FN_ATTRS512
8426_mm512_stream_ps (void *__P, __m512 __A)
8427{
8428 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8429 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8430}
8431
8432static __inline__ __m512d __DEFAULT_FN_ATTRS512
8433_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8434{
8435 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8436 (__v8df) __W,
8437 (__mmask8) __U);
8438}
8439
8440static __inline__ __m512d __DEFAULT_FN_ATTRS512
8442{
8443 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8444 (__v8df)
8446 (__mmask8) __U);
8447}
8448
8449static __inline__ __m512i __DEFAULT_FN_ATTRS512
8450_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8451{
8452 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8453 (__v8di) __W,
8454 (__mmask8) __U);
8455}
8456
8457static __inline__ __m512i __DEFAULT_FN_ATTRS512
8459{
8460 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8461 (__v8di)
8463 (__mmask8) __U);
8464}
8465
8466static __inline__ __m512 __DEFAULT_FN_ATTRS512
8467_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8468{
8469 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8470 (__v16sf) __W,
8471 (__mmask16) __U);
8472}
8473
8474static __inline__ __m512 __DEFAULT_FN_ATTRS512
8476{
8477 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8478 (__v16sf)
8480 (__mmask16) __U);
8481}
8482
8483static __inline__ __m512i __DEFAULT_FN_ATTRS512
8484_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8485{
8486 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8487 (__v16si) __W,
8488 (__mmask16) __U);
8489}
8490
8491static __inline__ __m512i __DEFAULT_FN_ATTRS512
8493{
8494 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8495 (__v16si)
8497 (__mmask16) __U);
8498}
8499
/* Compare the low floats of X and Y with predicate P (one of the _CMP_*
   constants), returning the 1-bit result in a mask; R selects rounding/SAE. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

/* Masked variant: result bit is zero when bit 0 of M is clear. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

/* As above but with the current rounding mode. */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

/* Masked low-float compare with the current rounding mode. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

/* Compare the low doubles of X and Y with predicate P; R selects SAE. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

/* Masked variant: result bit is zero when bit 0 of M is clear. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

/* Low-double compare with the current rounding mode. */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

/* Masked low-double compare with the current rounding mode. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8543
8544/* Bit Test */
8545
8546static __inline __mmask16 __DEFAULT_FN_ATTRS512
8547_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8548{
8551}
8552
8553static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8554_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8555{
8556 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8558}
8559
8560static __inline __mmask8 __DEFAULT_FN_ATTRS512
8561_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8562{
8563 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8565}
8566
8567static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8568_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8569{
8570 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8572}
8573
8574static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8575_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8576{
8577 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8579}
8580
8581static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8582_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8583{
8584 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8586}
8587
8588static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8589_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8590{
8591 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8593}
8594
8595static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8596_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8597{
8598 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8600}
8601
8602static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8604{
8605 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8606 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8607}
8608
8609static __inline__ __m512 __DEFAULT_FN_ATTRS512
8610_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8611{
8612 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8613 (__v16sf)_mm512_movehdup_ps(__A),
8614 (__v16sf)__W);
8615}
8616
8617static __inline__ __m512 __DEFAULT_FN_ATTRS512
8619{
8620 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8621 (__v16sf)_mm512_movehdup_ps(__A),
8622 (__v16sf)_mm512_setzero_ps());
8623}
8624
8625static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8627{
8628 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8629 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8630}
8631
8632static __inline__ __m512 __DEFAULT_FN_ATTRS512
8633_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8634{
8635 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8636 (__v16sf)_mm512_moveldup_ps(__A),
8637 (__v16sf)__W);
8638}
8639
8640static __inline__ __m512 __DEFAULT_FN_ATTRS512
8642{
8643 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8644 (__v16sf)_mm512_moveldup_ps(__A),
8645 (__v16sf)_mm512_setzero_ps());
8646}
8647
8648static __inline__ __m128 __DEFAULT_FN_ATTRS128
8649_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8650{
8651 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8652}
8653
8654static __inline__ __m128 __DEFAULT_FN_ATTRS128
8655_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
8656{
8657 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8658 _mm_setzero_ps());
8659}
8660
8661static __inline__ __m128d __DEFAULT_FN_ATTRS128
8662_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8663{
8664 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8665}
8666
8667static __inline__ __m128d __DEFAULT_FN_ATTRS128
8668_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
8669{
8670 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8671 _mm_setzero_pd());
8672}
8673
8674static __inline__ void __DEFAULT_FN_ATTRS128
8675_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8676{
8677 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8678}
8679
8680static __inline__ void __DEFAULT_FN_ATTRS128
8681_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8682{
8683 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8684}
8685
8686static __inline__ __m128 __DEFAULT_FN_ATTRS128
8687_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8688{
8689 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8690 (__v4sf)_mm_setzero_ps(),
8691 0, 4, 4, 4);
8692
8693 return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
8694}
8695
8696static __inline__ __m128 __DEFAULT_FN_ATTRS128
8697_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8698{
8699 return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
8700 (__v4sf) _mm_setzero_ps(),
8701 __U & 1);
8702}
8703
8704static __inline__ __m128d __DEFAULT_FN_ATTRS128
8705_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8706{
8707 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8708 (__v2df)_mm_setzero_pd(),
8709 0, 2);
8710
8711 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
8712}
8713
8714static __inline__ __m128d __DEFAULT_FN_ATTRS128
8715_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8716{
8717 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
8718 (__v2df) _mm_setzero_pd(),
8719 __U & 1);
8720}
8721
/* Shuffle 32-bit lanes within each 128-bit block of A by immediate I
   (PSHUFD semantics repeated across the four blocks). */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

/* Merge-masked variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

/* Zero-masked variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8734
8735static __inline__ __m512d __DEFAULT_FN_ATTRS512
8736_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8737{
8738 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8739 (__v8df) __W,
8740 (__mmask8) __U);
8741}
8742
8743static __inline__ __m512d __DEFAULT_FN_ATTRS512
8745{
8746 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8747 (__v8df) _mm512_setzero_pd (),
8748 (__mmask8) __U);
8749}
8750
8751static __inline__ __m512i __DEFAULT_FN_ATTRS512
8752_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8753{
8754 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8755 (__v8di) __W,
8756 (__mmask8) __U);
8757}
8758
8759static __inline__ __m512i __DEFAULT_FN_ATTRS512
8761{
8762 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8763 (__v8di) _mm512_setzero_si512 (),
8764 (__mmask8) __U);
8765}
8766
8767static __inline__ __m512d __DEFAULT_FN_ATTRS512
8768_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8769{
8770 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8771 (__v8df) __W,
8772 (__mmask8) __U);
8773}
8774
8775static __inline__ __m512d __DEFAULT_FN_ATTRS512
8777{
8778 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8779 (__v8df) _mm512_setzero_pd(),
8780 (__mmask8) __U);
8781}
8782
8783static __inline__ __m512i __DEFAULT_FN_ATTRS512
8784_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8785{
8786 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8787 (__v8di) __W,
8788 (__mmask8) __U);
8789}
8790
8791static __inline__ __m512i __DEFAULT_FN_ATTRS512
8793{
8794 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8795 (__v8di) _mm512_setzero_si512(),
8796 (__mmask8) __U);
8797}
8798
8799static __inline__ __m512 __DEFAULT_FN_ATTRS512
8800_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8801{
8802 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8803 (__v16sf) __W,
8804 (__mmask16) __U);
8805}
8806
8807static __inline__ __m512 __DEFAULT_FN_ATTRS512
8809{
8810 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8811 (__v16sf) _mm512_setzero_ps(),
8812 (__mmask16) __U);
8813}
8814
8815static __inline__ __m512i __DEFAULT_FN_ATTRS512
8816_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8817{
8818 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8819 (__v16si) __W,
8820 (__mmask16) __U);
8821}
8822
8823static __inline__ __m512i __DEFAULT_FN_ATTRS512
8825{
8826 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8827 (__v16si) _mm512_setzero_si512(),
8828 (__mmask16) __U);
8829}
8830
8831static __inline__ __m512 __DEFAULT_FN_ATTRS512
8832_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8833{
8834 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8835 (__v16sf) __W,
8836 (__mmask16) __U);
8837}
8838
8839static __inline__ __m512 __DEFAULT_FN_ATTRS512
8841{
8842 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8843 (__v16sf) _mm512_setzero_ps(),
8844 (__mmask16) __U);
8845}
8846
8847static __inline__ __m512i __DEFAULT_FN_ATTRS512
8848_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8849{
8850 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8851 (__v16si) __W,
8852 (__mmask16) __U);
8853}
8854
8855static __inline__ __m512i __DEFAULT_FN_ATTRS512
8857{
8858 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8859 (__v16si) _mm512_setzero_si512(),
8860 (__mmask16) __U);
8861}
8862
/* Convert 8 floats to 8 doubles; R selects SAE behaviour. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
8877
8878static __inline__ __m512d __DEFAULT_FN_ATTRS512
8880{
8881 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8882}
8883
8884static __inline__ __m512d __DEFAULT_FN_ATTRS512
8885_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
8886{
8887 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8888 (__v8df)_mm512_cvtps_pd(__A),
8889 (__v8df)__W);
8890}
8891
8892static __inline__ __m512d __DEFAULT_FN_ATTRS512
8894{
8895 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8896 (__v8df)_mm512_cvtps_pd(__A),
8897 (__v8df)_mm512_setzero_pd());
8898}
8899
8900static __inline__ __m512d __DEFAULT_FN_ATTRS512
8902{
8903 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8904}
8905
8906static __inline__ __m512d __DEFAULT_FN_ATTRS512
8907_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
8908{
8909 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8910}
8911
8912static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8913_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {
8914 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8915 (__v8df)__W);
8916}
8917
8918static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8920 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8921 (__v8df)_mm512_setzero_pd());
8922}
8923
8924static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8925_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8926 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8927 (__v16sf)__W);
8928}
8929
8930static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8932 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8933 (__v16sf)_mm512_setzero_ps());
8934}
8935
8936static __inline__ void __DEFAULT_FN_ATTRS512
8938{
8939 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8940 (__mmask8) __U);
8941}
8942
8943static __inline__ void __DEFAULT_FN_ATTRS512
8945{
8946 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8947 (__mmask8) __U);
8948}
8949
8950static __inline__ void __DEFAULT_FN_ATTRS512
8952{
8953 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8954 (__mmask16) __U);
8955}
8956
8957static __inline__ void __DEFAULT_FN_ATTRS512
8959{
8960 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8961 (__mmask16) __U);
8962}
8963
/* Convert the low double of B to a float in lane 0; upper lanes from A.
   R is an _MM_FROUND_* rounding-mode constant. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

/* Merge-masked variant: lane 0 comes from W when bit 0 of U is clear. */
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

/* Zero-masked variant: lane 0 is zero when bit 0 of U is clear. */
#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))
8981
8982static __inline__ __m128 __DEFAULT_FN_ATTRS128
8983_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
8984{
8985 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8986 (__v2df)__B,
8987 (__v4sf)__W,
8989}
8990
8991static __inline__ __m128 __DEFAULT_FN_ATTRS128
8992_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
8993{
8994 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8995 (__v2df)__B,
8996 (__v4sf)_mm_setzero_ps(),
8998}
8999
/* Intel-documented aliases for the scalar int<->float conversions. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
/* 64-bit-only aliases. */
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
9010
#ifdef __x86_64__
/* Insert (double)B (a signed 64-bit int) into lane 0 of A with rounding R. */
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

/* Identical operation under the alternative Intel spelling. */
#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif

/* Insert (float)B (a signed 32-bit int) into lane 0 of A with rounding R. */
#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

/* Identical operation under the alternative Intel spelling. */
#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
/* Insert (float)B (a signed 64-bit int) into lane 0 of A with rounding R. */
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

/* Identical operation under the alternative Intel spelling. */
#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
9036
/* Convert the low float of B to a double in lane 0; upper lane from A.
   R selects SAE behaviour. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

/* Merge-masked variant: lane 0 comes from W when bit 0 of U is clear. */
#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

/* Zero-masked variant: lane 0 is zero when bit 0 of U is clear. */
#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
9054
9055static __inline__ __m128d __DEFAULT_FN_ATTRS128
9056_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9057{
9058 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9059 (__v4sf)__B,
9060 (__v2df)__W,
9062}
9063
9064static __inline__ __m128d __DEFAULT_FN_ATTRS128
9065_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9066{
9067 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9068 (__v4sf)__B,
9069 (__v2df)_mm_setzero_pd(),
9071}
9072
9073static __inline__ __m128d __DEFAULT_FN_ATTRS128
9074_mm_cvtu32_sd (__m128d __A, unsigned __B)
9075{
9076 __A[0] = __B;
9077 return __A;
9078}
9079
#ifdef __x86_64__
/* Insert (double)B (an unsigned 64-bit int) into lane 0 of A with
   rounding R. */
#define _mm_cvt_roundu64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                       (unsigned long long)(B), (int)(R)))

/* Replace lane 0 of __A with (double)__B; upper lane unchanged. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  __A[0] = __B;
  return __A;
}
#endif
9092
/* Insert (float)B (an unsigned 32-bit int) into lane 0 of A with
   rounding R. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))
9096
9097static __inline__ __m128 __DEFAULT_FN_ATTRS128
9098_mm_cvtu32_ss (__m128 __A, unsigned __B)
9099{
9100 __A[0] = __B;
9101 return __A;
9102}
9103
#ifdef __x86_64__
/* Insert (float)B (an unsigned 64-bit int) into lane 0 of A with
   rounding R. */
#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))

/* Replace lane 0 of __A with (float)__B; upper lanes unchanged. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  __A[0] = __B;
  return __A;
}
#endif
9116
9117static __inline__ __m512i __DEFAULT_FN_ATTRS512
9118_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9119{
9120 return (__m512i) __builtin_ia32_selectd_512(__M,
9121 (__v16si) _mm512_set1_epi32(__A),
9122 (__v16si) __O);
9123}
9124
9125static __inline__ __m512i __DEFAULT_FN_ATTRS512
9126_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9127{
9128 return (__m512i) __builtin_ia32_selectq_512(__M,
9129 (__v8di) _mm512_set1_epi64(__A),
9130 (__v8di) __O);
9131}
9132
9134 char __e63, char __e62, char __e61, char __e60, char __e59, char __e58,
9135 char __e57, char __e56, char __e55, char __e54, char __e53, char __e52,
9136 char __e51, char __e50, char __e49, char __e48, char __e47, char __e46,
9137 char __e45, char __e44, char __e43, char __e42, char __e41, char __e40,
9138 char __e39, char __e38, char __e37, char __e36, char __e35, char __e34,
9139 char __e33, char __e32, char __e31, char __e30, char __e29, char __e28,
9140 char __e27, char __e26, char __e25, char __e24, char __e23, char __e22,
9141 char __e21, char __e20, char __e19, char __e18, char __e17, char __e16,
9142 char __e15, char __e14, char __e13, char __e12, char __e11, char __e10,
9143 char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3,
9144 char __e2, char __e1, char __e0) {
9145
9146 return __extension__ (__m512i)(__v64qi)
9147 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9148 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9149 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9150 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9151 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9152 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9153 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9154 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9155}
9156
9158 short __e31, short __e30, short __e29, short __e28, short __e27,
9159 short __e26, short __e25, short __e24, short __e23, short __e22,
9160 short __e21, short __e20, short __e19, short __e18, short __e17,
9161 short __e16, short __e15, short __e14, short __e13, short __e12,
9162 short __e11, short __e10, short __e9, short __e8, short __e7, short __e6,
9163 short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) {
9164 return __extension__ (__m512i)(__v32hi)
9165 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9166 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9167 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9168 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9169}
9170
9172 int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H,
9173 int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) {
9174 return __extension__ (__m512i)(__v16si)
9175 { __P, __O, __N, __M, __L, __K, __J, __I,
9176 __H, __G, __F, __E, __D, __C, __B, __A };
9177}
9178
9179#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
9180 e8,e9,e10,e11,e12,e13,e14,e15) \
9181 _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
9182 (e5),(e4),(e3),(e2),(e1),(e0))
9183
/* Construct a 512-bit vector of [8 x i64]; __A becomes the most significant
   element and __H the least significant. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_epi64(long long __A, long long __B, long long __C, long long __D,
                 long long __E, long long __F, long long __G, long long __H) {
  return __extension__ (__m512i) (__v8di)
    { __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Same as _mm512_set_epi64 with arguments in reversed (memory) order. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9193
/* Construct a 512-bit vector of [8 x double]; __A becomes the most
   significant element and __H the least significant. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_pd(double __A, double __B, double __C, double __D, double __E,
              double __F, double __G, double __H) {
  return __extension__ (__m512d)
    { __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Same as _mm512_set_pd with arguments in reversed (memory) order. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9203
/* Construct a 512-bit vector of [16 x float]; __A becomes the most
   significant element and __P the least significant. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F,
              float __G, float __H, float __I, float __J, float __K, float __L,
              float __M, float __N, float __O, float __P) {
  return __extension__ (__m512)
    { __P, __O, __N, __M, __L, __K, __J, __I,
      __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Same as _mm512_set_ps with arguments in reversed (memory) order. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
9216
/* Absolute value of 16 packed single-precision elements: clear each lane's
   sign bit by ANDing with 0x7FFFFFFF. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_abs_ps(__m512 __A) {
  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
9221
/* Masked absolute value of packed floats; lanes with a clear bit in __K keep
   the corresponding element of __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) {
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
9226
/* Absolute value of 8 packed double-precision elements: clear each lane's
   sign bit by ANDing with 0x7FFFFFFFFFFFFFFF. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_abs_pd(__m512d __A) {
  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
}
9231
/* Masked absolute value of packed doubles; lanes with a clear bit in __K
   keep the corresponding element of __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) {
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}
9236
9237/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
9238 * outputs. This class of vector operation forms the basis of many scientific
9239 * computations. In vector-reduction arithmetic, the evaluation order is
9240 * independent of the order of the input elements of V.
9241
9242 * For floating-point intrinsics:
9243 * 1. When using fadd/fmul intrinsics, the order of operations within the
9244 * vector is unspecified (associative math).
9245 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
9246 * produce unspecified results.
9247
9248 * Used bisection method. At each step, we partition the vector with previous
9249 * step in half, and the operation is performed on its two halves.
9250 * This takes log2(n) steps where n is the number of elements in the vector.
9251 */
9252
9253static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9255 return __builtin_reduce_add((__v8di)__W);
9256}
9257
9258static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9260 return __builtin_reduce_mul((__v8di)__W);
9261}
9262
9263static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9265 return __builtin_reduce_and((__v8di)__W);
9266}
9267
9268static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9270 return __builtin_reduce_or((__v8di)__W);
9271}
9272
9273static __inline__ long long __DEFAULT_FN_ATTRS512
9275 __W = _mm512_maskz_mov_epi64(__M, __W);
9276 return __builtin_reduce_add((__v8di)__W);
9277}
9278
9279static __inline__ long long __DEFAULT_FN_ATTRS512
9281 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
9282 return __builtin_reduce_mul((__v8di)__W);
9283}
9284
9285static __inline__ long long __DEFAULT_FN_ATTRS512
9287 __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
9288 return __builtin_reduce_and((__v8di)__W);
9289}
9290
9291static __inline__ long long __DEFAULT_FN_ATTRS512
9293 __W = _mm512_maskz_mov_epi64(__M, __W);
9294 return __builtin_reduce_or((__v8di)__W);
9295}
9296
9297// -0.0 is used to ignore the start value since it is the neutral value of
9298// floating point addition. For more information, please refer to
9299// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
9300static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
9301 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9302}
9303
9304static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
9305 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9306}
9307
9308static __inline__ double __DEFAULT_FN_ATTRS512
9310 __W = _mm512_maskz_mov_pd(__M, __W);
9311 return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
9312}
9313
9314static __inline__ double __DEFAULT_FN_ATTRS512
9316 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9317 return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
9318}
9319
9320static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9322 return __builtin_reduce_add((__v16si)__W);
9323}
9324
9325static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9327 return __builtin_reduce_mul((__v16si)__W);
9328}
9329
9330static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9332 return __builtin_reduce_and((__v16si)__W);
9333}
9334
9335static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9337 return __builtin_reduce_or((__v16si)__W);
9338}
9339
9340static __inline__ int __DEFAULT_FN_ATTRS512
9342 __W = _mm512_maskz_mov_epi32(__M, __W);
9343 return __builtin_reduce_add((__v16si)__W);
9344}
9345
9346static __inline__ int __DEFAULT_FN_ATTRS512
9348 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
9349 return __builtin_reduce_mul((__v16si)__W);
9350}
9351
9352static __inline__ int __DEFAULT_FN_ATTRS512
9354 __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
9355 return __builtin_reduce_and((__v16si)__W);
9356}
9357
9358static __inline__ int __DEFAULT_FN_ATTRS512
9360 __W = _mm512_maskz_mov_epi32(__M, __W);
9361 return __builtin_reduce_or((__v16si)__W);
9362}
9363
9364static __inline__ float __DEFAULT_FN_ATTRS512
9366 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9367}
9368
9369static __inline__ float __DEFAULT_FN_ATTRS512
9371 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9372}
9373
9374static __inline__ float __DEFAULT_FN_ATTRS512
9376 __W = _mm512_maskz_mov_ps(__M, __W);
9377 return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
9378}
9379
9380static __inline__ float __DEFAULT_FN_ATTRS512
9382 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9383 return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
9384}
9385
9386static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9388 return __builtin_reduce_max((__v8di)__V);
9389}
9390
9391static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9393 return __builtin_reduce_max((__v8du)__V);
9394}
9395
9396static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9398 return __builtin_reduce_min((__v8di)__V);
9399}
9400
9401static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9403 return __builtin_reduce_min((__v8du)__V);
9404}
9405
9406static __inline__ long long __DEFAULT_FN_ATTRS512
9408 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9409 return __builtin_reduce_max((__v8di)__V);
9410}
9411
9412static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9414 __V = _mm512_maskz_mov_epi64(__M, __V);
9415 return __builtin_reduce_max((__v8du)__V);
9416}
9417
9418static __inline__ long long __DEFAULT_FN_ATTRS512
9420 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9421 return __builtin_reduce_min((__v8di)__V);
9422}
9423
9424static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9426 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
9427 return __builtin_reduce_min((__v8du)__V);
9428}
9429static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9431 return __builtin_reduce_max((__v16si)__V);
9432}
9433
9434static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9436 return __builtin_reduce_max((__v16su)__V);
9437}
9438
9439static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
9441 return __builtin_reduce_min((__v16si)__V);
9442}
9443
9444static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
9446 return __builtin_reduce_min((__v16su)__V);
9447}
9448
9449static __inline__ int __DEFAULT_FN_ATTRS512
9451 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9452 return __builtin_reduce_max((__v16si)__V);
9453}
9454
9455static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9457 __V = _mm512_maskz_mov_epi32(__M, __V);
9458 return __builtin_reduce_max((__v16su)__V);
9459}
9460
9461static __inline__ int __DEFAULT_FN_ATTRS512
9463 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9464 return __builtin_reduce_min((__v16si)__V);
9465}
9466
9467static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9469 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
9470 return __builtin_reduce_min((__v16su)__V);
9471}
9472
9473static __inline__ double __DEFAULT_FN_ATTRS512
9475 return __builtin_ia32_reduce_fmax_pd512(__V);
9476}
9477
9478static __inline__ double __DEFAULT_FN_ATTRS512
9480 return __builtin_ia32_reduce_fmin_pd512(__V);
9481}
9482
9483static __inline__ double __DEFAULT_FN_ATTRS512
9485 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9486 return __builtin_ia32_reduce_fmax_pd512(__V);
9487}
9488
9489static __inline__ double __DEFAULT_FN_ATTRS512
9491 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9492 return __builtin_ia32_reduce_fmin_pd512(__V);
9493}
9494
9495static __inline__ float __DEFAULT_FN_ATTRS512
9497 return __builtin_ia32_reduce_fmax_ps512(__V);
9498}
9499
9500static __inline__ float __DEFAULT_FN_ATTRS512
9502 return __builtin_ia32_reduce_fmin_ps512(__V);
9503}
9504
9505static __inline__ float __DEFAULT_FN_ATTRS512
9507 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9508 return __builtin_ia32_reduce_fmax_ps512(__V);
9509}
9510
9511static __inline__ float __DEFAULT_FN_ATTRS512
9513 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9514 return __builtin_ia32_reduce_fmin_ps512(__V);
9515}
9516
9517/// Moves the least significant 32 bits of a vector of [16 x i32] to a
9518/// 32-bit signed integer value.
9519///
9520/// \headerfile <x86intrin.h>
9521///
9522/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9523///
9524/// \param __A
9525/// A vector of [16 x i32]. The least significant 32 bits are moved to the
9526/// destination.
9527/// \returns A 32-bit signed integer containing the moved value.
9528static __inline__ int __DEFAULT_FN_ATTRS512
9530 __v16si __b = (__v16si)__A;
9531 return __b[0];
9532}
9533
9534/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
9535/// locations starting at location \a base_addr at packed 32-bit integer indices
9536/// stored in the lower half of \a vindex scaled by \a scale them in dst.
9537///
9538/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9539///
9540/// \code{.operation}
9541/// FOR j := 0 to 7
9542/// i := j*64
9543/// m := j*32
9544/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9545/// dst[i+63:i] := MEM[addr+63:addr]
9546/// ENDFOR
9547/// dst[MAX:512] := 0
9548/// \endcode
9549#define _mm512_i32logather_pd(vindex, base_addr, scale) \
9550 _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9551
9552/// Loads 8 double-precision (64-bit) floating-point elements from memory
9553/// starting at location \a base_addr at packed 32-bit integer indices stored in
9554/// the lower half of \a vindex scaled by \a scale into dst using writemask
9555/// \a mask (elements are copied from \a src when the corresponding mask bit is
9556/// not set).
9557///
9558/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
9559///
9560/// \code{.operation}
9561/// FOR j := 0 to 7
9562/// i := j*64
9563/// m := j*32
9564/// IF mask[j]
9565/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9566/// dst[i+63:i] := MEM[addr+63:addr]
9567/// ELSE
9568/// dst[i+63:i] := src[i+63:i]
9569/// FI
9570/// ENDFOR
9571/// dst[MAX:512] := 0
9572/// \endcode
9573#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
9574 _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex), \
9575 (base_addr), (scale))
9576
9577/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9578/// at packed 32-bit integer indices stored in the lower half of \a vindex
9579/// scaled by \a scale and stores them in dst.
9580///
9581/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9582///
9583/// \code{.operation}
9584/// FOR j := 0 to 7
9585/// i := j*64
9586/// m := j*32
9587/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9588/// dst[i+63:i] := MEM[addr+63:addr]
9589/// ENDFOR
9590/// dst[MAX:512] := 0
9591/// \endcode
9592#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
9593 _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9594
9595/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
9596/// at packed 32-bit integer indices stored in the lower half of \a vindex
9597/// scaled by \a scale and stores them in dst using writemask \a mask (elements
9598/// are copied from \a src when the corresponding mask bit is not set).
9599///
9600/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
9601///
9602/// \code{.operation}
9603/// FOR j := 0 to 7
9604/// i := j*64
9605/// m := j*32
9606/// IF mask[j]
9607/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9608/// dst[i+63:i] := MEM[addr+63:addr]
9609/// ELSE
9610/// dst[i+63:i] := src[i+63:i]
9611/// FI
9612/// ENDFOR
9613/// dst[MAX:512] := 0
9614/// \endcode
9615#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
9616 _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex), \
9617 (base_addr), (scale))
9618
9619/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9620/// and to memory locations starting at location \a base_addr at packed 32-bit
9621/// integer indices stored in \a vindex scaled by \a scale.
9622///
9623/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9624///
9625/// \code{.operation}
9626/// FOR j := 0 to 7
9627/// i := j*64
9628/// m := j*32
9629/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9630/// MEM[addr+63:addr] := v1[i+63:i]
9631/// ENDFOR
9632/// \endcode
9633#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
9634 _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
9635
9636/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
9637/// to memory locations starting at location \a base_addr at packed 32-bit
9638/// integer indices stored in \a vindex scaled by \a scale. Only those elements
9639/// whose corresponding mask bit is set in writemask \a mask are written to
9640/// memory.
9641///
9642/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
9643///
9644/// \code{.operation}
9645/// FOR j := 0 to 7
9646/// i := j*64
9647/// m := j*32
9648/// IF mask[j]
9649/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9650/// MEM[addr+63:addr] := a[i+63:i]
9651/// FI
9652/// ENDFOR
9653/// \endcode
9654#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
9655 _mm512_mask_i32scatter_pd((base_addr), (mask), \
9656 _mm512_castsi512_si256(vindex), (v1), (scale))
9657
9658/// Stores 8 packed 64-bit integer elements located in \a v1 and stores them in
9659/// memory locations starting at location \a base_addr at packed 32-bit integer
9660/// indices stored in \a vindex scaled by \a scale.
9661///
9662/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9663///
9664/// \code{.operation}
9665/// FOR j := 0 to 7
9666/// i := j*64
9667/// m := j*32
9668/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9669/// MEM[addr+63:addr] := a[i+63:i]
9670/// ENDFOR
9671/// \endcode
9672#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
9673 _mm512_i32scatter_epi64((base_addr), \
9674 _mm512_castsi512_si256(vindex), (v1), (scale))
9675
9676/// Stores 8 packed 64-bit integer elements located in a and stores them in
9677/// memory locations starting at location \a base_addr at packed 32-bit integer
9678/// indices stored in \a vindex scaled by scale using writemask \a mask (elements
9679/// whose corresponding mask bit is not set are not written to memory).
9680///
9681/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9682///
9683/// \code{.operation}
9684/// FOR j := 0 to 7
9685/// i := j*64
9686/// m := j*32
9687/// IF mask[j]
9688/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
9689/// MEM[addr+63:addr] := a[i+63:i]
9690/// FI
9691/// ENDFOR
9692/// \endcode
9693#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
9694 _mm512_mask_i32scatter_epi64((base_addr), (mask), \
9695 _mm512_castsi512_si256(vindex), (v1), (scale))
9696
9697#undef __DEFAULT_FN_ATTRS512
9698#undef __DEFAULT_FN_ATTRS128
9699#undef __DEFAULT_FN_ATTRS
9700#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
9701#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
9702#undef __DEFAULT_FN_ATTRS_CONSTEXPR
9703
9704#endif /* __AVX512FINTRIN_H */
#define __L(__X)
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ uint32_t volatile uint32_t * __p
Definition: arm_acle.h:57
return __v
Definition: arm_acle.h:88
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi64(long long __d)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi64(__m512i __W)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi32(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
#define __DEFAULT_FN_ATTRS512
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_epi64(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_add_epi64(__m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_pd(double __w)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
_MM_MANTISSA_NORM_ENUM
@ _MM_MANT_NORM_p5_1
@ _MM_MANT_NORM_p5_2
@ _MM_MANT_NORM_1_2
@ _MM_MANT_NORM_p75_1p5
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi8(char __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_abs_pd(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
Definition: avx512fintrin.h:16
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movedup_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_pd(__m512d __a, __m512d __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd(__m256 __A)
_MM_CMPINT_ENUM
Definition: avx512fintrin.h:52
@ _MM_CMPINT_NE
Definition: avx512fintrin.h:57
@ _MM_CMPINT_NLT
Definition: avx512fintrin.h:58
@ _MM_CMPINT_LE
Definition: avx512fintrin.h:55
@ _MM_CMPINT_EQ
Definition: avx512fintrin.h:53
@ _MM_CMPINT_LT
Definition: avx512fintrin.h:54
@ _MM_CMPINT_UNUSED
Definition: avx512fintrin.h:56
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
unsigned char __mmask8
Definition: avx512fintrin.h:41
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
_MM_TERNLOG_ENUM
A helper to represent the ternary logic operations among vector A, B and C.
@ _MM_TERNLOG_A
@ _MM_TERNLOG_B
@ _MM_TERNLOG_C
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_mul_epi32(__m512i __W)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
_MM_MANTISSA_SIGN_ENUM
@ _MM_MANT_SIGN_zero
@ _MM_MANT_SIGN_src
@ _MM_MANT_SIGN_nan
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
#define __DEFAULT_FN_ATTRS128
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_ps(float __w)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x4(__m256d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
_MM_PERM_ENUM
Definition: avx512fintrin.h:65
@ _MM_PERM_BBCA
Definition: avx512fintrin.h:95
@ _MM_PERM_BCBD
@ _MM_PERM_DAAB
@ _MM_PERM_CBBD
@ _MM_PERM_DCCC
@ _MM_PERM_CDBB
@ _MM_PERM_DDDD
@ _MM_PERM_CCCC
@ _MM_PERM_CADA
@ _MM_PERM_BACD
Definition: avx512fintrin.h:91
@ _MM_PERM_CCAD
@ _MM_PERM_ABDB
Definition: avx512fintrin.h:75
@ _MM_PERM_BBBD
Definition: avx512fintrin.h:95
@ _MM_PERM_DCAB
@ _MM_PERM_BABC
Definition: avx512fintrin.h:89
@ _MM_PERM_AACD
Definition: avx512fintrin.h:69
@ _MM_PERM_BBAB
Definition: avx512fintrin.h:93
@ _MM_PERM_DCDB
@ _MM_PERM_BACC
Definition: avx512fintrin.h:90
@ _MM_PERM_ABDA
Definition: avx512fintrin.h:75
@ _MM_PERM_ACCC
Definition: avx512fintrin.h:80
@ _MM_PERM_ADAC
Definition: avx512fintrin.h:82
@ _MM_PERM_CCCD
@ _MM_PERM_CADD
@ _MM_PERM_ACCB
Definition: avx512fintrin.h:79
@ _MM_PERM_BBDB
Definition: avx512fintrin.h:97
@ _MM_PERM_ABBB
Definition: avx512fintrin.h:73
@ _MM_PERM_BACB
Definition: avx512fintrin.h:90
@ _MM_PERM_CDCA
@ _MM_PERM_ACBC
Definition: avx512fintrin.h:78
@ _MM_PERM_ADCB
Definition: avx512fintrin.h:85
@ _MM_PERM_BBBC
Definition: avx512fintrin.h:94
@ _MM_PERM_DBBA
@ _MM_PERM_BBBB
Definition: avx512fintrin.h:94
@ _MM_PERM_DDDB
@ _MM_PERM_CAAA
@ _MM_PERM_ADBB
Definition: avx512fintrin.h:83
@ _MM_PERM_ACDB
Definition: avx512fintrin.h:81
@ _MM_PERM_DCAD
@ _MM_PERM_DBBC
@ _MM_PERM_BAAB
Definition: avx512fintrin.h:87
@ _MM_PERM_BDDD
@ _MM_PERM_BBAD
Definition: avx512fintrin.h:93
@ _MM_PERM_DDBA
@ _MM_PERM_CDCD
@ _MM_PERM_CCCA
@ _MM_PERM_DBBB
@ _MM_PERM_DAAD
@ _MM_PERM_DCBA
@ _MM_PERM_CCBC
@ _MM_PERM_ADDD
Definition: avx512fintrin.h:87
@ _MM_PERM_DBAC
@ _MM_PERM_ABAB
Definition: avx512fintrin.h:71
@ _MM_PERM_CBDB
@ _MM_PERM_CDBC
@ _MM_PERM_AABC
Definition: avx512fintrin.h:68
@ _MM_PERM_DABD
@ _MM_PERM_CBBA
@ _MM_PERM_CBAA
@ _MM_PERM_BDDB
@ _MM_PERM_CABC
@ _MM_PERM_BDBD
@ _MM_PERM_BCAD
Definition: avx512fintrin.h:99
@ _MM_PERM_ACBA
Definition: avx512fintrin.h:78
@ _MM_PERM_ADBA
Definition: avx512fintrin.h:83
@ _MM_PERM_ADBC
Definition: avx512fintrin.h:84
@ _MM_PERM_DBCB
@ _MM_PERM_CBDC
@ _MM_PERM_CBAD
@ _MM_PERM_ABCC
Definition: avx512fintrin.h:74
@ _MM_PERM_AAAD
Definition: avx512fintrin.h:67
@ _MM_PERM_CBAC
@ _MM_PERM_CCDA
@ _MM_PERM_CDAC
@ _MM_PERM_BADD
Definition: avx512fintrin.h:92
@ _MM_PERM_DAAC
@ _MM_PERM_BCCC
@ _MM_PERM_DBBD
@ _MM_PERM_DDAC
@ _MM_PERM_DACD
@ _MM_PERM_BAAC
Definition: avx512fintrin.h:88
@ _MM_PERM_ACCA
Definition: avx512fintrin.h:79
@ _MM_PERM_ABDD
Definition: avx512fintrin.h:76
@ _MM_PERM_BBCC
Definition: avx512fintrin.h:96
@ _MM_PERM_DAAA
@ _MM_PERM_CAAB
@ _MM_PERM_BCDB
@ _MM_PERM_ACBB
Definition: avx512fintrin.h:78
@ _MM_PERM_CDAB
@ _MM_PERM_DBDB
@ _MM_PERM_AABB
Definition: avx512fintrin.h:67
@ _MM_PERM_DBDA
@ _MM_PERM_BCBA
Definition: avx512fintrin.h:99
@ _MM_PERM_CBAB
@ _MM_PERM_DCDC
@ _MM_PERM_BBCB
Definition: avx512fintrin.h:95
@ _MM_PERM_CDCB
@ _MM_PERM_AACA
Definition: avx512fintrin.h:68
@ _MM_PERM_ACBD
Definition: avx512fintrin.h:79
@ _MM_PERM_AAAB
Definition: avx512fintrin.h:66
@ _MM_PERM_DCCB
@ _MM_PERM_ADDB
Definition: avx512fintrin.h:86
@ _MM_PERM_AAAA
Definition: avx512fintrin.h:66
@ _MM_PERM_AACC
Definition: avx512fintrin.h:69
@ _MM_PERM_BDDC
@ _MM_PERM_CBBC
@ _MM_PERM_DDCC
@ _MM_PERM_CABD
@ _MM_PERM_AADC
Definition: avx512fintrin.h:70
@ _MM_PERM_BCBC
@ _MM_PERM_BCCA
@ _MM_PERM_CCBD
@ _MM_PERM_CBBB
@ _MM_PERM_CDBA
@ _MM_PERM_CACD
@ _MM_PERM_BDAD
@ _MM_PERM_ADCA
Definition: avx512fintrin.h:84
@ _MM_PERM_AAAC
Definition: avx512fintrin.h:66
@ _MM_PERM_BDDA
@ _MM_PERM_CCAC
@ _MM_PERM_ACDC
Definition: avx512fintrin.h:81
@ _MM_PERM_DBCA
@ _MM_PERM_DBAA
@ _MM_PERM_AABD
Definition: avx512fintrin.h:68
@ _MM_PERM_CDCC
@ _MM_PERM_DCAA
@ _MM_PERM_DDDC
@ _MM_PERM_CDDB
@ _MM_PERM_AABA
Definition: avx512fintrin.h:67
@ _MM_PERM_DDBB
@ _MM_PERM_CDDA
@ _MM_PERM_AADD
Definition: avx512fintrin.h:71
@ _MM_PERM_BADC
Definition: avx512fintrin.h:92
@ _MM_PERM_BDBA
@ _MM_PERM_DBDD
@ _MM_PERM_BDAC
@ _MM_PERM_DBDC
@ _MM_PERM_BBBA
Definition: avx512fintrin.h:94
@ _MM_PERM_DDBC
@ _MM_PERM_BAAA
Definition: avx512fintrin.h:87
@ _MM_PERM_BDCC
@ _MM_PERM_DDAB
@ _MM_PERM_BCCB
@ _MM_PERM_BCCD
@ _MM_PERM_ADBD
Definition: avx512fintrin.h:84
@ _MM_PERM_ADCC
Definition: avx512fintrin.h:85
@ _MM_PERM_CCBB
@ _MM_PERM_CDAA
@ _MM_PERM_BBDA
Definition: avx512fintrin.h:96
@ _MM_PERM_CACC
@ _MM_PERM_DCBB
@ _MM_PERM_DABA
@ _MM_PERM_BADB
Definition: avx512fintrin.h:91
@ _MM_PERM_ABCA
Definition: avx512fintrin.h:74
@ _MM_PERM_CBCC
@ _MM_PERM_ABAD
Definition: avx512fintrin.h:72
@ _MM_PERM_BDBC
@ _MM_PERM_DDDA
@ _MM_PERM_ADAB
Definition: avx512fintrin.h:82
@ _MM_PERM_CADB
@ _MM_PERM_ADAA
Definition: avx512fintrin.h:82
@ _MM_PERM_ACAC
Definition: avx512fintrin.h:77
@ _MM_PERM_DADD
@ _MM_PERM_BABD
Definition: avx512fintrin.h:89
@ _MM_PERM_ACCD
Definition: avx512fintrin.h:80
@ _MM_PERM_CCAA
@ _MM_PERM_AADA
Definition: avx512fintrin.h:70
@ _MM_PERM_BDCA
@ _MM_PERM_CDDD
@ _MM_PERM_ABBD
Definition: avx512fintrin.h:73
@ _MM_PERM_ACAA
Definition: avx512fintrin.h:76
@ _MM_PERM_ACDD
Definition: avx512fintrin.h:81
@ _MM_PERM_DABB
@ _MM_PERM_CCCB
@ _MM_PERM_AADB
Definition: avx512fintrin.h:70
@ _MM_PERM_DBAD
@ _MM_PERM_BBDD
Definition: avx512fintrin.h:97
@ _MM_PERM_BCDC
@ _MM_PERM_CABA
@ _MM_PERM_BBAA
Definition: avx512fintrin.h:92
@ _MM_PERM_ADAD
Definition: avx512fintrin.h:83
@ _MM_PERM_BADA
Definition: avx512fintrin.h:91
@ _MM_PERM_DCDA
@ _MM_PERM_ABBA
Definition: avx512fintrin.h:72
@ _MM_PERM_ACAB
Definition: avx512fintrin.h:77
@ _MM_PERM_CCDD
@ _MM_PERM_CADC
@ _MM_PERM_DDCB
@ _MM_PERM_BABB
Definition: avx512fintrin.h:89
@ _MM_PERM_CCDB
@ _MM_PERM_DDAD
@ _MM_PERM_DBCC
@ _MM_PERM_BCBB
Definition: avx512fintrin.h:99
@ _MM_PERM_ADDC
Definition: avx512fintrin.h:86
@ _MM_PERM_CCBA
@ _MM_PERM_ABCD
Definition: avx512fintrin.h:75
@ _MM_PERM_BCAB
Definition: avx512fintrin.h:98
@ _MM_PERM_DCBC
@ _MM_PERM_BCDD
@ _MM_PERM_CCDC
@ _MM_PERM_ABAC
Definition: avx512fintrin.h:72
@ _MM_PERM_CBCB
@ _MM_PERM_CCAB
@ _MM_PERM_DDCD
@ _MM_PERM_DACA
@ _MM_PERM_ACAD
Definition: avx512fintrin.h:77
@ _MM_PERM_BABA
Definition: avx512fintrin.h:88
@ _MM_PERM_CBCD
@ _MM_PERM_CAAD
@ _MM_PERM_DCDD
@ _MM_PERM_BDBB
@ _MM_PERM_BCAA
Definition: avx512fintrin.h:98
@ _MM_PERM_ABDC
Definition: avx512fintrin.h:76
@ _MM_PERM_BBCD
Definition: avx512fintrin.h:96
@ _MM_PERM_CAAC
@ _MM_PERM_BBAC
Definition: avx512fintrin.h:93
@ _MM_PERM_CBCA
@ _MM_PERM_DCAC
@ _MM_PERM_ABAA
Definition: avx512fintrin.h:71
@ _MM_PERM_CACB
@ _MM_PERM_BBDC
Definition: avx512fintrin.h:97
@ _MM_PERM_CDAD
@ _MM_PERM_ADCD
Definition: avx512fintrin.h:85
@ _MM_PERM_DADB
@ _MM_PERM_DBCD
@ _MM_PERM_DACC
@ _MM_PERM_DACB
@ _MM_PERM_DCBD
@ _MM_PERM_CACA
@ _MM_PERM_ABBC
Definition: avx512fintrin.h:73
@ _MM_PERM_DCCA
@ _MM_PERM_DABC
@ _MM_PERM_CBDD
@ _MM_PERM_DDBD
@ _MM_PERM_DDCA
@ _MM_PERM_BDCD
@ _MM_PERM_CDBD
@ _MM_PERM_ABCB
Definition: avx512fintrin.h:74
@ _MM_PERM_CDDC
@ _MM_PERM_AACB
Definition: avx512fintrin.h:69
@ _MM_PERM_DDAA
@ _MM_PERM_ADDA
Definition: avx512fintrin.h:86
@ _MM_PERM_DADA
@ _MM_PERM_BCDA
@ _MM_PERM_BDAB
@ _MM_PERM_BAAD
Definition: avx512fintrin.h:88
@ _MM_PERM_DBAB
@ _MM_PERM_DCCD
@ _MM_PERM_CABB
@ _MM_PERM_BDAA
@ _MM_PERM_BDCB
@ _MM_PERM_ACDA
Definition: avx512fintrin.h:80
@ _MM_PERM_DADC
@ _MM_PERM_CBDA
@ _MM_PERM_BCAC
Definition: avx512fintrin.h:98
@ _MM_PERM_BACA
Definition: avx512fintrin.h:90
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi32(int __s)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
unsigned short __mmask16
Definition: avx512fintrin.h:42
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu16_epi32(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
#define _MM_FROUND_CUR_DIRECTION
Definition: avx512fintrin.h:49
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_or_epi32(__m512i __W)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_moveldup_ps(__m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_reduce_min_epi32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition: avxintrin.h:3658
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition: avxintrin.h:3671
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
Definition: avxintrin.h:4342
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition: avxintrin.h:4330
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition: avxintrin.h:4354
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition: emmintrin.h:127
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition: emmintrin.h:206
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition: emmintrin.h:87
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition: emmintrin.h:3885
static __inline__ void int __a
Definition: emmintrin.h:4084
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition: emmintrin.h:3500
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition: emmintrin.h:1894
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition: emmintrin.h:1874
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition: emmintrin.h:166
static __inline__ void short __D
Definition: immintrin.h:342
static __inline__ void const void * __src
Definition: movdirintrin.h:45
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25
__inline unsigned int unsigned int __Y
Definition: bmi2intrin.h:19
#define _MM_FROUND_FLOOR
Definition: smmintrin.h:47
#define _MM_FROUND_CEIL
Definition: smmintrin.h:48
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition: xmmintrin.h:2819
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition: xmmintrin.h:168
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
Definition: xmmintrin.h:127
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition: xmmintrin.h:87
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition: xmmintrin.h:2029
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
Definition: xmmintrin.h:208