11#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." 
   14#ifndef __AVX512DQINTRIN_H 
   15#define __AVX512DQINTRIN_H 
   /* Attribute bundle: always-inline, nodebug, target feature "avx512dq" and
    * a minimum vector width of 512. Presumably attached to the 512-bit
    * intrinsic functions of this header (their declarations are not visible
    * here) -- confirm. */
   #define __DEFAULT_FN_ATTRS512 \
     __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), \
                    __min_vector_width__(512)))
 
   /* Attribute bundle without a vector-width requirement: always-inline,
    * nodebug and target feature "avx512dq". */
   #define __DEFAULT_FN_ATTRS \
     __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
 
   /* In C++11 and later the *_CONSTEXPR bundles append `constexpr` to the
    * corresponding attribute bundle; in every other mode they are plain
    * aliases of it. */
   #if defined(__cplusplus) && (__cplusplus >= 201103L)
   #define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
   #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
   #else
   #define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
   #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
   #endif
   34  return __builtin_ia32_knotqi(__M);
 
 
   65  return (
unsigned char)__builtin_ia32_kortestcqi(__A, __B);
 
 
   71  return (
unsigned char)__builtin_ia32_kortestzqi(__A, __B);
 
 
   76  *__C = (
unsigned char)__builtin_ia32_kortestcqi(__A, __B);
 
   77  return (
unsigned char)__builtin_ia32_kortestzqi(__A, __B);
 
 
   83  return (
unsigned char)__builtin_ia32_ktestcqi(__A, __B);
 
 
   89  return (
unsigned char)__builtin_ia32_ktestzqi(__A, __B);
 
 
   94  *__C = (
unsigned char)__builtin_ia32_ktestcqi(__A, __B);
 
   95  return (
unsigned char)__builtin_ia32_ktestzqi(__A, __B);
 
 
  101  return (
unsigned char)__builtin_ia32_ktestchi(__A, __B);
 
 
  107  return (
unsigned char)__builtin_ia32_ktestzhi(__A, __B);
 
 
  112  *__C = (
unsigned char)__builtin_ia32_ktestchi(__A, __B);
 
  113  return (
unsigned char)__builtin_ia32_ktestzhi(__A, __B);
 
 
  /* Expands to __builtin_ia32_kshiftliqi applied to A (cast to __mmask8) and
   * I (cast to unsigned int); the result is cast back to __mmask8. */
  #define _kshiftli_mask8(A, I) \
    ((__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I)))
 
  /* Expands to __builtin_ia32_kshiftriqi applied to A (cast to __mmask8) and
   * I (cast to unsigned int); the result is cast back to __mmask8. */
  #define _kshiftri_mask8(A, I) \
    ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I)))
 
  134  return (
unsigned int)__builtin_ia32_kmovb((
__mmask8)__A);
 
 
  154  return (__m512i) ((__v8du) __A * (__v8du) __B);
 
 
  159  return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
 
 
  166  return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
 
 
  173  return (__m512d)((__v8du)__A ^ (__v8du)__B);
 
 
  178  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  185  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  192  return (__m512)((__v16su)__A ^ (__v16su)__B);
 
 
  197  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
 
 
  204  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
 
 
  211  return (__m512d)((__v8du)__A | (__v8du)__B);
 
 
  216  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  223  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  230  return (__m512)((__v16su)__A | (__v16su)__B);
 
 
  235  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
 
 
  242  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
 
 
  249  return (__m512d)((__v8du)__A & (__v8du)__B);
 
 
  254  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  261  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  268  return (__m512)((__v16su)__A & (__v16su)__B);
 
 
  273  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
 
 
  280  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
 
 
  287  return (__m512d)(~(__v8du)__A & (__v8du)__B);
 
 
  292  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  299  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  306  return (__m512)(~(__v16su)__A & (__v16su)__B);
 
 
  311  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
 
 
  318  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
 
 
  325  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
 
 
  333  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
 
 
  341  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
 
 
  /* Unmasked form: calls __builtin_ia32_cvtpd2qq512_mask with A (as __v8df),
   * a _mm512_setzero_si512() second operand, mask (__mmask8)-1 and R as the
   * final int argument; the result is cast to __m512i. */
  #define _mm512_cvt_roundpd_epi64(A, R) \
    ((__m512i)__builtin_ia32_cvtpd2qq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvtpd2qq512_mask with A (as __v8df),
   * W (as __v8di) as the second operand, U (as __mmask8) as the mask and R as
   * the final int argument; the result is cast to __m512i. */
  #define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
    ((__m512i)__builtin_ia32_cvtpd2qq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)(__m512i)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvtpd2qq512_mask with A
   * (as __v8df), a _mm512_setzero_si512() second operand, U (as __mmask8) as
   * the mask and R as the final int argument; result cast to __m512i. */
  #define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
    ((__m512i)__builtin_ia32_cvtpd2qq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), \
        (int)(R)))
 
  364  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
 
 
  372  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
 
 
  380  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
 
 
  /* Unmasked form: calls __builtin_ia32_cvtpd2uqq512_mask with A (as __v8df),
   * a _mm512_setzero_si512() second operand, mask (__mmask8)-1 and R as the
   * final int argument; the result is cast to __m512i. */
  #define _mm512_cvt_roundpd_epu64(A, R) \
    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvtpd2uqq512_mask with A (as __v8df),
   * W (as __v8di) as the second operand, U (as __mmask8) as the mask and R as
   * the final int argument; the result is cast to __m512i. */
  #define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)(__m512i)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvtpd2uqq512_mask with A
   * (as __v8df), a _mm512_setzero_si512() second operand, U (as __mmask8) as
   * the mask and R as the final int argument; result cast to __m512i. */
  #define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), \
        (int)(R)))
 
  403  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
 
 
  411  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
 
 
  419  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
 
 
  /* Unmasked form: calls __builtin_ia32_cvtps2qq512_mask with A (a __m256,
   * as __v8sf), a _mm512_setzero_si512() second operand, mask (__mmask8)-1
   * and R as the final int argument; the result is cast to __m512i. */
  #define _mm512_cvt_roundps_epi64(A, R) \
    ((__m512i)__builtin_ia32_cvtps2qq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvtps2qq512_mask with A (a __m256, as
   * __v8sf), W (as __v8di) as the second operand, U (as __mmask8) as the
   * mask and R as the final int argument; the result is cast to __m512i. */
  #define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
    ((__m512i)__builtin_ia32_cvtps2qq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)(__m512i)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvtps2qq512_mask with A
   * (a __m256, as __v8sf), a _mm512_setzero_si512() second operand, U (as
   * __mmask8) as the mask and R as the final int argument. */
  #define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
    ((__m512i)__builtin_ia32_cvtps2qq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), \
        (int)(R)))
 
  442  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
 
 
  450  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
 
 
  458  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
 
 
  /* Unmasked form: calls __builtin_ia32_cvtps2uqq512_mask with A (a __m256,
   * as __v8sf), a _mm512_setzero_si512() second operand, mask (__mmask8)-1
   * and R as the final int argument; the result is cast to __m512i. */
  #define _mm512_cvt_roundps_epu64(A, R) \
    ((__m512i)__builtin_ia32_cvtps2uqq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvtps2uqq512_mask with A (a __m256, as
   * __v8sf), W (as __v8di) as the second operand, U (as __mmask8) as the
   * mask and R as the final int argument; the result is cast to __m512i. */
  #define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
    ((__m512i)__builtin_ia32_cvtps2uqq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)(__m512i)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvtps2uqq512_mask with A
   * (a __m256, as __v8sf), a _mm512_setzero_si512() second operand, U (as
   * __mmask8) as the mask and R as the final int argument. */
  #define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
    ((__m512i)__builtin_ia32_cvtps2uqq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), \
        (int)(R)))
 
  481  return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
 
 
  486  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  493  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  /* Unmasked form: calls __builtin_ia32_cvtqq2pd512_mask with A (as __v8di),
   * a _mm512_setzero_pd() second operand, mask (__mmask8)-1 and R as the
   * final int argument; the result is cast to __m512d. */
  #define _mm512_cvt_roundepi64_pd(A, R) \
    ((__m512d)__builtin_ia32_cvtqq2pd512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvtqq2pd512_mask with A (as __v8di),
   * W (as __v8df) as the second operand, U (as __mmask8) as the mask and R as
   * the final int argument; the result is cast to __m512d. */
  #define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
    ((__m512d)__builtin_ia32_cvtqq2pd512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8df)(__m512d)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvtqq2pd512_mask with A
   * (as __v8di), a _mm512_setzero_pd() second operand, U (as __mmask8) as the
   * mask and R as the final int argument; result cast to __m512d. */
  #define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
    ((__m512d)__builtin_ia32_cvtqq2pd512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U), \
        (int)(R)))
 
  515  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
 
 
  523  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
 
 
  531  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
 
 
  /* Unmasked form: calls __builtin_ia32_cvtqq2ps512_mask with A (as __v8di),
   * a _mm256_setzero_ps() second operand, mask (__mmask8)-1 and R as the
   * final int argument; the result is cast to __m256. */
  #define _mm512_cvt_roundepi64_ps(A, R) \
    ((__m256)__builtin_ia32_cvtqq2ps512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvtqq2ps512_mask with A (as __v8di),
   * W (as __v8sf) as the second operand, U (as __mmask8) as the mask and R as
   * the final int argument; the result is cast to __m256.
   * NOTE(review): the trailing (int)(R) argument and closing parentheses were
   * reconstructed from the parallel unmasked/maskz macros -- confirm against
   * upstream. */
  #define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
    ((__m256)__builtin_ia32_cvtqq2ps512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8sf)(__m256)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvtqq2ps512_mask with A
   * (as __v8di), a _mm256_setzero_ps() second operand, U (as __mmask8) as the
   * mask and R as the final int argument; result cast to __m256. */
  #define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
    ((__m256)__builtin_ia32_cvtqq2ps512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)(U), \
        (int)(R)))
 
  555  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
 
 
  563  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
 
 
  571  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
 
 
  /* Unmasked form: calls __builtin_ia32_cvttpd2qq512_mask with A (as __v8df),
   * a _mm512_setzero_si512() second operand, mask (__mmask8)-1 and R as the
   * final int argument; the result is cast to __m512i. */
  #define _mm512_cvtt_roundpd_epi64(A, R) \
    ((__m512i)__builtin_ia32_cvttpd2qq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvttpd2qq512_mask with A (as __v8df),
   * W (as __v8di) as the second operand, U (as __mmask8) as the mask and R as
   * the final int argument; the result is cast to __m512i. */
  #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
    ((__m512i)__builtin_ia32_cvttpd2qq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)(__m512i)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvttpd2qq512_mask with A
   * (as __v8df), a _mm512_setzero_si512() second operand, U (as __mmask8) as
   * the mask and R as the final int argument; result cast to __m512i. */
  #define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
    ((__m512i)__builtin_ia32_cvttpd2qq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), \
        (int)(R)))
 
  594  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
 
 
  602  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
 
 
  610  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
 
 
  /* Unmasked form: calls __builtin_ia32_cvttpd2uqq512_mask with A (as
   * __v8df), a _mm512_setzero_si512() second operand, mask (__mmask8)-1 and
   * R as the final int argument; the result is cast to __m512i. */
  #define _mm512_cvtt_roundpd_epu64(A, R) \
    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvttpd2uqq512_mask with A (as __v8df),
   * W (as __v8di) as the second operand, U (as __mmask8) as the mask and R as
   * the final int argument; the result is cast to __m512i. */
  #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)(__m512i)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvttpd2uqq512_mask with A
   * (as __v8df), a _mm512_setzero_si512() second operand, U (as __mmask8) as
   * the mask and R as the final int argument; result cast to __m512i. */
  #define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), \
        (int)(R)))
 
  633  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
 
 
  641  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
 
 
  649  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
 
 
  /* Unmasked form: calls __builtin_ia32_cvttps2qq512_mask with A (a __m256,
   * as __v8sf), a _mm512_setzero_si512() second operand, mask (__mmask8)-1
   * and R as the final int argument; the result is cast to __m512i. */
  #define _mm512_cvtt_roundps_epi64(A, R) \
    ((__m512i)__builtin_ia32_cvttps2qq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvttps2qq512_mask with A (a __m256, as
   * __v8sf), W (as __v8di) as the second operand, U (as __mmask8) as the
   * mask and R as the final int argument; the result is cast to __m512i. */
  #define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
    ((__m512i)__builtin_ia32_cvttps2qq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)(__m512i)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvttps2qq512_mask with A
   * (a __m256, as __v8sf), a _mm512_setzero_si512() second operand, U (as
   * __mmask8) as the mask and R as the final int argument. */
  #define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
    ((__m512i)__builtin_ia32_cvttps2qq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), \
        (int)(R)))
 
  672  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
 
 
  680  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
 
 
  688  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
 
 
  /* Unmasked form: calls __builtin_ia32_cvttps2uqq512_mask with A (a __m256,
   * as __v8sf), a _mm512_setzero_si512() second operand, mask (__mmask8)-1
   * and R as the final int argument; the result is cast to __m512i. */
  #define _mm512_cvtt_roundps_epu64(A, R) \
    ((__m512i)__builtin_ia32_cvttps2uqq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvttps2uqq512_mask with A (a __m256,
   * as __v8sf), W (as __v8di) as the second operand, U (as __mmask8) as the
   * mask and R as the final int argument; the result is cast to __m512i. */
  #define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
    ((__m512i)__builtin_ia32_cvttps2uqq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)(__m512i)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvttps2uqq512_mask with A
   * (a __m256, as __v8sf), a _mm512_setzero_si512() second operand, U (as
   * __mmask8) as the mask and R as the final int argument. */
  #define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
    ((__m512i)__builtin_ia32_cvttps2uqq512_mask( \
        (__v8sf)(__m256)(A), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), \
        (int)(R)))
 
  711  return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
 
 
  716  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  723  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
 
 
  /* Unmasked form: calls __builtin_ia32_cvtuqq2pd512_mask with A (as __v8di),
   * a _mm512_setzero_pd() second operand, mask (__mmask8)-1 and R as the
   * final int argument; the result is cast to __m512d. */
  #define _mm512_cvt_roundepu64_pd(A, R) \
    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvtuqq2pd512_mask with A (as __v8di),
   * W (as __v8df) as the second operand, U (as __mmask8) as the mask and R as
   * the final int argument; the result is cast to __m512d. */
  #define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8df)(__m512d)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvtuqq2pd512_mask with A
   * (as __v8di), a _mm512_setzero_pd() second operand, U (as __mmask8) as the
   * mask and R as the final int argument; result cast to __m512d. */
  #define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U), \
        (int)(R)))
 
  747  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
 
 
  755  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
 
 
  763  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
 
 
  /* Unmasked form: calls __builtin_ia32_cvtuqq2ps512_mask with A (as __v8di),
   * a _mm256_setzero_ps() second operand, mask (__mmask8)-1 and R as the
   * final int argument; the result is cast to __m256. */
  #define _mm512_cvt_roundepu64_ps(A, R) \
    ((__m256)__builtin_ia32_cvtuqq2ps512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form: calls __builtin_ia32_cvtuqq2ps512_mask with A (as __v8di),
   * W (as __v8sf) as the second operand, U (as __mmask8) as the mask and R as
   * the final int argument; the result is cast to __m256.
   * NOTE(review): the trailing (int)(R) argument and closing parentheses were
   * reconstructed from the parallel unmasked/maskz macros -- confirm against
   * upstream. */
  #define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
    ((__m256)__builtin_ia32_cvtuqq2ps512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8sf)(__m256)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form: calls __builtin_ia32_cvtuqq2ps512_mask with A
   * (as __v8di), a _mm256_setzero_ps() second operand, U (as __mmask8) as the
   * mask and R as the final int argument; result cast to __m256. */
  #define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
    ((__m256)__builtin_ia32_cvtuqq2ps512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Unmasked form: calls __builtin_ia32_rangepd512_mask with A and B (as
   * __v8df), C as an int, a _mm512_setzero_pd() operand, mask (__mmask8)-1
   * and _MM_FROUND_CUR_DIRECTION; the result is cast to __m512d.
   * NOTE(review): the (__mmask8)-1 mask argument was reconstructed from
   * _mm512_range_round_pd, which passes it in the same position -- confirm
   * against upstream. */
  #define _mm512_range_pd(A, B, C) \
    ((__m512d)__builtin_ia32_rangepd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (int)(C), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Masked form: calls __builtin_ia32_rangepd512_mask with A and B (as
   * __v8df), C as an int, W (as __v8df), U (as __mmask8) as the mask and
   * _MM_FROUND_CUR_DIRECTION; the result is cast to __m512d. */
  #define _mm512_mask_range_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_rangepd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (int)(C), \
        (__v8df)(__m512d)(W), \
        (__mmask8)(U), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Zero-operand masked form: calls __builtin_ia32_rangepd512_mask with A
   * and B (as __v8df), C as an int, a _mm512_setzero_pd() operand, U (as
   * __mmask8) as the mask and _MM_FROUND_CUR_DIRECTION.
   * NOTE(review): the (__mmask8)(U) argument was reconstructed from
   * _mm512_maskz_range_round_pd (U was otherwise unused) -- confirm against
   * upstream. */
  #define _mm512_maskz_range_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_rangepd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (int)(C), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Unmasked form with explicit R: calls __builtin_ia32_rangepd512_mask with
   * A and B (as __v8df), C as an int, a _mm512_setzero_pd() operand, mask
   * (__mmask8)-1 and R as the final int argument. */
  #define _mm512_range_round_pd(A, B, C, R) \
    ((__m512d)__builtin_ia32_rangepd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (int)(C), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form with explicit R: calls __builtin_ia32_rangepd512_mask with A
   * and B (as __v8df), C as an int, W (as __v8df), U (as __mmask8) as the
   * mask and R as the final int argument.
   * NOTE(review): the trailing (int)(R) argument and closing parentheses were
   * reconstructed from the parallel unmasked/maskz macros -- confirm against
   * upstream. */
  #define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
    ((__m512d)__builtin_ia32_rangepd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (int)(C), \
        (__v8df)(__m512d)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form with explicit R: calls
   * __builtin_ia32_rangepd512_mask with A and B (as __v8df), C as an int, a
   * _mm512_setzero_pd() operand, U (as __mmask8) and R as the final int. */
  #define _mm512_maskz_range_round_pd(U, A, B, C, R) \
    ((__m512d)__builtin_ia32_rangepd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (int)(C), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Unmasked form: calls __builtin_ia32_rangeps512_mask with A and B (as
   * __v16sf), C as an int, a _mm512_setzero_ps() operand, mask (__mmask16)-1
   * and _MM_FROUND_CUR_DIRECTION; the result is cast to __m512.
   * NOTE(review): the (__mmask16)-1 mask argument was reconstructed from
   * _mm512_range_round_ps, which passes it in the same position -- confirm
   * against upstream. */
  #define _mm512_range_ps(A, B, C) \
    ((__m512)__builtin_ia32_rangeps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (int)(C), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)-1, \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Masked form: calls __builtin_ia32_rangeps512_mask with A and B (as
   * __v16sf), C as an int, W (as __v16sf), U (as __mmask16) as the mask and
   * _MM_FROUND_CUR_DIRECTION; the result is cast to __m512. */
  #define _mm512_mask_range_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_rangeps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (int)(C), \
        (__v16sf)(__m512)(W), \
        (__mmask16)(U), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Zero-operand masked form: calls __builtin_ia32_rangeps512_mask with A
   * and B (as __v16sf), C as an int, a _mm512_setzero_ps() operand, U (as
   * __mmask16) as the mask and _MM_FROUND_CUR_DIRECTION.
   * NOTE(review): the (__mmask16)(U) argument was reconstructed from
   * _mm512_maskz_range_round_ps (U was otherwise unused) -- confirm against
   * upstream. */
  #define _mm512_maskz_range_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_rangeps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (int)(C), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)(U), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Unmasked form with explicit R: calls __builtin_ia32_rangeps512_mask with
   * A and B (as __v16sf), C as an int, a _mm512_setzero_ps() operand, mask
   * (__mmask16)-1 and R as the final int argument. */
  #define _mm512_range_round_ps(A, B, C, R) \
    ((__m512)__builtin_ia32_rangeps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (int)(C), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)-1, \
        (int)(R)))
 
  /* Masked form with explicit R: calls __builtin_ia32_rangeps512_mask with A
   * and B (as __v16sf), C as an int, W (as __v16sf), U (as __mmask16) as the
   * mask and R as the final int argument.
   * NOTE(review): the trailing (int)(R) argument and closing parentheses were
   * reconstructed from the parallel unmasked/maskz macros -- confirm against
   * upstream. */
  #define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
    ((__m512)__builtin_ia32_rangeps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (int)(C), \
        (__v16sf)(__m512)(W), \
        (__mmask16)(U), \
        (int)(R)))
 
  /* Zero-operand masked form with explicit R: calls
   * __builtin_ia32_rangeps512_mask with A and B (as __v16sf), C as an int, a
   * _mm512_setzero_ps() operand, U (as __mmask16) and R as the final int. */
  #define _mm512_maskz_range_round_ps(U, A, B, C, R) \
    ((__m512)__builtin_ia32_rangeps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (int)(C), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)(U), \
        (int)(R)))
 
  /* Unmasked form with explicit R: calls __builtin_ia32_rangess128_round_mask
   * with A and B (as __v4sf), a _mm_setzero_ps() operand, mask (__mmask8)-1,
   * C as an int and R as the final int argument; result cast to __m128.
   * NOTE(review): the trailing (int)(R) argument and closing parentheses were
   * missing (R was unused); reconstructed by analogy with
   * _mm_reduce_round_ss -- confirm against upstream. */
  #define _mm_range_round_ss(A, B, C, R) \
    ((__m128)__builtin_ia32_rangess128_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, \
        (int)(C), \
        (int)(R)))
 
  /* Forwards to _mm_range_round_ss with _MM_FROUND_CUR_DIRECTION as R. */
  #define _mm_range_ss(A, B, C) \
    _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)
  /* Masked form with explicit R: calls __builtin_ia32_rangess128_round_mask
   * with A and B (as __v4sf), W (as __v4sf), U (as __mmask8) as the mask, C
   * as an int and R as the final int argument; result cast to __m128.
   * NOTE(review): the trailing (int)(R) argument and closing parentheses were
   * missing (R was unused); reconstructed by analogy with
   * _mm_mask_reduce_round_ss -- confirm against upstream. */
  #define _mm_mask_range_round_ss(W, U, A, B, C, R) \
    ((__m128)__builtin_ia32_rangess128_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)(__m128)(W), \
        (__mmask8)(U), \
        (int)(C), \
        (int)(R)))
 
  /* Forwards to _mm_mask_range_round_ss with _MM_FROUND_CUR_DIRECTION as R. */
  #define _mm_mask_range_ss(W, U, A, B, C) \
    _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)
  /* Zero-operand masked form with explicit R: calls
   * __builtin_ia32_rangess128_round_mask with A and B (as __v4sf), a
   * _mm_setzero_ps() operand, U (as __mmask8), C as an int and R as the final
   * int argument; result cast to __m128.
   * NOTE(review): the trailing (int)(R) argument and closing parentheses were
   * missing (R was unused); reconstructed by analogy with
   * _mm_maskz_reduce_round_ss -- confirm against upstream. */
  #define _mm_maskz_range_round_ss(U, A, B, C, R) \
    ((__m128)__builtin_ia32_rangess128_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U), \
        (int)(C), \
        (int)(R)))
 
  /* Forwards to _mm_maskz_range_round_ss with _MM_FROUND_CUR_DIRECTION as R. */
  #define _mm_maskz_range_ss(U, A, B, C) \
    _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
  /* Unmasked form with explicit R: calls __builtin_ia32_rangesd128_round_mask
   * with A and B (as __v2df), a _mm_setzero_pd() operand, mask (__mmask8)-1,
   * C as an int and R as the final int argument; result cast to __m128d.
   * NOTE(review): the trailing (int)(R) argument and closing parentheses were
   * missing (R was unused); reconstructed by analogy with
   * _mm_reduce_round_sd -- confirm against upstream. */
  #define _mm_range_round_sd(A, B, C, R) \
    ((__m128d)__builtin_ia32_rangesd128_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1, \
        (int)(C), \
        (int)(R)))
 
  /* Forwards to _mm_range_round_sd with _MM_FROUND_CUR_DIRECTION as R. */
  #define _mm_range_sd(A, B, C) \
    _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)
  /* Masked form with explicit R: calls __builtin_ia32_rangesd128_round_mask
   * with A and B (as __v2df), W (as __v2df), U (as __mmask8) as the mask, C
   * as an int and R as the final int argument; result cast to __m128d.
   * NOTE(review): the trailing (int)(R) argument and closing parentheses were
   * missing (R was unused); reconstructed by analogy with
   * _mm_mask_reduce_round_sd -- confirm against upstream. */
  #define _mm_mask_range_round_sd(W, U, A, B, C, R) \
    ((__m128d)__builtin_ia32_rangesd128_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)(__m128d)(W), \
        (__mmask8)(U), \
        (int)(C), \
        (int)(R)))
 
  /* Forwards to _mm_mask_range_round_sd with _MM_FROUND_CUR_DIRECTION as R. */
  #define _mm_mask_range_sd(W, U, A, B, C) \
    _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)
  /* Zero-operand masked form with explicit R: calls
   * __builtin_ia32_rangesd128_round_mask with A and B (as __v2df), a
   * _mm_setzero_pd() operand, U (as __mmask8), C as an int and R as the final
   * int argument; result cast to __m128d.
   * NOTE(review): the trailing (int)(R) argument and closing parentheses were
   * missing (R was unused); reconstructed by analogy with
   * _mm_maskz_reduce_round_sd -- confirm against upstream. */
  #define _mm_maskz_range_round_sd(U, A, B, C, R) \
    ((__m128d)__builtin_ia32_rangesd128_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(U), \
        (int)(C), \
        (int)(R)))
 
  /* Forwards to _mm_maskz_range_round_sd with _MM_FROUND_CUR_DIRECTION as R. */
  #define _mm_maskz_range_sd(U, A, B, C) \
    _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
  /* Unmasked form: calls __builtin_ia32_reducepd512_mask with A (as __v8df),
   * B as an int, a _mm512_setzero_pd() operand, mask (__mmask8)-1 and
   * _MM_FROUND_CUR_DIRECTION; the result is cast to __m512d.
   * NOTE(review): the (__mmask8)-1 mask argument was reconstructed from
   * _mm512_reduce_round_pd, which passes it in the same position -- confirm
   * against upstream. */
  #define _mm512_reduce_pd(A, B) \
    ((__m512d)__builtin_ia32_reducepd512_mask( \
        (__v8df)(__m512d)(A), \
        (int)(B), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Masked form: calls __builtin_ia32_reducepd512_mask with A (as __v8df), B
   * as an int, W (as __v8df), U (as __mmask8) as the mask and
   * _MM_FROUND_CUR_DIRECTION; the result is cast to __m512d.
   * NOTE(review): the (__mmask8)(U) argument was reconstructed from
   * _mm512_mask_reduce_round_pd (U was otherwise unused) -- confirm against
   * upstream. */
  #define _mm512_mask_reduce_pd(W, U, A, B) \
    ((__m512d)__builtin_ia32_reducepd512_mask( \
        (__v8df)(__m512d)(A), \
        (int)(B), \
        (__v8df)(__m512d)(W), \
        (__mmask8)(U), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Zero-operand masked form: calls __builtin_ia32_reducepd512_mask with A
   * (as __v8df), B as an int, a _mm512_setzero_pd() operand, U (as __mmask8)
   * as the mask and _MM_FROUND_CUR_DIRECTION.
   * NOTE(review): the (__mmask8)(U) argument was reconstructed from
   * _mm512_maskz_reduce_round_pd (U was otherwise unused) -- confirm against
   * upstream. */
  #define _mm512_maskz_reduce_pd(U, A, B) \
    ((__m512d)__builtin_ia32_reducepd512_mask( \
        (__v8df)(__m512d)(A), \
        (int)(B), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Unmasked form: calls __builtin_ia32_reduceps512_mask with A (as
   * __v16sf), B as an int, a _mm512_setzero_ps() operand, mask (__mmask16)-1
   * and _MM_FROUND_CUR_DIRECTION; the result is cast to __m512.
   * NOTE(review): the (__mmask16)-1 mask argument was reconstructed from
   * _mm512_reduce_round_ps, which passes it in the same position -- confirm
   * against upstream. */
  #define _mm512_reduce_ps(A, B) \
    ((__m512)__builtin_ia32_reduceps512_mask( \
        (__v16sf)(__m512)(A), \
        (int)(B), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)-1, \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Masked form: calls __builtin_ia32_reduceps512_mask with A (as __v16sf),
   * B as an int, W (as __v16sf), U (as __mmask16) as the mask and
   * _MM_FROUND_CUR_DIRECTION; the result is cast to __m512.
   * NOTE(review): the (__mmask16)(U) argument was reconstructed from
   * _mm512_mask_reduce_round_ps (U was otherwise unused) -- confirm against
   * upstream. */
  #define _mm512_mask_reduce_ps(W, U, A, B) \
    ((__m512)__builtin_ia32_reduceps512_mask( \
        (__v16sf)(__m512)(A), \
        (int)(B), \
        (__v16sf)(__m512)(W), \
        (__mmask16)(U), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Zero-operand masked form: calls __builtin_ia32_reduceps512_mask with A
   * (as __v16sf), B as an int, a _mm512_setzero_ps() operand, U (as
   * __mmask16) as the mask and _MM_FROUND_CUR_DIRECTION.
   * NOTE(review): the (__mmask16)(U) argument was reconstructed from
   * _mm512_maskz_reduce_round_ps (U was otherwise unused) -- confirm against
   * upstream. */
  #define _mm512_maskz_reduce_ps(U, A, B) \
    ((__m512)__builtin_ia32_reduceps512_mask( \
        (__v16sf)(__m512)(A), \
        (int)(B), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)(U), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Unmasked form with explicit R: calls __builtin_ia32_reducepd512_mask with
   * A (as __v8df), B as an int, a _mm512_setzero_pd() operand, mask
   * (__mmask8)-1 and R as the final int argument. */
  #define _mm512_reduce_round_pd(A, B, R) \
    ((__m512d)__builtin_ia32_reducepd512_mask( \
        (__v8df)(__m512d)(A), \
        (int)(B), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, \
        (int)(R)))
 
  /* Masked form with explicit R: calls __builtin_ia32_reducepd512_mask with A
   * (as __v8df), B as an int, W (as __v8df), U (as __mmask8) as the mask and
   * R as the final int argument. */
  #define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_reducepd512_mask( \
        (__v8df)(__m512d)(A), \
        (int)(B), \
        (__v8df)(__m512d)(W), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Zero-operand masked form with explicit R: calls
   * __builtin_ia32_reducepd512_mask with A (as __v8df), B as an int, a
   * _mm512_setzero_pd() operand, U (as __mmask8) and R as the final int. */
  #define _mm512_maskz_reduce_round_pd(U, A, B, R) \
    ((__m512d)__builtin_ia32_reducepd512_mask( \
        (__v8df)(__m512d)(A), \
        (int)(B), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U), \
        (int)(R)))
 
  /* Unmasked form with explicit R: calls __builtin_ia32_reduceps512_mask with
   * A (as __v16sf), B as an int, a _mm512_setzero_ps() operand, mask
   * (__mmask16)-1 and R as the final int argument. */
  #define _mm512_reduce_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_reduceps512_mask( \
        (__v16sf)(__m512)(A), \
        (int)(B), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)-1, \
        (int)(R)))
 
  /* Masked form with explicit R: calls __builtin_ia32_reduceps512_mask with A
   * (as __v16sf), B as an int, W (as __v16sf), U (as __mmask16) as the mask
   * and R as the final int argument. */
  #define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_reduceps512_mask( \
        (__v16sf)(__m512)(A), \
        (int)(B), \
        (__v16sf)(__m512)(W), \
        (__mmask16)(U), \
        (int)(R)))
 
  /* Zero-operand masked form with explicit R: calls
   * __builtin_ia32_reduceps512_mask with A (as __v16sf), B as an int, a
   * _mm512_setzero_ps() operand, U (as __mmask16) and R as the final int. */
  #define _mm512_maskz_reduce_round_ps(U, A, B, R) \
    ((__m512)__builtin_ia32_reduceps512_mask( \
        (__v16sf)(__m512)(A), \
        (int)(B), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)(U), \
        (int)(R)))
 
  /* Unmasked form: calls __builtin_ia32_reducess_mask with A and B (as
   * __v4sf), a _mm_setzero_ps() operand, mask (__mmask8)-1, C as an int and
   * _MM_FROUND_CUR_DIRECTION; the result is cast to __m128. */
  #define _mm_reduce_ss(A, B, C) \
    ((__m128)__builtin_ia32_reducess_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, \
        (int)(C), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Masked form: calls __builtin_ia32_reducess_mask with A and B (as
   * __v4sf), W (as __v4sf), U (as __mmask8) as the mask, C as an int and
   * _MM_FROUND_CUR_DIRECTION; the result is cast to __m128. */
  #define _mm_mask_reduce_ss(W, U, A, B, C) \
    ((__m128)__builtin_ia32_reducess_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)(__m128)(W), \
        (__mmask8)(U), \
        (int)(C), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Zero-operand masked form: calls __builtin_ia32_reducess_mask with A and
   * B (as __v4sf), a _mm_setzero_ps() operand, U (as __mmask8) as the mask,
   * C as an int and _MM_FROUND_CUR_DIRECTION; result cast to __m128. */
  #define _mm_maskz_reduce_ss(U, A, B, C) \
    ((__m128)__builtin_ia32_reducess_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U), \
        (int)(C), \
        _MM_FROUND_CUR_DIRECTION))
 
  /* Unmasked form with explicit R: calls __builtin_ia32_reducess_mask with A
   * and B (as __v4sf), a _mm_setzero_ps() operand, mask (__mmask8)-1, C as
   * an int and R as the final int argument; result cast to __m128. */
  #define _mm_reduce_round_ss(A, B, C, R) \
    ((__m128)__builtin_ia32_reducess_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, \
        (int)(C), \
        (int)(R)))
 
 /* Masked form with explicit R: calls __builtin_ia32_reducess_mask with A and
  * B (as __v4sf), W (as __v4sf), U (as __mmask8) as the mask, C as an int and
  * R as the final int argument; result cast to __m128. */
 #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
   ((__m128)__builtin_ia32_reducess_mask( \
       (__v4sf)(__m128)(A), \
       (__v4sf)(__m128)(B), \
       (__v4sf)(__m128)(W), \
       (__mmask8)(U), \
       (int)(C), \
       (int)(R)))
 
 /* Zero-operand masked form with explicit R: calls
  * __builtin_ia32_reducess_mask with A and B (as __v4sf), a _mm_setzero_ps()
  * operand, U (as __mmask8), C as an int and R as the final int argument. */
 #define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
   ((__m128)__builtin_ia32_reducess_mask( \
       (__v4sf)(__m128)(A), \
       (__v4sf)(__m128)(B), \
       (__v4sf)_mm_setzero_ps(), \
       (__mmask8)(U), \
       (int)(C), \
       (int)(R)))
 
 /* Unmasked form: calls __builtin_ia32_reducesd_mask with A and B (as
  * __v2df), a _mm_setzero_pd() operand, mask (__mmask8)-1, C as an int and
  * _MM_FROUND_CUR_DIRECTION; the result is cast to __m128d. */
 #define _mm_reduce_sd(A, B, C) \
   ((__m128d)__builtin_ia32_reducesd_mask( \
       (__v2df)(__m128d)(A), \
       (__v2df)(__m128d)(B), \
       (__v2df)_mm_setzero_pd(), \
       (__mmask8)-1, \
       (int)(C), \
       _MM_FROUND_CUR_DIRECTION))
 
 /* Masked form: calls __builtin_ia32_reducesd_mask with A and B (as __v2df),
  * W (as __v2df), U (as __mmask8) as the mask, C as an int and
  * _MM_FROUND_CUR_DIRECTION; the result is cast to __m128d. */
 #define _mm_mask_reduce_sd(W, U, A, B, C) \
   ((__m128d)__builtin_ia32_reducesd_mask( \
       (__v2df)(__m128d)(A), \
       (__v2df)(__m128d)(B), \
       (__v2df)(__m128d)(W), \
       (__mmask8)(U), \
       (int)(C), \
       _MM_FROUND_CUR_DIRECTION))
 
 /* Zero-operand masked form: calls __builtin_ia32_reducesd_mask with A and B
  * (as __v2df), a _mm_setzero_pd() operand, U (as __mmask8) as the mask, C as
  * an int and _MM_FROUND_CUR_DIRECTION; result cast to __m128d. */
 #define _mm_maskz_reduce_sd(U, A, B, C) \
   ((__m128d)__builtin_ia32_reducesd_mask( \
       (__v2df)(__m128d)(A), \
       (__v2df)(__m128d)(B), \
       (__v2df)_mm_setzero_pd(), \
       (__mmask8)(U), \
       (int)(C), \
       _MM_FROUND_CUR_DIRECTION))
 
 /* Unmasked form with explicit R: calls __builtin_ia32_reducesd_mask with A
  * and B (as __v2df), a _mm_setzero_pd() operand, mask (__mmask8)-1, C as an
  * int and R as the final int argument; result cast to __m128d. */
 #define _mm_reduce_round_sd(A, B, C, R) \
   ((__m128d)__builtin_ia32_reducesd_mask( \
       (__v2df)(__m128d)(A), \
       (__v2df)(__m128d)(B), \
       (__v2df)_mm_setzero_pd(), \
       (__mmask8)-1, \
       (int)(C), \
       (int)(R)))
 
 /* Masked form with explicit R: calls __builtin_ia32_reducesd_mask with A and
  * B (as __v2df), W (as __v2df), U (as __mmask8) as the mask, C as an int and
  * R as the final int argument; result cast to __m128d. */
 #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
   ((__m128d)__builtin_ia32_reducesd_mask( \
       (__v2df)(__m128d)(A), \
       (__v2df)(__m128d)(B), \
       (__v2df)(__m128d)(W), \
       (__mmask8)(U), \
       (int)(C), \
       (int)(R)))
 
 /* Zero-operand masked form with explicit R: calls
  * __builtin_ia32_reducesd_mask with A and B (as __v2df), a _mm_setzero_pd()
  * operand, U (as __mmask8), C as an int and R as the final int argument. */
 #define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
   ((__m128d)__builtin_ia32_reducesd_mask( \
       (__v2df)(__m128d)(A), \
       (__v2df)(__m128d)(B), \
       (__v2df)_mm_setzero_pd(), \
       (__mmask8)(U), \
       (int)(C), \
       (int)(R)))
 
 1058  return (
__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
 
 
 1064  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
 
 
 1070  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
 
 
 1076  return (
__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
 
 
 1081  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
 
 1082                                         0, 1, 0, 1, 0, 1, 0, 1,
 
 1083                                         0, 1, 0, 1, 0, 1, 0, 1);
 
 
 1088  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
 
 
 1095  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
 
 
 1102  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
 
 1103                                         0, 1, 2, 3, 4, 5, 6, 7,
 
 1104                                         0, 1, 2, 3, 4, 5, 6, 7);
 
 
 1109  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
 
 
 1116  return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
 
 
 1123  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
 
 1124                                          0, 1, 0, 1, 0, 1, 0, 1);
 
 
 1129  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
 
 
 1136  return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
 
 
 1143  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
 
 1144                                          0, 1, 0, 1, 0, 1, 0, 1,
 
 1145                                          0, 1, 0, 1, 0, 1, 0, 1);
 
 
 1150  return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
 
 
 1157  return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
 
 
 1164  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
 
 1165                                          0, 1, 2, 3, 4, 5, 6, 7,
 
 1166                                          0, 1, 2, 3, 4, 5, 6, 7);
 
 
 1171  return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
 
 
 1178  return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
 
 
 1185  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
 
 1186                                          0, 1, 0, 1, 0, 1, 0, 1);
 
 
 1191  return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
 
 
 1198  return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
 
 
 1203#define _mm512_extractf32x8_ps(A, imm)                                         \ 
 1204  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm),  \ 
 1205                                            (__v8sf)_mm256_setzero_ps(),       \ 
 
 1208#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ 
 1209  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ 
 1210                                            (__v8sf)(__m256)(W), \ 
 
 1213#define _mm512_maskz_extractf32x8_ps(U, A, imm) \ 
 1214  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ 
 1215                                            (__v8sf)_mm256_setzero_ps(), \ 
 
 1218#define _mm512_extractf64x2_pd(A, imm)                                         \ 
 1219  ((__m128d)__builtin_ia32_extractf64x2_512_mask(                              \ 
 1220      (__v8df)(__m512d)(A), (int)(imm), (__v2df)_mm_setzero_pd(),              \ 
 
 1223#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ 
 1224  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ 
 1226                                                 (__v2df)(__m128d)(W), \ 
 
 1229#define _mm512_maskz_extractf64x2_pd(U, A, imm) \ 
 1230  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ 
 1232                                                 (__v2df)_mm_setzero_pd(), \ 
 
 1235#define _mm512_extracti32x8_epi32(A, imm)                                      \ 
 1236  ((__m256i)__builtin_ia32_extracti32x8_mask(                                  \ 
 1237      (__v16si)(__m512i)(A), (int)(imm), (__v8si)_mm256_setzero_si256(),       \ 
 
 1240#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \ 
 1241  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ 
 1242                                             (__v8si)(__m256i)(W), \ 
 
 1245#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \ 
 1246  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ 
 1247                                             (__v8si)_mm256_setzero_si256(), \ 
 
 1250#define _mm512_extracti64x2_epi64(A, imm)                                      \ 
 1251  ((__m128i)__builtin_ia32_extracti64x2_512_mask(                              \ 
 1252      (__v8di)(__m512i)(A), (int)(imm), (__v2di)_mm_setzero_si128(),           \ 
 
 1255#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ 
 1256  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ 
 1258                                                 (__v2di)(__m128i)(W), \ 
 
 1261#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \ 
 1262  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ 
 1264                                                 (__v2di)_mm_setzero_si128(), \ 
 
 /* Expands to __builtin_ia32_insertf32x8 applied to A (as __v16sf), B (as
  * __v8sf) and imm (as int); the result is cast to __m512.
  * NOTE(review): presumably imm selects which 256-bit half of A is replaced
  * by B -- confirm against the Intel Intrinsics Guide. */
 1267#define _mm512_insertf32x8(A, B, imm) \ 
 1268  ((__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \ 
 1269                                      (__v8sf)(__m256)(B), (int)(imm))) 
 
 /* Merge-masked form of _mm512_insertf32x8.
  * Computes _mm512_insertf32x8(A, B, imm) and hands it, together with W, to
  * __builtin_ia32_selectps_512 under the 16-bit mask U. Arguments are
  * re-parenthesized before being forwarded to the inner macro. */
 1271#define _mm512_mask_insertf32x8(W, U, A, B, imm) \ 
 1272  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 
 1273                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ 
 1274                                 (__v16sf)(__m512)(W))) 
 
 /* Zero-masked form of _mm512_insertf32x8.
  * Computes _mm512_insertf32x8(A, B, imm) and passes it to
  * __builtin_ia32_selectps_512 under the 16-bit mask U, with
  * _mm512_setzero_ps() as the alternative operand. */
 1276#define _mm512_maskz_insertf32x8(U, A, B, imm) \ 
 1277  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 
 1278                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ 
 1279                                 (__v16sf)_mm512_setzero_ps())) 
 
 /* Expands to __builtin_ia32_insertf64x2_512 applied to A (as __v8df), B (as
  * __v2df) and imm (as int); the result is cast to __m512d.
  * NOTE(review): presumably imm selects which 128-bit lane of A is replaced
  * by B -- confirm against the Intel Intrinsics Guide. */
 1281#define _mm512_insertf64x2(A, B, imm) \ 
 1282  ((__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \ 
 1283                                           (__v2df)(__m128d)(B), (int)(imm))) 
 
 /* Merge-masked form of _mm512_insertf64x2.
  * Computes _mm512_insertf64x2(A, B, imm) and hands it, together with W, to
  * __builtin_ia32_selectpd_512 under the 8-bit mask U. */
 1285#define _mm512_mask_insertf64x2(W, U, A, B, imm) \ 
 1286  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 
 1287                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ 
 1288                                  (__v8df)(__m512d)(W))) 
 
 /* Zero-masked form of _mm512_insertf64x2.
  * Computes _mm512_insertf64x2(A, B, imm) and passes it to
  * __builtin_ia32_selectpd_512 under the 8-bit mask U, with
  * _mm512_setzero_pd() as the alternative operand. */
 1290#define _mm512_maskz_insertf64x2(U, A, B, imm) \ 
 1291  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 
 1292                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ 
 1293                                  (__v8df)_mm512_setzero_pd())) 
 
 /* Expands to __builtin_ia32_inserti32x8 applied to A (as __v16si), B (as
  * __v8si) and imm (as int); the result is cast to __m512i.
  * NOTE(review): presumably imm selects which 256-bit half of A is replaced
  * by B -- confirm against the Intel Intrinsics Guide. */
 1295#define _mm512_inserti32x8(A, B, imm) \ 
 1296  ((__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \ 
 1297                                       (__v8si)(__m256i)(B), (int)(imm))) 
 
 /* Merge-masked form of _mm512_inserti32x8.
  * Computes _mm512_inserti32x8(A, B, imm) and hands it, together with W, to
  * __builtin_ia32_selectd_512 under the 16-bit mask U. */
 1299#define _mm512_mask_inserti32x8(W, U, A, B, imm) \ 
 1300  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 
 1301                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ 
 1302                                 (__v16si)(__m512i)(W))) 
 
 /* Zero-masked form of _mm512_inserti32x8.
  * Computes _mm512_inserti32x8(A, B, imm) and passes it to
  * __builtin_ia32_selectd_512 under the 16-bit mask U, with
  * _mm512_setzero_si512() as the alternative operand. */
 1304#define _mm512_maskz_inserti32x8(U, A, B, imm) \ 
 1305  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 
 1306                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ 
 1307                                 (__v16si)_mm512_setzero_si512())) 
 
 /* Expands to __builtin_ia32_inserti64x2_512 applied to A (as __v8di), B (as
  * __v2di) and imm (as int); the result is cast to __m512i.
  * NOTE(review): presumably imm selects which 128-bit lane of A is replaced
  * by B -- confirm against the Intel Intrinsics Guide. */
 1309#define _mm512_inserti64x2(A, B, imm) \ 
 1310  ((__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \ 
 1311                                           (__v2di)(__m128i)(B), (int)(imm))) 
 
 /* Merge-masked form of _mm512_inserti64x2.
  * Computes _mm512_inserti64x2(A, B, imm) and hands it, together with W, to
  * __builtin_ia32_selectq_512 under the 8-bit mask U. */
 1313#define _mm512_mask_inserti64x2(W, U, A, B, imm) \ 
 1314  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 
 1315                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ 
 1316                                  (__v8di)(__m512i)(W))) 
 
 /* Zero-masked form of _mm512_inserti64x2.
  * Computes _mm512_inserti64x2(A, B, imm) and passes it to
  * __builtin_ia32_selectq_512 under the 8-bit mask U, with
  * _mm512_setzero_si512() as the alternative operand. */
 1318#define _mm512_maskz_inserti64x2(U, A, B, imm) \ 
 1319  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 
 1320                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ 
 1321                                  (__v8di)_mm512_setzero_si512())) 
 
 /* Masked floating-point classification of a 16 x float vector.
  * Calls __builtin_ia32_fpclassps512_mask with A (as __v16sf), the category
  * selector imm (as int) and the incoming mask U (as __mmask16); yields a
  * __mmask16.
  * NOTE(review): the bit layout of imm is not visible here -- see the
  * VFPCLASSPS description in the Intel SDM. */
 1323#define _mm512_mask_fpclass_ps_mask(U, A, imm) \ 
 1324  ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 
 1325                                               (int)(imm), (__mmask16)(U))) 
 
 /* Unmasked floating-point classification of a 16 x float vector.
  * Same builtin call as _mm512_mask_fpclass_ps_mask, but with an all-ones
  * __mmask16 (-1) in place of a caller-supplied mask. */
 1327#define _mm512_fpclass_ps_mask(A, imm) \ 
 1328  ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 
 1329                                               (int)(imm), (__mmask16)-1)) 
 
 1331#define _mm512_mask_fpclass_pd_mask(U, A, imm) \ 
 1332  ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 
 
 1335#define _mm512_fpclass_pd_mask(A, imm) \ 
 1336  ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 
 
 1339#define _mm_fpclass_sd_mask(A, imm) \ 
 1340  ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 
 
 1343#define _mm_mask_fpclass_sd_mask(U, A, imm) \ 
 1344  ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 
 
 1347#define _mm_fpclass_ss_mask(A, imm) \ 
 1348  ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 
 
 1351#define _mm_mask_fpclass_ss_mask(U, A, imm) \ 
 1352  ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 
 
 /* Remove the four function-attribute helper macros that this header defines
  * near its top (__DEFAULT_FN_ATTRS512, __DEFAULT_FN_ATTRS and their
  * _CONSTEXPR variants). */
 1355#undef __DEFAULT_FN_ATTRS512 
 1356#undef __DEFAULT_FN_ATTRS 
 1357#undef __DEFAULT_FN_ATTRS512_CONSTEXPR 
 1358#undef __DEFAULT_FN_ATTRS_CONSTEXPR 
#define __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS_CONSTEXPR
#define __DEFAULT_FN_ATTRS512_CONSTEXPR
#define __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi64(__mmask8 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _kadd_mask16(__mmask16 __A, __mmask16 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _load_mask8(__mmask8 *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x2(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu64(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mullo_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi64(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_ps(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu64(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_pd(__m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu64(__mmask8 __U, __m256 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi64(__m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask8(__mmask8 *__A, __mmask8 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x8(__m256 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi64(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f64x2(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu64(__mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_pd(__m512d __A, __m512d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _cvtu32_mask8(unsigned int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR _kor_mask8(__mmask8 __A, __mmask8 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtepu64_ps(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR _kadd_mask8(__mmask8 __A, __mmask8 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_movepi32_mask(__m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu64(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR _kxor_mask8(__mmask8 __A, __mmask8 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi64(__mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i32x8(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepi64_pd(__m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR _kxnor_mask8(__mmask8 __A, __mmask8 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR _kandn_mask8(__mmask8 __A, __mmask8 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR _kand_mask8(__mmask8 __A, __mmask8 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi32(__mmask16 __A)
static __inline __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR _knot_mask8(__mmask8 __M)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_ps(__m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu64(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi64(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi64(__m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu64(__m512d __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask8_u32(__mmask8 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi64(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi64(__mmask8 __U, __m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu64(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_ps(__m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu64_pd(__m512i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_movepi64_mask(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_f32x2(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_broadcast_i64x2(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_pd(void)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
#define _MM_FROUND_CUR_DIRECTION
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_si512(void)
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...