11#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead." 
   14#ifndef __AVX512VLBWINTRIN_H 
   15#define __AVX512VLBWINTRIN_H 
   18#define __DEFAULT_FN_ATTRS128                                                  \ 
   19  __attribute__((__always_inline__, __nodebug__,                               \ 
   20                 __target__("avx512vl,avx512bw"), __min_vector_width__(128))) 
 
   21#define __DEFAULT_FN_ATTRS256                                                  \ 
   22  __attribute__((__always_inline__, __nodebug__,                               \ 
   23                 __target__("avx512vl,avx512bw"), __min_vector_width__(256))) 
 
   25#if defined(__cplusplus) && (__cplusplus >= 201103L) 
   26#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr 
   27#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr 
   29#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 
   30#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 
   35#define _mm_cmp_epi8_mask(a, b, p) \ 
   36  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 
   37                                          (__v16qi)(__m128i)(b), (int)(p), \ 
 
   40#define _mm_mask_cmp_epi8_mask(m, a, b, p) \ 
   41  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 
   42                                          (__v16qi)(__m128i)(b), (int)(p), \ 
 
   45#define _mm_cmp_epu8_mask(a, b, p) \ 
   46  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 
   47                                           (__v16qi)(__m128i)(b), (int)(p), \ 
 
   50#define _mm_mask_cmp_epu8_mask(m, a, b, p) \ 
   51  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 
   52                                           (__v16qi)(__m128i)(b), (int)(p), \ 
 
   55#define _mm256_cmp_epi8_mask(a, b, p) \ 
   56  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 
   57                                          (__v32qi)(__m256i)(b), (int)(p), \ 
 
   60#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \ 
   61  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 
   62                                          (__v32qi)(__m256i)(b), (int)(p), \ 
 
   65#define _mm256_cmp_epu8_mask(a, b, p) \ 
   66  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 
   67                                           (__v32qi)(__m256i)(b), (int)(p), \ 
 
   70#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \ 
   71  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 
   72                                           (__v32qi)(__m256i)(b), (int)(p), \ 
 
   75#define _mm_cmp_epi16_mask(a, b, p) \ 
   76  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 
   77                                         (__v8hi)(__m128i)(b), (int)(p), \ 
 
   80#define _mm_mask_cmp_epi16_mask(m, a, b, p) \ 
   81  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 
   82                                         (__v8hi)(__m128i)(b), (int)(p), \ 
 
   85#define _mm_cmp_epu16_mask(a, b, p) \ 
   86  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 
   87                                          (__v8hi)(__m128i)(b), (int)(p), \ 
 
   90#define _mm_mask_cmp_epu16_mask(m, a, b, p) \ 
   91  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 
   92                                          (__v8hi)(__m128i)(b), (int)(p), \ 
 
   95#define _mm256_cmp_epi16_mask(a, b, p) \ 
   96  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 
   97                                          (__v16hi)(__m256i)(b), (int)(p), \ 
 
  100#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \ 
  101  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 
  102                                          (__v16hi)(__m256i)(b), (int)(p), \ 
 
  105#define _mm256_cmp_epu16_mask(a, b, p) \ 
  106  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 
  107                                           (__v16hi)(__m256i)(b), (int)(p), \ 
 
  110#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \ 
  111  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 
  112                                           (__v16hi)(__m256i)(b), (int)(p), \ 
 
  115#define _mm_cmpeq_epi8_mask(A, B) \ 
  116    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 
 
  117#define _mm_mask_cmpeq_epi8_mask(k, A, B) \ 
  118    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 
 
  119#define _mm_cmpge_epi8_mask(A, B) \ 
  120    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 
 
  121#define _mm_mask_cmpge_epi8_mask(k, A, B) \ 
  122    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 
 
  123#define _mm_cmpgt_epi8_mask(A, B) \ 
  124    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 
 
  125#define _mm_mask_cmpgt_epi8_mask(k, A, B) \ 
  126    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 
 
  127#define _mm_cmple_epi8_mask(A, B) \ 
  128    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 
 
  129#define _mm_mask_cmple_epi8_mask(k, A, B) \ 
  130    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 
 
  131#define _mm_cmplt_epi8_mask(A, B) \ 
  132    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 
 
  133#define _mm_mask_cmplt_epi8_mask(k, A, B) \ 
  134    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 
 
  135#define _mm_cmpneq_epi8_mask(A, B) \ 
  136    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 
 
  137#define _mm_mask_cmpneq_epi8_mask(k, A, B) \ 
  138    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 
 
  140#define _mm256_cmpeq_epi8_mask(A, B) \ 
  141    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 
 
  142#define _mm256_mask_cmpeq_epi8_mask(k, A, B) \ 
  143    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 
 
  144#define _mm256_cmpge_epi8_mask(A, B) \ 
  145    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 
 
  146#define _mm256_mask_cmpge_epi8_mask(k, A, B) \ 
  147    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 
 
  148#define _mm256_cmpgt_epi8_mask(A, B) \ 
  149    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 
 
  150#define _mm256_mask_cmpgt_epi8_mask(k, A, B) \ 
  151    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 
 
  152#define _mm256_cmple_epi8_mask(A, B) \ 
  153    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 
 
  154#define _mm256_mask_cmple_epi8_mask(k, A, B) \ 
  155    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 
 
  156#define _mm256_cmplt_epi8_mask(A, B) \ 
  157    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 
 
  158#define _mm256_mask_cmplt_epi8_mask(k, A, B) \ 
  159    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 
 
  160#define _mm256_cmpneq_epi8_mask(A, B) \ 
  161    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 
 
  162#define _mm256_mask_cmpneq_epi8_mask(k, A, B) \ 
  163    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 
 
  165#define _mm_cmpeq_epu8_mask(A, B) \ 
  166    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 
 
  167#define _mm_mask_cmpeq_epu8_mask(k, A, B) \ 
  168    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 
 
  169#define _mm_cmpge_epu8_mask(A, B) \ 
  170    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 
 
  171#define _mm_mask_cmpge_epu8_mask(k, A, B) \ 
  172    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 
 
  173#define _mm_cmpgt_epu8_mask(A, B) \ 
  174    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 
 
  175#define _mm_mask_cmpgt_epu8_mask(k, A, B) \ 
  176    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 
 
  177#define _mm_cmple_epu8_mask(A, B) \ 
  178    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 
 
  179#define _mm_mask_cmple_epu8_mask(k, A, B) \ 
  180    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 
 
  181#define _mm_cmplt_epu8_mask(A, B) \ 
  182    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 
 
  183#define _mm_mask_cmplt_epu8_mask(k, A, B) \ 
  184    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 
 
  185#define _mm_cmpneq_epu8_mask(A, B) \ 
  186    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 
 
  187#define _mm_mask_cmpneq_epu8_mask(k, A, B) \ 
  188    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 
 
  190#define _mm256_cmpeq_epu8_mask(A, B) \ 
  191    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 
 
  192#define _mm256_mask_cmpeq_epu8_mask(k, A, B) \ 
  193    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 
 
  194#define _mm256_cmpge_epu8_mask(A, B) \ 
  195    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 
 
  196#define _mm256_mask_cmpge_epu8_mask(k, A, B) \ 
  197    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 
 
  198#define _mm256_cmpgt_epu8_mask(A, B) \ 
  199    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 
 
  200#define _mm256_mask_cmpgt_epu8_mask(k, A, B) \ 
  201    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 
 
  202#define _mm256_cmple_epu8_mask(A, B) \ 
  203    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 
 
  204#define _mm256_mask_cmple_epu8_mask(k, A, B) \ 
  205    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 
 
  206#define _mm256_cmplt_epu8_mask(A, B) \ 
  207    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 
 
  208#define _mm256_mask_cmplt_epu8_mask(k, A, B) \ 
  209    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 
 
  210#define _mm256_cmpneq_epu8_mask(A, B) \ 
  211    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 
 
  212#define _mm256_mask_cmpneq_epu8_mask(k, A, B) \ 
  213    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 
 
  215#define _mm_cmpeq_epi16_mask(A, B) \ 
  216    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 
 
  217#define _mm_mask_cmpeq_epi16_mask(k, A, B) \ 
  218    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 
 
  219#define _mm_cmpge_epi16_mask(A, B) \ 
  220    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 
 
  221#define _mm_mask_cmpge_epi16_mask(k, A, B) \ 
  222    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 
 
  223#define _mm_cmpgt_epi16_mask(A, B) \ 
  224    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 
 
  225#define _mm_mask_cmpgt_epi16_mask(k, A, B) \ 
  226    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 
 
  227#define _mm_cmple_epi16_mask(A, B) \ 
  228    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 
 
  229#define _mm_mask_cmple_epi16_mask(k, A, B) \ 
  230    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 
 
  231#define _mm_cmplt_epi16_mask(A, B) \ 
  232    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 
 
  233#define _mm_mask_cmplt_epi16_mask(k, A, B) \ 
  234    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 
 
  235#define _mm_cmpneq_epi16_mask(A, B) \ 
  236    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 
 
  237#define _mm_mask_cmpneq_epi16_mask(k, A, B) \ 
  238    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 
 
  240#define _mm256_cmpeq_epi16_mask(A, B) \ 
  241    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 
 
  242#define _mm256_mask_cmpeq_epi16_mask(k, A, B) \ 
  243    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 
 
  244#define _mm256_cmpge_epi16_mask(A, B) \ 
  245    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 
 
  246#define _mm256_mask_cmpge_epi16_mask(k, A, B) \ 
  247    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 
 
  248#define _mm256_cmpgt_epi16_mask(A, B) \ 
  249    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 
 
  250#define _mm256_mask_cmpgt_epi16_mask(k, A, B) \ 
  251    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 
 
  252#define _mm256_cmple_epi16_mask(A, B) \ 
  253    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 
 
  254#define _mm256_mask_cmple_epi16_mask(k, A, B) \ 
  255    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 
 
  256#define _mm256_cmplt_epi16_mask(A, B) \ 
  257    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 
 
  258#define _mm256_mask_cmplt_epi16_mask(k, A, B) \ 
  259    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 
 
  260#define _mm256_cmpneq_epi16_mask(A, B) \ 
  261    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 
 
  262#define _mm256_mask_cmpneq_epi16_mask(k, A, B) \ 
  263    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 
 
  265#define _mm_cmpeq_epu16_mask(A, B) \ 
  266    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 
 
  267#define _mm_mask_cmpeq_epu16_mask(k, A, B) \ 
  268    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 
 
  269#define _mm_cmpge_epu16_mask(A, B) \ 
  270    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 
 
  271#define _mm_mask_cmpge_epu16_mask(k, A, B) \ 
  272    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 
 
  273#define _mm_cmpgt_epu16_mask(A, B) \ 
  274    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 
 
  275#define _mm_mask_cmpgt_epu16_mask(k, A, B) \ 
  276    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 
 
  277#define _mm_cmple_epu16_mask(A, B) \ 
  278    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 
 
  279#define _mm_mask_cmple_epu16_mask(k, A, B) \ 
  280    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 
 
  281#define _mm_cmplt_epu16_mask(A, B) \ 
  282    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 
 
  283#define _mm_mask_cmplt_epu16_mask(k, A, B) \ 
  284    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 
 
  285#define _mm_cmpneq_epu16_mask(A, B) \ 
  286    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 
 
  287#define _mm_mask_cmpneq_epu16_mask(k, A, B) \ 
  288    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 
 
  290#define _mm256_cmpeq_epu16_mask(A, B) \ 
  291    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 
 
  292#define _mm256_mask_cmpeq_epu16_mask(k, A, B) \ 
  293    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 
 
  294#define _mm256_cmpge_epu16_mask(A, B) \ 
  295    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 
 
  296#define _mm256_mask_cmpge_epu16_mask(k, A, B) \ 
  297    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 
 
  298#define _mm256_cmpgt_epu16_mask(A, B) \ 
  299    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 
 
  300#define _mm256_mask_cmpgt_epu16_mask(k, A, B) \ 
  301    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 
 
  302#define _mm256_cmple_epu16_mask(A, B) \ 
  303    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 
 
  304#define _mm256_mask_cmple_epu16_mask(k, A, B) \ 
  305    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 
 
  306#define _mm256_cmplt_epu16_mask(A, B) \ 
  307    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 
 
  308#define _mm256_mask_cmplt_epu16_mask(k, A, B) \ 
  309    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 
 
  310#define _mm256_cmpneq_epu16_mask(A, B) \ 
  311    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 
 
  312#define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ 
  313    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 
 
  317  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  324  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  331  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  338  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  345  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  352  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  359  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  366  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  373  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  380  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  387  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  394  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  401  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  408  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  415  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  422  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  429  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  436  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  443  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  450  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  457  return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
 
 
  464  return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
 
 
  471  return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
 
 
  478  return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
 
 
  485  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  492  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  499  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  506  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  513  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  520  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  527  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  534  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  541  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
  549  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
  557  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
  565  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
  573  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
  581  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
  589  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
  597  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
  605  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
  613  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
  621  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
  629  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
  637  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
  645  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
  653  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
  661  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
  669  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  677  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  685  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  693  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  701  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  709  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  717  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  725  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  733  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  741  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  749  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  757  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  765  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  773  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  781  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  789  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
  796  return (__m128i)__builtin_ia32_selectb_128(
 
 
  802  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
  809  return (__m256i)__builtin_ia32_selectb_256(
 
 
  815  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
  822  return (__m128i)__builtin_ia32_selectw_128(
 
 
  828  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
  835  return (__m256i)__builtin_ia32_selectw_256(
 
 
  841  return (__m256i)__builtin_ia32_selectw_256(
 
 
  848  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
  855  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
  862  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
  869  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
  876  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
  883  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
  890  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
  897  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
  904  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
  911  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
  918  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
  925  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
  932  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
  939  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
  946  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
  953  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
  960  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
  967  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
  974  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
  981  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
  988  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
  995  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
 1002  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
 1009  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
 1016  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
 1023  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
 1030  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
 1037  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
 
 
 1044  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
 1051  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
 1058  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
 1065  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
 1072  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
 1079  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
 1086  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
 1093  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
 1101  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
 1109  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
 1117  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
 1125  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
 1133  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1141  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1149  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1157  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1165  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
 1173  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
 1181  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
 1189  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
 1197  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1205  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1213  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1221  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1229  return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
 
 
 1237  return (__m128i)__builtin_ia32_selectw_128(__U,
 
 
 1246  return (__m128i)__builtin_ia32_selectw_128(__U,
 
 
 1255  return (__m128i)__builtin_ia32_selectw_128(__U,
 
 
 1263  return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
 
 
 1271  return (__m256i)__builtin_ia32_selectw_256(__U,
 
 
 1280  return (__m256i)__builtin_ia32_selectw_256(__U,
 
 
 1289  return (__m256i)__builtin_ia32_selectw_256(__U,
 
 
 1296  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1303  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1311  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1318  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1325  return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
 
 
 1332  return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
 
 
 1339  return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
 
 
 1346  return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
 
 
 1353  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
 
 
 1360  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
 
 
 1367  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
 
 
 1374  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
 
 
 1381  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
 
 
 1388  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
 
 
 1395  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
 
 
 1402  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
 
 
 1409  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
 
 
 1416  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
 
 
 1423  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
 
 
 1430  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
 
 
 1437  return (__m128i)__builtin_shufflevector(
 
 1438      __builtin_convertvector((__v8hi)__A, __v8qi),
 
 1439      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
 
 
 1445  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
 
 
 1452  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
 
 
 1460  __builtin_ia32_pmovwb128mem_mask ((__v16qi *) 
__P, (__v8hi) __A, __M);
 
 
 1467  __builtin_ia32_pmovswb128mem_mask ((__v16qi *) 
__P, (__v8hi) __A, __M);
 
 
 1473  __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) 
__P, (__v8hi) __A, __M);
 
 
 1478  return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
 
 
 1483  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
 1490  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
 
 
 1498  __builtin_ia32_pmovwb256mem_mask ((__v16qi *) 
__P, (__v16hi) __A, __M);
 
 
 1504  __builtin_ia32_pmovswb256mem_mask ((__v16qi *) 
__P, (__v16hi) __A, __M);
 
 
 1510  __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) 
__P, (__v16hi) __A, __M);
 
 
 1515  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1522  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1529  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1536  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1543  return (__m128i)__builtin_ia32_selectw_128(
 
 
 1549  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1556  return (__m256i)__builtin_ia32_selectw_256(
 
 
 1562  return (__m256i)__builtin_ia32_selectw_256(
 
 
 1569  return (__m128i)__builtin_ia32_selectw_128(
 
 
 1575  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1582  return (__m256i)__builtin_ia32_selectw_256(
 
 
 1588  return (__m256i)__builtin_ia32_selectw_256(
 
 
 1595  return (__m128i)__builtin_ia32_selectb_128(
 
 
 1601  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
 1608  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
 1615  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
 1622  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1629  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1636  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1643  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1650  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
 1657  return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
 
 
 1664  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
 1671  return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
 
 
 1678  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1685  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1692  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1699  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1707  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1715  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1723  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1731  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1740  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1748  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1756  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1764  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1770#define _mm_mask_shufflehi_epi16(W, U, A, imm) \ 
 1771  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 
 1772                                       (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 
 1773                                       (__v8hi)(__m128i)(W))) 
 
 1775#define _mm_maskz_shufflehi_epi16(U, A, imm) \ 
 1776  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 
 1777                                       (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 
 1778                                       (__v8hi)_mm_setzero_si128())) 
 
 1780#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \ 
 1781  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 
 1782                                       (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 
 1783                                       (__v16hi)(__m256i)(W))) 
 
 1785#define _mm256_maskz_shufflehi_epi16(U, A, imm) \ 
 1786  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 
 1787                                       (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 
 1788                                       (__v16hi)_mm256_setzero_si256())) 
 
 1790#define _mm_mask_shufflelo_epi16(W, U, A, imm) \ 
 1791  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 
 1792                                       (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 
 1793                                       (__v8hi)(__m128i)(W))) 
 
 1795#define _mm_maskz_shufflelo_epi16(U, A, imm) \ 
 1796  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 
 1797                                       (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 
 1798                                       (__v8hi)_mm_setzero_si128())) 
 
 1800#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \ 
 1801  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 
 1802                                       (__v16hi)_mm256_shufflelo_epi16((A), \ 
 1804                                       (__v16hi)(__m256i)(W))) 
 
 1806#define _mm256_maskz_shufflelo_epi16(U, A, imm) \ 
 1807  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 
 1808                                       (__v16hi)_mm256_shufflelo_epi16((A), \ 
 1810                                       (__v16hi)_mm256_setzero_si256())) 
 
 1815  return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
 
 
 1821  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1829  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1837  return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
 
 
 1843  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1851  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1859  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1867  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1875  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1883  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1891  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1899  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1907  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1914  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1922  return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
 
 
 1928  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1936  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1944  return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
 
 
 1950  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1958  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 1966  return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
 
 
 1972  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1980  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 1988  return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
 
 
 1994  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 2002  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 2010  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 2018  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 2026  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 2034  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 2041  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 2048  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 2056  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 2063  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 2071  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 2079  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 2087  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 2095  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 2102  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 2109  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
 
 
 2116  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 2123  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
 
 
 2130  return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
 
 
 2137  return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
 
 
 2144  return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
 
 
 2151  return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
 
 
 2158  return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
 
 
 2165  return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
 
 
 2172  return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
 
 
 2179  return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
 
 
 2187  return (__m128i) __builtin_ia32_selectb_128(__M,
 
 
 2195 return (__m128i) __builtin_ia32_selectb_128(__M,
 
 
 2203  return (__m256i) __builtin_ia32_selectb_256(__M,
 
 
 2211  return (__m256i) __builtin_ia32_selectb_256(__M,
 
 
 2219  struct __loadu_epi16 {
 
 2222  return ((
const struct __loadu_epi16*)
__P)->__v;
 
 
 2228  return (__m128i) __builtin_ia32_loaddquhi128_mask ((
const __v8hi *) 
__P,
 
 
 2236  return (__m128i) __builtin_ia32_loaddquhi128_mask ((
const __v8hi *) 
__P,
 
 
 2245  struct __loadu_epi16 {
 
 2248  return ((
const struct __loadu_epi16*)
__P)->__v;
 
 
 2254  return (__m256i) __builtin_ia32_loaddquhi256_mask ((
const __v16hi *) 
__P,
 
 
 2262  return (__m256i) __builtin_ia32_loaddquhi256_mask ((
const __v16hi *) 
__P,
 
 
 2271  struct __loadu_epi8 {
 
 2274  return ((
const struct __loadu_epi8*)
__P)->__v;
 
 
 2280  return (__m128i) __builtin_ia32_loaddquqi128_mask ((
const __v16qi *) 
__P,
 
 
 2288  return (__m128i) __builtin_ia32_loaddquqi128_mask ((
const __v16qi *) 
__P,
 
 
 2297  struct __loadu_epi8 {
 
 2300  return ((
const struct __loadu_epi8*)
__P)->__v;
 
 
 2306  return (__m256i) __builtin_ia32_loaddquqi256_mask ((
const __v32qi *) 
__P,
 
 
 2314  return (__m256i) __builtin_ia32_loaddquqi256_mask ((
const __v32qi *) 
__P,
 
 
 2323  struct __storeu_epi16 {
 
 2326  ((
struct __storeu_epi16*)
__P)->
__v = __A;
 
 
 2332  __builtin_ia32_storedquhi128_mask ((__v8hi *) 
__P,
 
 
 2340  struct __storeu_epi16 {
 
 2343  ((
struct __storeu_epi16*)
__P)->
__v = __A;
 
 
 2349  __builtin_ia32_storedquhi256_mask ((__v16hi *) 
__P,
 
 
 2357  struct __storeu_epi8 {
 
 2360  ((
struct __storeu_epi8*)
__P)->
__v = __A;
 
 
 2366  __builtin_ia32_storedquqi128_mask ((__v16qi *) 
__P,
 
 
 2374  struct __storeu_epi8 {
 
 2377  ((
struct __storeu_epi8*)
__P)->
__v = __A;
 
 
 2383  __builtin_ia32_storedquqi256_mask ((__v32qi *) 
__P,
 
 
 2494  return (
__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
 
 
 2500  return (
__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
 
 
 2506  return (
__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
 
 
 2512  return (
__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
 
 
 2518  return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
 
 
 2524  return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
 
 
 2530  return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
 
 
 2536  return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
 
 
 2542  return (__m128i)__builtin_ia32_selectb_128(__M,
 
 
 2550  return (__m128i)__builtin_ia32_selectb_128(__M,
 
 
 2558  return (__m256i)__builtin_ia32_selectb_256(__M,
 
 
 2566  return (__m256i)__builtin_ia32_selectb_256(__M,
 
 
 2574  return (__m128i)__builtin_ia32_selectw_128(__M,
 
 
 2582  return (__m128i)__builtin_ia32_selectw_128(__M,
 
 
 2590  return (__m256i)__builtin_ia32_selectw_256(__M,
 
 
 2598  return (__m256i)__builtin_ia32_selectw_256(__M,
 
 
 2606  return (__m256i) __builtin_ia32_selectw_256 (__M,
 
 
 2614  return (__m256i) __builtin_ia32_selectw_256(__M,
 
 
 2622  return (__m128i) __builtin_ia32_selectw_128(__M,
 
 
 2630  return (__m128i) __builtin_ia32_selectw_128(__M,
 
 
 2638  return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A);
 
 
 2644  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
 2653  return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
 
 
 2661  return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
 
 
 2668  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
 2677  return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
 
 
 2682#define _mm_mask_alignr_epi8(W, U, A, B, N) \ 
 2683  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 
 2684                                 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 
 2685                                 (__v16qi)(__m128i)(W))) 
 
 2687#define _mm_maskz_alignr_epi8(U, A, B, N) \ 
 2688  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 
 2689                                 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 
 2690                                 (__v16qi)_mm_setzero_si128())) 
 
 2692#define _mm256_mask_alignr_epi8(W, U, A, B, N) \ 
 2693  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 
 2694                              (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 
 2695                              (__v32qi)(__m256i)(W))) 
 
 2697#define _mm256_maskz_alignr_epi8(U, A, B, N) \ 
 2698  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 
 2699                              (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 
 2700                              (__v32qi)_mm256_setzero_si256())) 
 
 2702#define _mm_dbsad_epu8(A, B, imm) \ 
 2703  ((__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \ 
 2704                                       (__v16qi)(__m128i)(B), (int)(imm))) 
 
 2706#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \ 
 2707  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 
 2708                                      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ 
 2709                                      (__v8hi)(__m128i)(W))) 
 
 2711#define _mm_maskz_dbsad_epu8(U, A, B, imm) \ 
 2712  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 
 2713                                      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ 
 2714                                      (__v8hi)_mm_setzero_si128())) 
 
 2716#define _mm256_dbsad_epu8(A, B, imm) \ 
 2717  ((__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \ 
 2718                                       (__v32qi)(__m256i)(B), (int)(imm))) 
 
 2720#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \ 
 2721  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 
 2722                                  (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ 
 2723                                  (__v16hi)(__m256i)(W))) 
 
 2725#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \ 
 2726  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 
 2727                                  (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ 
 2728                                  (__v16hi)_mm256_setzero_si256())) 
 
 2732  return __builtin_reduce_add((__v8hi)__W);
 
 
 2737  return __builtin_reduce_mul((__v8hi)__W);
 
 
 2742  return __builtin_reduce_and((__v8hi)__W);
 
 
 2747  return __builtin_reduce_or((__v8hi)__W);
 
 
 2753  return __builtin_reduce_add((__v8hi)__W);
 
 
 2759  return __builtin_reduce_mul((__v8hi)__W);
 
 
 2765  return __builtin_reduce_and((__v8hi)__W);
 
 
 2771  return __builtin_reduce_or((__v8hi)__W);
 
 
 2776  return __builtin_reduce_max((__v8hi)__V);
 
 
 2781  return __builtin_reduce_max((__v8hu)__V);
 
 
 2786  return __builtin_reduce_min((__v8hi)__V);
 
 
 2791  return __builtin_reduce_min((__v8hu)__V);
 
 
 2797  return __builtin_reduce_max((__v8hi)__V);
 
 
 2803  return __builtin_reduce_max((__v8hu)__V);
 
 
 2809  return __builtin_reduce_min((__v8hi)__V);
 
 
 2815  return __builtin_reduce_min((__v8hu)__V);
 
 
 2820  return __builtin_reduce_add((__v16hi)__W);
 
 
 2825  return __builtin_reduce_mul((__v16hi)__W);
 
 
 2830  return __builtin_reduce_and((__v16hi)__W);
 
 
 2835  return __builtin_reduce_or((__v16hi)__W);
 
 
 2841  return __builtin_reduce_add((__v16hi)__W);
 
 
 2847  return __builtin_reduce_mul((__v16hi)__W);
 
 
 2853  return __builtin_reduce_and((__v16hi)__W);
 
 
 2859  return __builtin_reduce_or((__v16hi)__W);
 
 
 2864  return __builtin_reduce_max((__v16hi)__V);
 
 
 2869  return __builtin_reduce_max((__v16hu)__V);
 
 
 2874  return __builtin_reduce_min((__v16hi)__V);
 
 
 2879  return __builtin_reduce_min((__v16hu)__V);
 
 
 2885  return __builtin_reduce_max((__v16hi)__V);
 
 
 2891  return __builtin_reduce_max((__v16hu)__V);
 
 
 2897  return __builtin_reduce_min((__v16hi)__V);
 
 
 2903  return __builtin_reduce_min((__v16hu)__V);
 
 
 2908  return __builtin_reduce_add((__v16qs)__W);
 
 
 2913  return __builtin_reduce_mul((__v16qs)__W);
 
 
 2918  return __builtin_reduce_and((__v16qs)__W);
 
 
 2923  return __builtin_reduce_or((__v16qs)__W);
 
 
 2929  return __builtin_reduce_add((__v16qs)__W);
 
 
 2935  return __builtin_reduce_mul((__v16qs)__W);
 
 
 2941  return __builtin_reduce_and((__v16qs)__W);
 
 
 2947  return __builtin_reduce_or((__v16qs)__W);
 
 
 2952  return __builtin_reduce_max((__v16qs)__V);
 
 
 2957  return __builtin_reduce_max((__v16qu)__V);
 
 
 2962  return __builtin_reduce_min((__v16qs)__V);
 
 
 2967  return __builtin_reduce_min((__v16qu)__V);
 
 
 2973  return __builtin_reduce_max((__v16qs)__V);
 
 
 2979  return __builtin_reduce_max((__v16qu)__V);
 
 
 2985  return __builtin_reduce_min((__v16qs)__V);
 
 
 2991  return __builtin_reduce_min((__v16qu)__V);
 
 
 2996  return __builtin_reduce_add((__v32qs)__W);
 
 
 3001  return __builtin_reduce_mul((__v32qs)__W);
 
 
 3006  return __builtin_reduce_and((__v32qs)__W);
 
 
 3011  return __builtin_reduce_or((__v32qs)__W);
 
 
 3017  return __builtin_reduce_add((__v32qs)__W);
 
 
 3023  return __builtin_reduce_mul((__v32qs)__W);
 
 
 3029  return __builtin_reduce_and((__v32qs)__W);
 
 
 3035  return __builtin_reduce_or((__v32qs)__W);
 
 
 3040  return __builtin_reduce_max((__v32qs)__V);
 
 
 3045  return __builtin_reduce_max((__v32qu)__V);
 
 
 3050  return __builtin_reduce_min((__v32qs)__V);
 
 
 3055  return __builtin_reduce_min((__v32qu)__V);
 
 
 3061  return __builtin_reduce_max((__v32qs)__V);
 
 
 3067  return __builtin_reduce_max((__v32qu)__V);
 
 
 3073  return __builtin_reduce_min((__v32qs)__V);
 
 
 3079  return __builtin_reduce_min((__v32qu)__V);
 
 
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
#define __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi16(__m256i __a, __m128i __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by the number of bits giv...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epu16(__m256i __a, __m256i __b)
Compares the corresponding unsigned 16-bit integers in the two 256-bit vectors of [16 x i16] in __a a...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epi16(__m256i __a, __m256i __b)
Multiplies signed 16-bit integer elements of two 256-bit vectors of [16 x i16], and returns the upper...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_adds_epu8(__m256i __a, __m256i __b)
Adds 8-bit integers from corresponding bytes of two 256-bit integer vectors using unsigned saturation...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maddubs_epi16(__m256i __a, __m256i __b)
Multiplies each unsigned byte from the 256-bit integer vector in __a with the corresponding signed by...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epu16(__m256i __a, __m256i __b)
Multiplies unsigned 16-bit integer elements of two 256-bit vectors of [16 x i16], and returns the upp...
#define __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastb_epi8(__m128i __X)
Broadcasts the low byte from the 128-bit integer vector in __X to all bytes of the 128-bit result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepu8_epi16(__m128i __V)
Zero-extends bytes from the 128-bit integer vector in __V and returns the 16-bit values in the corres...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_subs_epu16(__m256i __a, __m256i __b)
Subtracts 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using unsi...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_packus_epi32(__m256i __V1, __m256i __V2)
Converts elements from two 256-bit vectors of [8 x i32] to 16-bit integers using unsigned saturation,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastb_epi8(__m128i __X)
Broadcasts the low byte from the 128-bit integer vector in __X to all bytes of the 256-bit result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors of [16 x i16] in __a and __...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mullo_epi16(__m256i __a, __m256i __b)
Multiplies signed 16-bit integer elements of two 256-bit vectors of [16 x i16], and returns the lower...
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_broadcastw_epi16(__m128i __X)
Broadcasts the low element from the 128-bit vector of [8 x i16] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_adds_epi16(__m256i __a, __m256i __b)
Adds 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using signed sa...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epi8(__m256i __a, __m256i __b)
Compares the corresponding signed bytes in the two 256-bit integer vectors in __a and __b and returns...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi16(__m256i __a, __m128i __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a left by the number of bits spec...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srli_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epi16(__m256i __a, __m256i __b)
Compares the corresponding signed 16-bit integers in the two 256-bit vectors of [16 x i16] in __a and...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_shuffle_epi8(__m256i __a, __m256i __b)
Shuffles 8-bit integers in the 256-bit integer vector __a according to control information in the 256...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi8_epi16(__m128i __V)
Sign-extends bytes from the 128-bit integer vector in __V and returns the 16-bit values in the corres...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
Unpacks and interleaves 8-bit integers from parts of the 256-bit integer vectors in __a and __b to fo...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_broadcastw_epi16(__m128i __X)
Broadcasts the low element from the 128-bit vector of [8 x i16] in __X to all elements of the result'...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srai_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi8(__m256i __a, __m256i __b)
Adds 8-bit integers from corresponding bytes of two 256-bit integer vectors and returns the lower 8 b...
#define __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epu16(__m256i __a, __m256i __b)
Compares the corresponding unsigned 16-bit integers in the two 256-bit vectors of [16 x i16] in __a a...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_subs_epu8(__m256i __a, __m256i __b)
Subtracts 8-bit integers from corresponding bytes of two 256-bit integer vectors using unsigned satur...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_subs_epi16(__m256i __a, __m256i __b)
Subtracts 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using sign...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi8(__m256i __a)
Computes the absolute value of each signed byte in the 256-bit integer vector __a and returns each va...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
Unpacks and interleaves 8-bit integers from parts of the 256-bit integer vectors in __a and __b to fo...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi16(__m256i __a, __m256i __b)
Subtracts 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16].
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_si256(__m256i __a, __m256i __b)
Computes the bitwise AND of the 256-bit integer vectors in __a and __b.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_packus_epi16(__m256i __a, __m256i __b)
Converts elements from two 256-bit vectors of [16 x i16] to 8-bit integers using unsigned saturation,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi16(__m256i __a)
Computes the absolute value of each signed 16-bit element in the 256-bit vector of [16 x i16] in __a ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epu8(__m256i __a, __m256i __b)
Compares the corresponding unsigned bytes in the two 256-bit integer vectors in __a and __b and retur...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi16(__m256i __a, __m256i __b)
Adds 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] and returns the...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epu8(__m256i __a, __m256i __b)
Compares the corresponding unsigned bytes in the two 256-bit integer vectors in __a and __b and retur...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi8(__m256i __a, __m256i __b)
Subtracts 8-bit integers from corresponding bytes of two 256-bit integer vectors.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_min_epi16(__m256i __a, __m256i __b)
Compares the corresponding signed 16-bit integers in the two 256-bit vectors of [16 x i16] in __a and...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_avg_epu16(__m256i __a, __m256i __b)
Computes the averages of the corresponding unsigned 16-bit integers in the two 256-bit vectors of [16...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi16(__m256i __a, __m128i __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a right by the number of bits giv...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_adds_epu16(__m256i __a, __m256i __b)
Adds 16-bit integers from corresponding elements of two 256-bit vectors of [16 x i16] using unsigned ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_madd_epi16(__m256i __a, __m256i __b)
Multiplies corresponding 16-bit elements of two 256-bit vectors of [16 x i16], forming 32-bit interme...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_max_epi8(__m256i __a, __m256i __b)
Compares the corresponding signed bytes in the two 256-bit integer vectors in __a and __b and returns...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors of [16 x i16] in __a and __...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_subs_epi8(__m256i __a, __m256i __b)
Subtracts 8-bit integers from corresponding bytes of two 256-bit integer vectors using signed saturat...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_packs_epi32(__m256i __a, __m256i __b)
Converts the elements of two 256-bit vectors of [8 x i32] to 16-bit integers using signed saturation,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
Multiplies signed 16-bit integer elements of two 256-bit vectors of [16 x i16], truncates the 32-bit ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_adds_epi8(__m256i __a, __m256i __b)
Adds 8-bit integers from corresponding bytes of two 256-bit integer vectors using signed saturation,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_slli_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a left by __count bits,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_packs_epi16(__m256i __a, __m256i __b)
Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit integers using signed saturation,...
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_avg_epu8(__m256i __a, __m256i __b)
Computes the averages of the corresponding unsigned bytes in the two 256-bit integer vectors in __a a...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epu16(__m128i __V)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_mul_epi8(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epi8(__m256i __V)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi16(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_movepi8_mask(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi8(__m128i __W, __mmask16 __U, void const *__P)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi8(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
#define _mm256_cmpeq_epi8_mask(A, B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi16_mask(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
#define _mm_cmpneq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_and_epi16(__m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
#define _mm_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_and_epi8(__m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi16(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_mul_epi16(__mmask16 __M, __m256i __W)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A)
#define _mm_cmpneq_epi8_mask(A, B)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi16_mask(__m128i __A, __m128i __B)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epi16(__m256i __V)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_add_epi8(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi16(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi16(void *__P, __mmask16 __U, __m256i __A)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_mul_epi16(__m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V)
#define _mm256_cmpneq_epi8_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_and_epi16(__mmask8 __M, __m128i __W)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_test_epi8_mask(__m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epi16(__m128i __V)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epu16(__m256i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi16(__mmask8 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi8(__mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_or_epi16(__m128i __W)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi16_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi8(__m128i __O, __mmask16 __M, char __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_and_epi16(__m256i __W)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_movepi8_mask(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
#define _mm_cmpeq_epi8_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi16(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi8(__mmask32 __M, char __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_testn_epi8_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi8(__m256i __W, __mmask32 __U, void const *__P)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_mul_epi8(__m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_add_epi8(__m128i __W)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epu16(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi8(void *__P, __mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi16(__mmask16 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epi8(__m256i __V)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_add_epi16(__m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi16(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_cvtepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi8(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V)
#define _mm256_cmpeq_epi16_mask(A, B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi8(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi16(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epu8(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_movepi16_mask(__m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi16(__m256i __W, __mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_mul_epi16(__mmask8 __M, __m128i __W)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epu8(__m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mov_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi8(__mmask16 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi8(__mmask32 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testn_epi8_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
#define _mm_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi16(__m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_test_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi16(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi8(__mmask32 __A)
#define _mm_cmpeq_epi16_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm_mask_cmpeq_epi16_mask(k, A, B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epi16(__m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_max_epi8(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi8(void *__P, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi16(__mmask16 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi16(__m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_or_epi8(__m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_epi8(__m128i __W, __mmask16 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi8(void *__P, __mmask16 __U, __m128i __A)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_add_epi16(__m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi16(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_or_epi16(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_add_epi16(__mmask16 __M, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_srli_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epi16(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_testn_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_cvtepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi8(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi16(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi16(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
#define _mm256_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cvtepi16_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi16_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epu8(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_test_epi8_mask(__m128i __A, __m128i __B)
static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_min_epu8(__m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mov_epi8(__mmask16 __U, __m128i __A)
static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_max_epu16(__m256i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi16(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi16(__mmask8 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_reduce_and_epi16(__mmask16 __M, __m256i __W)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_or_epi8(__m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi16_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi16(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_mul_epi16(__m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mov_epi16(__m128i __W, __mmask8 __U, __m128i __A)
#define _mm256_cmpneq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi16(__mmask16 __U, void const *__P)
static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_min_epu16(__mmask8 __M, __m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_cvtepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi16(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_reduce_and_epi8(__m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi16(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_reduce_min_epi8(__m128i __V)
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi16(short __w)
Constructs a 256-bit integer vector of [16 x i16], with each of the 16-bit integral vector elements s...
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_epi8(char __b)
Constructs a 256-bit integer vector of [32 x i8], with each of the 8-bit integral vector elements set...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 16-bit unsigned integer values in the input and returns the...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the smaller value f...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_packs_epi32(__m128i __a, __m128i __b)
Converts, with saturation, 32-bit signed integers from both 128-bit integer vector operands into 16-b...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_avg_epu8(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the greater value f...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-7) values from two 128-bit vectors of [16 x i8] and interleaves them i...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_avg_epu16(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-3) values from each of the two 128-bit vectors of [8 x i16] and interl...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi8(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [16 x i8], saving the lower 8 bits of each ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the lower 16 bits of ea...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_madd_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two 128-bit signed [8 x i16] vectors, producing eight interm...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_packus_epi16(__m128i __a, __m128i __b)
Converts, with saturation, 16-bit signed integers from both 128-bit integer vector operands into 8-bi...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the smaller value fro...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 8-bit signed integer values in the input and returns the di...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_adds_epu8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_adds_epu16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Unpacks the high-order (index 8-15) values from two 128-bit vectors of [16 x i8] and interleaves them...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_adds_epi8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [16 x i8] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhi_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the upper 16 bits of ea...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_packs_epi16(__m128i __a, __m128i __b)
Converts, with saturation, 16-bit signed integers from both 128-bit integer vector operands into 8-bi...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Unpacks the high-order (index 4-7) values from two 128-bit vectors of [8 x i16] and interleaves them ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhi_epu16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two unsigned [8 x i16] vectors, saving the upper 16 bits of ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the greater value fro...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 16-bit signed integer values in the input and returns the d...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_epi16(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [8 x i16], saving the lower 16 bits of each...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_adds_epi16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [8 x i16] vectors,...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts, with saturation, corresponding 8-bit unsigned integer values in the input and returns the ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static __inline unsigned int _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
(NOTE: the two lines above were garbled in extraction; the surviving parameter tokens — two unsigned int operands __X/__Y and an unsigned int *__P out-parameter — match the BMI2 `_mulx_u32` intrinsic. Verify against the original header.)
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepu8_epi16(__m128i __V)
Zero-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_packus_epi32(__m128i __V1, __m128i __V2)
Converts, with saturation, 32-bit signed integers from both 128-bit integer vector operands into 16-b...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtepi8_epi16(__m128i __V)
Sign-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...