clang  6.0.0svn
Macros | Typedefs | Functions
avxintrin.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define __DEFAULT_FN_ATTRS   __attribute__((__always_inline__, __nodebug__, __target__("avx")))
 
#define _mm256_round_pd(V, M)
 Rounds the values in a 256-bit vector of [4 x double] as specified by the byte operand. More...
 
#define _mm256_round_ps(V, M)
 Rounds the values stored in a 256-bit vector of [8 x float] as specified by the byte operand. More...
 
#define _mm256_ceil_pd(V)   _mm256_round_pd((V), _MM_FROUND_CEIL)
 Rounds up the values stored in a 256-bit vector of [4 x double]. More...
 
#define _mm256_floor_pd(V)   _mm256_round_pd((V), _MM_FROUND_FLOOR)
 Rounds down the values stored in a 256-bit vector of [4 x double]. More...
 
#define _mm256_ceil_ps(V)   _mm256_round_ps((V), _MM_FROUND_CEIL)
 Rounds up the values stored in a 256-bit vector of [8 x float]. More...
 
#define _mm256_floor_ps(V)   _mm256_round_ps((V), _MM_FROUND_FLOOR)
 Rounds down the values stored in a 256-bit vector of [8 x float]. More...
 
#define _mm_permute_pd(A, C)
 Copies the values in a 128-bit vector of [2 x double] as specified by the immediate integer operand. More...
 
#define _mm256_permute_pd(A, C)
 Copies the values in a 256-bit vector of [4 x double] as specified by the immediate integer operand. More...
 
#define _mm_permute_ps(A, C)
 Copies the values in a 128-bit vector of [4 x float] as specified by the immediate integer operand. More...
 
#define _mm256_permute_ps(A, C)
 Copies the values in a 256-bit vector of [8 x float] as specified by the immediate integer operand. More...
 
#define _mm256_permute2f128_pd(V1, V2, M)
 Permutes 128-bit data values stored in two 256-bit vectors of [4 x double], as specified by the immediate integer operand. More...
 
#define _mm256_permute2f128_ps(V1, V2, M)
 Permutes 128-bit data values stored in two 256-bit vectors of [8 x float], as specified by the immediate integer operand. More...
 
#define _mm256_permute2f128_si256(V1, V2, M)
 Permutes 128-bit data values stored in two 256-bit integer vectors, as specified by the immediate integer operand. More...
 
#define _mm256_blend_pd(V1, V2, M)
 Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the immediate integer operand. More...
 
#define _mm256_blend_ps(V1, V2, M)
 Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the immediate integer operand. More...
 
#define _mm256_dp_ps(V1, V2, M)
 Computes two dot products in parallel, using the lower and upper halves of two [8 x float] vectors as input to the two computations, and returning the two dot products in the lower and upper halves of the [8 x float] result. More...
 
#define _mm256_shuffle_ps(a, b, mask)
 Selects 8 float values from the 256-bit operands of [8 x float], as specified by the immediate value operand. More...
 
#define _mm256_shuffle_pd(a, b, mask)
 Selects four double-precision values from the 256-bit operands of [4 x double], as specified by the immediate value operand. More...
 
#define _CMP_EQ_OQ   0x00 /* Equal (ordered, non-signaling) */
 
#define _CMP_LT_OS   0x01 /* Less-than (ordered, signaling) */
 
#define _CMP_LE_OS   0x02 /* Less-than-or-equal (ordered, signaling) */
 
#define _CMP_UNORD_Q   0x03 /* Unordered (non-signaling) */
 
#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling) */
 
#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling) */
 
#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling) */
 
#define _CMP_ORD_Q   0x07 /* Ordered (non-signaling) */
 
#define _CMP_EQ_UQ   0x08 /* Equal (unordered, non-signaling) */
 
#define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unordered, signaling) */
 
#define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling) */
 
#define _CMP_FALSE_OQ   0x0b /* False (ordered, non-signaling) */
 
#define _CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling) */
 
#define _CMP_GE_OS   0x0d /* Greater-than-or-equal (ordered, signaling) */
 
#define _CMP_GT_OS   0x0e /* Greater-than (ordered, signaling) */
 
#define _CMP_TRUE_UQ   0x0f /* True (unordered, non-signaling) */
 
#define _CMP_EQ_OS   0x10 /* Equal (ordered, signaling) */
 
#define _CMP_LT_OQ   0x11 /* Less-than (ordered, non-signaling) */
 
#define _CMP_LE_OQ   0x12 /* Less-than-or-equal (ordered, non-signaling) */
 
#define _CMP_UNORD_S   0x13 /* Unordered (signaling) */
 
#define _CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling) */
 
#define _CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling) */
 
#define _CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unordered, non-signaling) */
 
#define _CMP_ORD_S   0x17 /* Ordered (signaling) */
 
#define _CMP_EQ_US   0x18 /* Equal (unordered, signaling) */
 
#define _CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */
 
#define _CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling) */
 
#define _CMP_FALSE_OS   0x1b /* False (ordered, signaling) */
 
#define _CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling) */
 
#define _CMP_GE_OQ   0x1d /* Greater-than-or-equal (ordered, non-signaling) */
 
#define _CMP_GT_OQ   0x1e /* Greater-than (ordered, non-signaling) */
 
#define _CMP_TRUE_US   0x1f /* True (unordered, signaling) */
 
#define _mm_cmp_pd(a, b, c)
 Compares each of the corresponding double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand. More...
 
#define _mm_cmp_ps(a, b, c)
 Compares each of the corresponding values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand. More...
 
#define _mm256_cmp_pd(a, b, c)
 Compares each of the corresponding double-precision values of two 256-bit vectors of [4 x double], using the operation specified by the immediate integer operand. More...
 
#define _mm256_cmp_ps(a, b, c)
 Compares each of the corresponding values of two 256-bit vectors of [8 x float], using the operation specified by the immediate integer operand. More...
 
#define _mm_cmp_sd(a, b, c)
 Compares each of the corresponding scalar double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand. More...
 
#define _mm_cmp_ss(a, b, c)
 Compares each of the corresponding scalar values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand. More...
 
#define _mm256_insertf128_ps(V1, V2, M)
 Constructs a new 256-bit vector of [8 x float] by first duplicating a 256-bit vector of [8 x float] given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit vector of [4 x float] in the second parameter. More...
 
#define _mm256_insertf128_pd(V1, V2, M)
 Constructs a new 256-bit vector of [4 x double] by first duplicating a 256-bit vector of [4 x double] given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit vector of [2 x double] in the second parameter. More...
 
#define _mm256_insertf128_si256(V1, V2, M)
 Constructs a new 256-bit integer vector by first duplicating a 256-bit integer vector given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit integer vector in the second parameter. More...
 
#define _mm256_extractf128_ps(V, M)
 Extracts either the upper or the lower 128 bits from a 256-bit vector of [8 x float], as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit vector of [4 x float]. More...
 
#define _mm256_extractf128_pd(V, M)
 Extracts either the upper or the lower 128 bits from a 256-bit vector of [4 x double], as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit vector of [2 x double]. More...
 
#define _mm256_extractf128_si256(V, M)
 Extracts either the upper or the lower 128 bits from a 256-bit integer vector, as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit integer vector. More...
 

Typedefs

typedef double __v4df __attribute__((__vector_size__(32)))
 

Functions

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd (__m256d __a, __m256d __b)
 Adds two 256-bit vectors of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps (__m256 __a, __m256 __b)
 Adds two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd (__m256d __a, __m256d __b)
 Subtracts two 256-bit vectors of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps (__m256 __a, __m256 __b)
 Subtracts two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_addsub_pd (__m256d __a, __m256d __b)
 Alternately subtracts and adds the corresponding values of two 256-bit vectors of [4 x double]: the even-indexed values are subtracted and the odd-indexed values are added. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_addsub_ps (__m256 __a, __m256 __b)
 Alternately subtracts and adds the corresponding values of two 256-bit vectors of [8 x float]: the even-indexed values are subtracted and the odd-indexed values are added. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd (__m256d __a, __m256d __b)
 Divides two 256-bit vectors of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps (__m256 __a, __m256 __b)
 Divides two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd (__m256d __a, __m256d __b)
 Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps (__m256 __a, __m256 __b)
 Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd (__m256d __a, __m256d __b)
 Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps (__m256 __a, __m256 __b)
 Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd (__m256d __a, __m256d __b)
 Multiplies two 256-bit vectors of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps (__m256 __a, __m256 __b)
 Multiplies two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd (__m256d __a)
 Calculates the square roots of the values in a 256-bit vector of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps (__m256 __a)
 Calculates the square roots of the values in a 256-bit vector of [8 x float]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rsqrt_ps (__m256 __a)
 Calculates the reciprocal square roots of the values in a 256-bit vector of [8 x float]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rcp_ps (__m256 __a)
 Calculates the reciprocals of the values in a 256-bit vector of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_and_pd (__m256d __a, __m256d __b)
 Performs a bitwise AND of two 256-bit vectors of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_and_ps (__m256 __a, __m256 __b)
 Performs a bitwise AND of two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_andnot_pd (__m256d __a, __m256d __b)
 Performs a bitwise AND of two 256-bit vectors of [4 x double], using the one's complement of the values contained in the first source operand. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_andnot_ps (__m256 __a, __m256 __b)
 Performs a bitwise AND of two 256-bit vectors of [8 x float], using the one's complement of the values contained in the first source operand. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_or_pd (__m256d __a, __m256d __b)
 Performs a bitwise OR of two 256-bit vectors of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_or_ps (__m256 __a, __m256 __b)
 Performs a bitwise OR of two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_xor_pd (__m256d __a, __m256d __b)
 Performs a bitwise XOR of two 256-bit vectors of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_xor_ps (__m256 __a, __m256 __b)
 Performs a bitwise XOR of two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hadd_pd (__m256d __a, __m256d __b)
 Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hadd_ps (__m256 __a, __m256 __b)
 Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hsub_pd (__m256d __a, __m256d __b)
 Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hsub_ps (__m256 __a, __m256 __b)
 Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [8 x float]. More...
 
static __inline __m128d __DEFAULT_FN_ATTRS _mm_permutevar_pd (__m128d __a, __m128i __c)
 Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector operand. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd (__m256d __a, __m256i __c)
 Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector operand. More...
 
static __inline __m128 __DEFAULT_FN_ATTRS _mm_permutevar_ps (__m128 __a, __m128i __c)
 Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps (__m256 __a, __m256i __c)
 Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_blendv_pd (__m256d __a, __m256d __b, __m256d __c)
 Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the 256-bit vector operand. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_blendv_ps (__m256 __a, __m256 __b, __m256 __c)
 Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the 256-bit vector operand. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi32 (__m256i __a, const int __imm)
 Takes a [8 x i32] vector and returns the vector element value indexed by the immediate constant operand. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi16 (__m256i __a, const int __imm)
 Takes a [16 x i16] vector and returns the vector element value indexed by the immediate constant operand. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi8 (__m256i __a, const int __imm)
 Takes a [32 x i8] vector and returns the vector element value indexed by the immediate constant operand. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi32 (__m256i __a, int __b, int const __imm)
 Takes a [8 x i32] vector and replaces the vector element value indexed by the immediate constant operand with a new value. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi16 (__m256i __a, int __b, int const __imm)
 Takes a [16 x i16] vector and replaces the vector element value indexed by the immediate constant operand with a new value. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi8 (__m256i __a, int __b, int const __imm)
 Takes a [32 x i8] vector and replaces the vector element value indexed by the immediate constant operand with a new value. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd (__m128i __a)
 Converts a vector of [4 x i32] into a vector of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps (__m256i __a)
 Converts a vector of [8 x i32] into a vector of [8 x float]. More...
 
static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps (__m256d __a)
 Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float]. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32 (__m256 __a)
 Converts a vector of [8 x float] into a vector of [8 x i32]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd (__m128 __a)
 Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double]. More...
 
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32 (__m256d __a)
 Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32], truncating the result by rounding towards zero when it is inexact. More...
 
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32 (__m256d __a)
 Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32]. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32 (__m256 __a)
 Converts a vector of [8 x float] into a vector of [8 x i32], truncating the result by rounding towards zero when it is inexact. More...
 
static __inline double __DEFAULT_FN_ATTRS _mm256_cvtsd_f64 (__m256d __a)
 Returns the first element of the input vector of [4 x double]. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_cvtsi256_si32 (__m256i __a)
 Returns the first element of the input vector of [8 x i32]. More...
 
static __inline float __DEFAULT_FN_ATTRS _mm256_cvtss_f32 (__m256 __a)
 Returns the first element of the input vector of [8 x float]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps (__m256 __a)
 Moves and duplicates high-order (odd-indexed) values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps (__m256 __a)
 Moves and duplicates low-order (even-indexed) values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd (__m256d __a)
 Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to double-precision values in a 256-bit vector of [4 x double]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd (__m256d __a, __m256d __b)
 Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd (__m256d __a, __m256d __b)
 Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps (__m256 __a, __m256 __b)
 Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] and interleaves them into a 256-bit vector of [8 x float]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps (__m256 __a, __m256 __b)
 Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] and interleaves them into a 256-bit vector of [8 x float]. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm_testz_pd (__m128d __a, __m128d __b)
 Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm_testc_pd (__m128d __a, __m128d __b)
 Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm_testnzc_pd (__m128d __a, __m128d __b)
 Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm_testz_ps (__m128 __a, __m128 __b)
 Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm_testc_ps (__m128 __a, __m128 __b)
 Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm_testnzc_ps (__m128 __a, __m128 __b)
 Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_testz_pd (__m256d __a, __m256d __b)
 Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_testc_pd (__m256d __a, __m256d __b)
 Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_pd (__m256d __a, __m256d __b)
 Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_testz_ps (__m256 __a, __m256 __b)
 Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision elements in the first source vector and the corresponding elements in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_testc_ps (__m256 __a, __m256 __b)
 Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision elements in the first source vector and the corresponding elements in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_ps (__m256 __a, __m256 __b)
 Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision elements in the first source vector and the corresponding elements in the second source vector. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_testz_si256 (__m256i __a, __m256i __b)
 Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_testc_si256 (__m256i __a, __m256i __b)
 Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_si256 (__m256i __a, __m256i __b)
 Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_pd (__m256d __a)
 Extracts the sign bits of double-precision floating point elements in a 256-bit vector of [4 x double] and writes them to the lower order bits of the return value. More...
 
static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_ps (__m256 __a)
 Extracts the sign bits of single-precision floating point elements in a 256-bit vector of [8 x float] and writes them to the lower order bits of the return value. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_zeroall (void)
 Zeroes the contents of all XMM or YMM registers. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_zeroupper (void)
 Zeroes the upper 128 bits (bits 255:128) of all YMM registers. More...
 
static __inline __m128 __DEFAULT_FN_ATTRS _mm_broadcast_ss (float const *__a)
 Loads a scalar single-precision floating point value from the specified address pointed to by __a and broadcasts it to the elements of a [4 x float] vector. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_sd (double const *__a)
 Loads a scalar double-precision floating point value from the specified address pointed to by __a and broadcasts it to the elements of a [4 x double] vector. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ss (float const *__a)
 Loads a scalar single-precision floating point value from the specified address pointed to by __a and broadcasts it to the elements of a [8 x float] vector. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_pd (__m128d const *__a)
 Loads the data from a 128-bit vector of [2 x double] from the specified address pointed to by __a and broadcasts it to 128-bit elements in a 256-bit vector of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ps (__m128 const *__a)
 Loads the data from a 128-bit vector of [4 x float] from the specified address pointed to by __a and broadcasts it to 128-bit elements in a 256-bit vector of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_load_pd (double const *__p)
 Loads 4 double-precision floating point values from a 32-byte aligned memory location pointed to by __p into a vector of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_load_ps (float const *__p)
 Loads 8 single-precision floating point values from a 32-byte aligned memory location pointed to by __p into a vector of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu_pd (double const *__p)
 Loads 4 double-precision floating point values from an unaligned memory location pointed to by __p into a vector of [4 x double]. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu_ps (float const *__p)
 Loads 8 single-precision floating point values from an unaligned memory location pointed to by __p into a vector of [8 x float]. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_load_si256 (__m256i const *__p)
 Loads 256 bits of integer data from a 32-byte aligned memory location pointed to by __p into elements of a 256-bit integer vector. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu_si256 (__m256i const *__p)
 Loads 256 bits of integer data from an unaligned memory location pointed to by __p into a 256-bit integer vector. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_lddqu_si256 (__m256i const *__p)
 Loads 256 bits of integer data from an unaligned memory location pointed to by __p into a 256-bit integer vector. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_store_pd (double *__p, __m256d __a)
 Stores double-precision floating point values from a 256-bit vector of [4 x double] to a 32-byte aligned memory location pointed to by __p. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_store_ps (float *__p, __m256 __a)
 Stores single-precision floating point values from a 256-bit vector of [8 x float] to a 32-byte aligned memory location pointed to by __p. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_pd (double *__p, __m256d __a)
 Stores double-precision floating point values from a 256-bit vector of [4 x double] to an unaligned memory location pointed to by __p. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_ps (float *__p, __m256 __a)
 Stores single-precision floating point values from a 256-bit vector of [8 x float] to an unaligned memory location pointed to by __p. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_store_si256 (__m256i *__p, __m256i __a)
 Stores integer values from a 256-bit integer vector to a 32-byte aligned memory location pointed to by __p. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_si256 (__m256i *__p, __m256i __a)
 Stores integer values from a 256-bit integer vector to an unaligned memory location pointed to by __p. More...
 
static __inline __m128d __DEFAULT_FN_ATTRS _mm_maskload_pd (double const *__p, __m128i __m)
 Conditionally loads double-precision floating point elements from a memory location pointed to by __p into a 128-bit vector of [2 x double], depending on the mask bits associated with each data element. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_maskload_pd (double const *__p, __m256i __m)
 Conditionally loads double-precision floating point elements from a memory location pointed to by __p into a 256-bit vector of [4 x double], depending on the mask bits associated with each data element. More...
 
static __inline __m128 __DEFAULT_FN_ATTRS _mm_maskload_ps (float const *__p, __m128i __m)
 Conditionally loads single-precision floating point elements from a memory location pointed to by __p into a 128-bit vector of [4 x float], depending on the mask bits associated with each data element. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_maskload_ps (float const *__p, __m256i __m)
 Conditionally loads single-precision floating point elements from a memory location pointed to by __p into a 256-bit vector of [8 x float], depending on the mask bits associated with each data element. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_ps (float *__p, __m256i __m, __m256 __a)
 Moves single-precision floating point values from a 256-bit vector of [8 x float] to a memory location pointed to by __p, according to the specified mask. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm_maskstore_pd (double *__p, __m128i __m, __m128d __a)
 Moves double-precision values from a 128-bit vector of [2 x double] to a memory location pointed to by __p, according to the specified mask. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_pd (double *__p, __m256i __m, __m256d __a)
 Moves double-precision values from a 256-bit vector of [4 x double] to a memory location pointed to by __p, according to the specified mask. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm_maskstore_ps (float *__p, __m128i __m, __m128 __a)
 Moves single-precision floating point values from a 128-bit vector of [4 x float] to a memory location pointed to by __p, according to the specified mask. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_stream_si256 (__m256i *__a, __m256i __b)
 Moves integer data from a 256-bit integer vector to a 32-byte aligned memory location. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_stream_pd (double *__a, __m256d __b)
 Moves double-precision values from a 256-bit vector of [4 x double] to a 32-byte aligned memory location. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_stream_ps (float *__p, __m256 __a)
 Moves single-precision floating point values from a 256-bit vector of [8 x float] to a 32-byte aligned memory location. More...
 
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd (void)
 Creates a 256-bit vector of [4 x double] with undefined values. More...
 
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps (void)
 Creates a 256-bit vector of [8 x float] with undefined values. More...
 
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256 (void)
 Create a 256-bit integer vector with undefined values. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_pd (double __a, double __b, double __c, double __d)
 Constructs a 256-bit floating-point vector of [4 x double] initialized with the specified double-precision floating-point values. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_ps (float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h)
 Constructs a 256-bit floating-point vector of [8 x float] initialized with the specified single-precision floating-point values. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi32 (int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7)
 Constructs a 256-bit integer vector initialized with the specified 32-bit integral values. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi16 (short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00)
 Constructs a 256-bit integer vector initialized with the specified 16-bit integral values. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi8 (char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00)
 Constructs a 256-bit integer vector initialized with the specified 8-bit integral values. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi64x (long long __a, long long __b, long long __c, long long __d)
 Constructs a 256-bit integer vector initialized with the specified 64-bit integral values. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_pd (double __a, double __b, double __c, double __d)
 Constructs a 256-bit floating-point vector of [4 x double], initialized in reverse order with the specified double-precision floating-point values. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_ps (float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h)
 Constructs a 256-bit floating-point vector of [8 x float], initialized in reverse order with the specified single-precision floating-point values. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi32 (int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7)
 Constructs a 256-bit integer vector, initialized in reverse order with the specified 32-bit integral values. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi16 (short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00)
 Constructs a 256-bit integer vector, initialized in reverse order with the specified 16-bit integral values. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi8 (char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00)
 Constructs a 256-bit integer vector, initialized in reverse order with the specified 8-bit integral values. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi64x (long long __a, long long __b, long long __c, long long __d)
 Constructs a 256-bit integer vector, initialized in reverse order with the specified 64-bit integral values. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set1_pd (double __w)
 Constructs a 256-bit floating-point vector of [4 x double], with each of the four double-precision floating-point vector elements set to the specified double-precision floating-point value. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set1_ps (float __w)
 Constructs a 256-bit floating-point vector of [8 x float], with each of the eight single-precision floating-point vector elements set to the specified single-precision floating-point value. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32 (int __i)
 Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements set to the specified 32-bit integral value. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16 (short __w)
 Constructs a 256-bit integer vector of [16 x i16], with each of the 16-bit integral vector elements set to the specified 16-bit integral value. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8 (char __b)
 Constructs a 256-bit integer vector of [32 x i8], with each of the 8-bit integral vector elements set to the specified 8-bit integral value. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x (long long __q)
 Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements set to the specified 64-bit integral value. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd (void)
 Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps (void)
 Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256 (void)
 Constructs a 256-bit integer vector initialized to zero. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castpd_ps (__m256d __a)
 Casts a 256-bit floating-point vector of [4 x double] into a 256-bit floating-point vector of [8 x float]. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castpd_si256 (__m256d __a)
 Casts a 256-bit floating-point vector of [4 x double] into a 256-bit integer vector. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castps_pd (__m256 __a)
 Casts a 256-bit floating-point vector of [8 x float] into a 256-bit floating-point vector of [4 x double]. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castps_si256 (__m256 __a)
 Casts a 256-bit floating-point vector of [8 x float] into a 256-bit integer vector. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castsi256_ps (__m256i __a)
 Casts a 256-bit integer vector into a 256-bit floating-point vector of [8 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castsi256_pd (__m256i __a)
 Casts a 256-bit integer vector into a 256-bit floating-point vector of [4 x double]. More...
 
static __inline __m128d __DEFAULT_FN_ATTRS _mm256_castpd256_pd128 (__m256d __a)
 Returns the lower 128 bits of a 256-bit floating-point vector of [4 x double] as a 128-bit floating-point vector of [2 x double]. More...
 
static __inline __m128 __DEFAULT_FN_ATTRS _mm256_castps256_ps128 (__m256 __a)
 Returns the lower 128 bits of a 256-bit floating-point vector of [8 x float] as a 128-bit floating-point vector of [4 x float]. More...
 
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_castsi256_si128 (__m256i __a)
 Truncates a 256-bit integer vector into a 128-bit integer vector. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castpd128_pd256 (__m128d __a)
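 Constructs a 256-bit floating-point vector of [4 x double] from a 128-bit floating-point vector of [2 x double]. The lower 128 bits contain the value of the source vector; the contents of the upper 128 bits are undefined. More...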
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castps128_ps256 (__m128 __a)
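 Constructs a 256-bit floating-point vector of [8 x float] from a 128-bit floating-point vector of [4 x float]. The lower 128 bits contain the value of the source vector; the contents of the upper 128 bits are undefined. More...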
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castsi128_si256 (__m128i __a)
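 Constructs a 256-bit integer vector from a 128-bit integer vector. The lower 128 bits contain the value of the source vector; the contents of the upper 128 bits are undefined. More...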
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_zextpd128_pd256 (__m128d __a)
 Constructs a 256-bit floating-point vector of [4 x double] from a 128-bit floating-point vector of [2 x double]. The lower 128 bits contain the value of the source vector; the upper 128 bits are set to zero. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_zextps128_ps256 (__m128 __a)
 Constructs a 256-bit floating-point vector of [8 x float] from a 128-bit floating-point vector of [4 x float]. The lower 128 bits contain the value of the source vector; the upper 128 bits are set to zero. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_zextsi128_si256 (__m128i __a)
 Constructs a 256-bit integer vector from a 128-bit integer vector. The lower 128 bits contain the value of the source vector; the upper 128 bits are set to zero. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu2_m128 (float const *__addr_hi, float const *__addr_lo)
 Loads two 128-bit floating-point vectors of [4 x float] from unaligned memory locations and constructs a 256-bit floating-point vector of [8 x float] by concatenating the two 128-bit vectors. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu2_m128d (double const *__addr_hi, double const *__addr_lo)
 Loads two 128-bit floating-point vectors of [2 x double] from unaligned memory locations and constructs a 256-bit floating-point vector of [4 x double] by concatenating the two 128-bit vectors. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu2_m128i (__m128i const *__addr_hi, __m128i const *__addr_lo)
 Loads two 128-bit integer vectors from unaligned memory locations and constructs a 256-bit integer vector by concatenating the two 128-bit vectors. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128 (float *__addr_hi, float *__addr_lo, __m256 __a)
 Stores the upper and lower 128 bits of a 256-bit floating-point vector of [8 x float] into two different unaligned memory locations. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128d (double *__addr_hi, double *__addr_lo, __m256d __a)
 Stores the upper and lower 128 bits of a 256-bit floating-point vector of [4 x double] into two different unaligned memory locations. More...
 
static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128i (__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
 Stores the upper and lower 128 bits of a 256-bit integer vector into two different unaligned memory locations. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_m128 (__m128 __hi, __m128 __lo)
 Constructs a 256-bit floating-point vector of [8 x float] by concatenating two 128-bit floating-point vectors of [4 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_m128d (__m128d __hi, __m128d __lo)
 Constructs a 256-bit floating-point vector of [4 x double] by concatenating two 128-bit floating-point vectors of [2 x double]. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_m128i (__m128i __hi, __m128i __lo)
 Constructs a 256-bit integer vector by concatenating two 128-bit integer vectors. More...
 
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_m128 (__m128 __lo, __m128 __hi)
 Constructs a 256-bit floating-point vector of [8 x float] by concatenating two 128-bit floating-point vectors of [4 x float]. More...
 
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_m128d (__m128d __lo, __m128d __hi)
 Constructs a 256-bit floating-point vector of [4 x double] by concatenating two 128-bit floating-point vectors of [2 x double]. More...
 
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_m128i (__m128i __lo, __m128i __hi)
 Constructs a 256-bit integer vector by concatenating two 128-bit integer vectors. More...
 

Macro Definition Documentation

◆ __DEFAULT_FN_ATTRS

#define __DEFAULT_FN_ATTRS   __attribute__((__always_inline__, __nodebug__, __target__("avx")))

Definition at line 53 of file avxintrin.h.

Referenced by _mm256_add_pd(), _mm256_add_ps(), _mm256_addsub_pd(), _mm256_addsub_ps(), _mm256_and_pd(), _mm256_and_ps(), _mm256_andnot_pd(), _mm256_andnot_ps(), _mm256_blendv_pd(), _mm256_broadcast_pd(), _mm256_broadcast_ps(), _mm256_broadcast_sd(), _mm256_broadcast_ss(), _mm256_castpd128_pd256(), _mm256_castpd256_pd128(), _mm256_castpd_ps(), _mm256_castpd_si256(), _mm256_castps128_ps256(), _mm256_castps256_ps128(), _mm256_castps_pd(), _mm256_castps_si256(), _mm256_castsi128_si256(), _mm256_castsi256_pd(), _mm256_castsi256_ps(), _mm256_castsi256_si128(), _mm256_cvtepi32_pd(), _mm256_cvtepi32_ps(), _mm256_cvtpd_epi32(), _mm256_cvtpd_ps(), _mm256_cvtps_epi32(), _mm256_cvtps_pd(), _mm256_cvtsd_f64(), _mm256_cvtsi256_si32(), _mm256_cvtss_f32(), _mm256_cvttpd_epi32(), _mm256_cvttps_epi32(), _mm256_div_pd(), _mm256_div_ps(), _mm256_extract_epi16(), _mm256_extract_epi32(), _mm256_extract_epi8(), _mm256_hadd_pd(), _mm256_hadd_ps(), _mm256_hsub_pd(), _mm256_hsub_ps(), _mm256_insert_epi16(), _mm256_insert_epi32(), _mm256_insert_epi8(), _mm256_lddqu_si256(), _mm256_load_pd(), _mm256_load_ps(), _mm256_load_si256(), _mm256_loadu2_m128(), _mm256_loadu2_m128d(), _mm256_loadu2_m128i(), _mm256_loadu_pd(), _mm256_loadu_ps(), _mm256_loadu_si256(), _mm256_maskload_pd(), _mm256_maskload_ps(), _mm256_maskstore_pd(), _mm256_maskstore_ps(), _mm256_max_pd(), _mm256_max_ps(), _mm256_min_pd(), _mm256_min_ps(), _mm256_movedup_pd(), _mm256_movehdup_ps(), _mm256_moveldup_ps(), _mm256_movemask_pd(), _mm256_movemask_ps(), _mm256_mul_pd(), _mm256_mul_ps(), _mm256_or_pd(), _mm256_or_ps(), _mm256_permutevar_pd(), _mm256_rsqrt_ps(), _mm256_set1_epi16(), _mm256_set1_epi32(), _mm256_set1_epi64x(), _mm256_set1_epi8(), _mm256_set1_pd(), _mm256_set1_ps(), _mm256_set_epi16(), _mm256_set_epi32(), _mm256_set_epi64x(), _mm256_set_epi8(), _mm256_set_m128(), _mm256_set_m128d(), _mm256_set_m128i(), _mm256_set_pd(), _mm256_set_ps(), _mm256_setr_epi16(), _mm256_setr_epi32(), _mm256_setr_epi64x(), 
_mm256_setr_epi8(), _mm256_setr_m128(), _mm256_setr_m128d(), _mm256_setr_pd(), _mm256_setr_ps(), _mm256_setzero_pd(), _mm256_setzero_ps(), _mm256_setzero_si256(), _mm256_sqrt_pd(), _mm256_sqrt_ps(), _mm256_store_pd(), _mm256_store_ps(), _mm256_store_si256(), _mm256_storeu2_m128(), _mm256_storeu2_m128d(), _mm256_storeu2_m128i(), _mm256_storeu_pd(), _mm256_storeu_ps(), _mm256_storeu_si256(), _mm256_stream_pd(), _mm256_stream_ps(), _mm256_stream_si256(), _mm256_sub_pd(), _mm256_sub_ps(), _mm256_testc_pd(), _mm256_testc_ps(), _mm256_testc_si256(), _mm256_testnzc_pd(), _mm256_testnzc_ps(), _mm256_testnzc_si256(), _mm256_testz_pd(), _mm256_testz_ps(), _mm256_testz_si256(), _mm256_undefined_pd(), _mm256_undefined_ps(), _mm256_undefined_si256(), _mm256_unpackhi_pd(), _mm256_unpackhi_ps(), _mm256_unpacklo_pd(), _mm256_unpacklo_ps(), _mm256_xor_pd(), _mm256_xor_ps(), _mm256_zeroall(), _mm256_zeroupper(), _mm256_zextpd128_pd256(), _mm256_zextps128_ps256(), _mm_broadcast_ss(), _mm_maskload_pd(), _mm_maskload_ps(), _mm_maskstore_pd(), _mm_maskstore_ps(), _mm_permutevar_pd(), _mm_permutevar_ps(), _mm_testc_pd(), _mm_testc_ps(), _mm_testnzc_pd(), _mm_testnzc_ps(), _mm_testz_pd(), and _mm_testz_ps().

◆ _CMP_EQ_OQ

#define _CMP_EQ_OQ   0x00 /* Equal (ordered, non-signaling) */

Definition at line 1612 of file avxintrin.h.

◆ _CMP_EQ_OS

#define _CMP_EQ_OS   0x10 /* Equal (ordered, signaling) */

Definition at line 1628 of file avxintrin.h.

◆ _CMP_EQ_UQ

#define _CMP_EQ_UQ   0x08 /* Equal (unordered, non-signaling) */

Definition at line 1620 of file avxintrin.h.

◆ _CMP_EQ_US

#define _CMP_EQ_US   0x18 /* Equal (unordered, signaling) */

Definition at line 1636 of file avxintrin.h.

◆ _CMP_FALSE_OQ

#define _CMP_FALSE_OQ   0x0b /* False (ordered, non-signaling) */

Definition at line 1623 of file avxintrin.h.

◆ _CMP_FALSE_OS

#define _CMP_FALSE_OS   0x1b /* False (ordered, signaling) */

Definition at line 1639 of file avxintrin.h.

◆ _CMP_GE_OQ

#define _CMP_GE_OQ   0x1d /* Greater-than-or-equal (ordered, non-signaling) */

Definition at line 1641 of file avxintrin.h.

◆ _CMP_GE_OS

#define _CMP_GE_OS   0x0d /* Greater-than-or-equal (ordered, signaling) */

Definition at line 1625 of file avxintrin.h.

◆ _CMP_GT_OQ

#define _CMP_GT_OQ   0x1e /* Greater-than (ordered, non-signaling) */

Definition at line 1642 of file avxintrin.h.

◆ _CMP_GT_OS

#define _CMP_GT_OS   0x0e /* Greater-than (ordered, signaling) */

Definition at line 1626 of file avxintrin.h.

◆ _CMP_LE_OQ

#define _CMP_LE_OQ   0x12 /* Less-than-or-equal (ordered, non-signaling) */

Definition at line 1630 of file avxintrin.h.

◆ _CMP_LE_OS

#define _CMP_LE_OS   0x02 /* Less-than-or-equal (ordered, signaling) */

Definition at line 1614 of file avxintrin.h.

◆ _CMP_LT_OQ

#define _CMP_LT_OQ   0x11 /* Less-than (ordered, non-signaling) */

Definition at line 1629 of file avxintrin.h.

◆ _CMP_LT_OS

#define _CMP_LT_OS   0x01 /* Less-than (ordered, signaling) */

Definition at line 1613 of file avxintrin.h.

◆ _CMP_NEQ_OQ

#define _CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling) */

Definition at line 1624 of file avxintrin.h.

◆ _CMP_NEQ_OS

#define _CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling) */

Definition at line 1640 of file avxintrin.h.

◆ _CMP_NEQ_UQ

#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling) */

Definition at line 1616 of file avxintrin.h.

◆ _CMP_NEQ_US

#define _CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling) */

Definition at line 1632 of file avxintrin.h.

◆ _CMP_NGE_UQ

#define _CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */

Definition at line 1637 of file avxintrin.h.

◆ _CMP_NGE_US

#define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unordered, signaling) */

Definition at line 1621 of file avxintrin.h.

◆ _CMP_NGT_UQ

#define _CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling) */

Definition at line 1638 of file avxintrin.h.

◆ _CMP_NGT_US

#define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling) */

Definition at line 1622 of file avxintrin.h.

◆ _CMP_NLE_UQ

#define _CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unordered, non-signaling) */

Definition at line 1634 of file avxintrin.h.

◆ _CMP_NLE_US

#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling) */

Definition at line 1618 of file avxintrin.h.

◆ _CMP_NLT_UQ

#define _CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling) */

Definition at line 1633 of file avxintrin.h.

◆ _CMP_NLT_US

#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling) */

Definition at line 1617 of file avxintrin.h.

◆ _CMP_ORD_Q

#define _CMP_ORD_Q   0x07 /* Ordered (non-signaling) */

Definition at line 1619 of file avxintrin.h.

◆ _CMP_ORD_S

#define _CMP_ORD_S   0x17 /* Ordered (signaling) */

Definition at line 1635 of file avxintrin.h.

◆ _CMP_TRUE_UQ

#define _CMP_TRUE_UQ   0x0f /* True (unordered, non-signaling) */

Definition at line 1627 of file avxintrin.h.

◆ _CMP_TRUE_US

#define _CMP_TRUE_US   0x1f /* True (unordered, signaling) */

Definition at line 1643 of file avxintrin.h.

◆ _CMP_UNORD_Q

#define _CMP_UNORD_Q   0x03 /* Unordered (non-signaling) */

Definition at line 1615 of file avxintrin.h.

◆ _CMP_UNORD_S

#define _CMP_UNORD_S   0x13 /* Unordered (signaling) */

Definition at line 1631 of file avxintrin.h.

◆ _mm256_blend_pd

#define _mm256_blend_pd(V1, V2, M)
Value:
__extension__ ({ \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
(__v4df)(__m256d)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })

Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the immediate integer operand.

__m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);

This intrinsic corresponds to the VBLENDPD instruction.

Parameters
V1: A 256-bit vector of [4 x double].
V2: A 256-bit vector of [4 x double].
M: An immediate integer operand, with mask bits [3:0] specifying how the values are to be copied. The position of the mask bit corresponds to the index of a copied value. When a mask bit is 0, the corresponding 64-bit element in operand V1 is copied to the same position in the destination. When a mask bit is 1, the corresponding 64-bit element in operand V2 is copied to the same position in the destination.
Returns
A 256-bit vector of [4 x double] containing the copied values.

Definition at line 1357 of file avxintrin.h.

◆ _mm256_blend_ps

#define _mm256_blend_ps(V1, V2, M)
Value:
__extension__ ({ \
(__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
(((M) & 0x08) ? 11 : 3), \
(((M) & 0x10) ? 12 : 4), \
(((M) & 0x20) ? 13 : 5), \
(((M) & 0x40) ? 14 : 6), \
(((M) & 0x80) ? 15 : 7)); })

Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the immediate integer operand.

__m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);

This intrinsic corresponds to the VBLENDPS instruction.

Parameters
V1: A 256-bit vector of [8 x float].
V2: A 256-bit vector of [8 x float].
M: An immediate integer operand, with mask bits [7:0] specifying how the values are to be copied. The position of the mask bit corresponds to the index of a copied value. When a mask bit is 0, the corresponding 32-bit element in operand V1 is copied to the same position in the destination. When a mask bit is 1, the corresponding 32-bit element in operand V2 is copied to the same position in the destination.
Returns
A 256-bit vector of [8 x float] containing the copied values.

Definition at line 1389 of file avxintrin.h.

◆ _mm256_ceil_pd

#define _mm256_ceil_pd (   V)    _mm256_round_pd((V), _MM_FROUND_CEIL)

Rounds up the values stored in a 256-bit vector of [4 x double].

The source values are rounded up to integer values and returned as 64-bit double-precision floating-point values.

__m256d _mm256_ceil_pd(__m256d V);

This intrinsic corresponds to the VROUNDPD instruction.

Parameters
V: A 256-bit vector of [4 x double].
Returns
A 256-bit vector of [4 x double] containing the rounded up values.

Definition at line 461 of file avxintrin.h.

◆ _mm256_ceil_ps

#define _mm256_ceil_ps (   V)    _mm256_round_ps((V), _MM_FROUND_CEIL)

Rounds up the values stored in a 256-bit vector of [8 x float].

The source values are rounded up to integer values and returned as floating-point values.

__m256 _mm256_ceil_ps(__m256 V);

This intrinsic corresponds to the VROUNDPS instruction.

Parameters
V: A 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the rounded up values.

Definition at line 496 of file avxintrin.h.

◆ _mm256_cmp_pd

#define _mm256_cmp_pd(a, b, c)
Value:
__extension__ ({ \
(__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
(__v4df)(__m256d)(b), (c)); })

Compares each of the corresponding double-precision values of two 256-bit vectors of [4 x double], using the operation specified by the immediate integer operand.

Returns a [4 x double] vector consisting of four doubles corresponding to the four comparison results: zero if the comparison is false, and all 1's if the comparison is true.

__m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);

This intrinsic corresponds to the VCMPPD instruction.

Parameters
a: A 256-bit vector of [4 x double].
b: A 256-bit vector of [4 x double].
c: An immediate integer operand, with bits [4:0] specifying which comparison operation to use:
0x00 : Equal (ordered, non-signaling) 0x01 : Less-than (ordered, signaling) 0x02 : Less-than-or-equal (ordered, signaling) 0x03 : Unordered (non-signaling) 0x04 : Not-equal (unordered, non-signaling) 0x05 : Not-less-than (unordered, signaling) 0x06 : Not-less-than-or-equal (unordered, signaling) 0x07 : Ordered (non-signaling) 0x08 : Equal (unordered, non-signaling) 0x09 : Not-greater-than-or-equal (unordered, signaling) 0x0a : Not-greater-than (unordered, signaling) 0x0b : False (ordered, non-signaling) 0x0c : Not-equal (ordered, non-signaling) 0x0d : Greater-than-or-equal (ordered, signaling) 0x0e : Greater-than (ordered, signaling) 0x0f : True (unordered, non-signaling) 0x10 : Equal (ordered, signaling) 0x11 : Less-than (ordered, non-signaling) 0x12 : Less-than-or-equal (ordered, non-signaling) 0x13 : Unordered (signaling) 0x14 : Not-equal (unordered, signaling) 0x15 : Not-less-than (unordered, non-signaling) 0x16 : Not-less-than-or-equal (unordered, non-signaling) 0x17 : Ordered (signaling) 0x18 : Equal (unordered, signaling) 0x19 : Not-greater-than-or-equal (unordered, non-signaling) 0x1a : Not-greater-than (unordered, non-signaling) 0x1b : False (ordered, signaling) 0x1c : Not-equal (ordered, signaling) 0x1d : Greater-than-or-equal (ordered, non-signaling) 0x1e : Greater-than (ordered, non-signaling) 0x1f : True (unordered, signaling)
Returns
A 256-bit vector of [4 x double] containing the comparison results.

Definition at line 1821 of file avxintrin.h.

◆ _mm256_cmp_ps

#define _mm256_cmp_ps(a, b, c)
Value:
__extension__ ({ \
(__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), (c)); })

Compares each of the corresponding values of two 256-bit vectors of [8 x float], using the operation specified by the immediate integer operand.

Returns a [8 x float] vector consisting of eight floats corresponding to the eight comparison results: zero if the comparison is false, and all 1's if the comparison is true.

__m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);

This intrinsic corresponds to the VCMPPS instruction.

Parameters
a: A 256-bit vector of [8 x float].
b: A 256-bit vector of [8 x float].
c: An immediate integer operand, with bits [4:0] specifying which comparison operation to use:
0x00 : Equal (ordered, non-signaling) 0x01 : Less-than (ordered, signaling) 0x02 : Less-than-or-equal (ordered, signaling) 0x03 : Unordered (non-signaling) 0x04 : Not-equal (unordered, non-signaling) 0x05 : Not-less-than (unordered, signaling) 0x06 : Not-less-than-or-equal (unordered, signaling) 0x07 : Ordered (non-signaling) 0x08 : Equal (unordered, non-signaling) 0x09 : Not-greater-than-or-equal (unordered, signaling) 0x0a : Not-greater-than (unordered, signaling) 0x0b : False (ordered, non-signaling) 0x0c : Not-equal (ordered, non-signaling) 0x0d : Greater-than-or-equal (ordered, signaling) 0x0e : Greater-than (ordered, signaling) 0x0f : True (unordered, non-signaling) 0x10 : Equal (ordered, signaling) 0x11 : Less-than (ordered, non-signaling) 0x12 : Less-than-or-equal (ordered, non-signaling) 0x13 : Unordered (signaling) 0x14 : Not-equal (unordered, signaling) 0x15 : Not-less-than (unordered, non-signaling) 0x16 : Not-less-than-or-equal (unordered, non-signaling) 0x17 : Ordered (signaling) 0x18 : Equal (unordered, signaling) 0x19 : Not-greater-than-or-equal (unordered, non-signaling) 0x1a : Not-greater-than (unordered, non-signaling) 0x1b : False (ordered, signaling) 0x1c : Not-equal (ordered, signaling) 0x1d : Greater-than-or-equal (ordered, non-signaling) 0x1e : Greater-than (ordered, non-signaling) 0x1f : True (unordered, signaling)
Returns
A 256-bit vector of [8 x float] containing the comparison results.

Definition at line 1881 of file avxintrin.h.

◆ _mm256_dp_ps

#define _mm256_dp_ps(V1, V2, M)
Value:
__extension__ ({ \
(__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), (M)); })

Computes two dot products in parallel, using the lower and upper halves of two [8 x float] vectors as input to the two computations, and returning the two dot products in the lower and upper halves of the [8 x float] result.

The immediate integer operand controls which input elements will contribute to the dot product, and where the final results are returned. In general, for each dot product, the four corresponding elements of the input vectors are multiplied; the first two and second two products are summed, then the two sums are added to form the final result.

__m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);

This intrinsic corresponds to the VDPPS instruction.

Parameters
V1: A vector of [8 x float] values, treated as two [4 x float] vectors.
V2: A vector of [8 x float] values, treated as two [4 x float] vectors.
M: An immediate integer argument. Bits [7:4] determine which elements of the input vectors are used, with bit [4] corresponding to the lowest element and bit [7] corresponding to the highest element of each [4 x float] subvector. If a bit is set, the corresponding elements from the two input vectors are used as an input for dot product; otherwise that input is treated as zero. Bits [3:0] determine which elements of the result will receive a copy of the final dot product, with bit [0] corresponding to the lowest element and bit [3] corresponding to the highest element of each [4 x float] subvector. If a bit is set, the dot product is returned in the corresponding element; otherwise that element is set to zero. The bitmask is applied in the same way to each of the two parallel dot product computations.
Returns
A 256-bit vector of [8 x float] containing the two dot products.

Definition at line 1495 of file avxintrin.h.

◆ _mm256_extractf128_pd

#define _mm256_extractf128_pd(V, M)
Value:
__extension__ ({ \
(__m128d)__builtin_shufflevector( \
(__v4df)(__m256d)(V), \
(__v4df)(_mm256_undefined_pd()), \
(((M) & 1) ? 2 : 0), \
(((M) & 1) ? 3 : 1) );})
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void)
Create a 256-bit vector of [4 x double] with undefined values.
Definition: avxintrin.h:3647

Extracts either the upper or the lower 128 bits from a 256-bit vector of [4 x double], as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit vector of [2 x double].

__m128d _mm256_extractf128_pd(__m256d V, const int M);

This intrinsic corresponds to the VEXTRACTF128 instruction.

Parameters
V: A 256-bit vector of [4 x double].
M: An immediate integer. The least significant bit determines which bits are extracted from the first parameter:
If bit [0] of M is 0, bits [127:0] of V are copied to the result.
If bit [0] of M is 1, bits [255:128] of V are copied to the result.
Returns
A 128-bit vector of [2 x double] containing the extracted bits.

Definition at line 4826 of file avxintrin.h.

Referenced by _mm256_storeu2_m128d().

◆ _mm256_extractf128_ps

#define _mm256_extractf128_ps(V, M)
Value:
__extension__ ({ \
(__m128)__builtin_shufflevector( \
(__v8sf)(__m256)(V), \
(__v8sf)(_mm256_undefined_ps()), \
(((M) & 1) ? 4 : 0), \
(((M) & 1) ? 5 : 1), \
(((M) & 1) ? 6 : 2), \
(((M) & 1) ? 7 : 3) );})
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition: avxintrin.h:3660

Extracts either the upper or the lower 128 bits from a 256-bit vector of [8 x float], as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit vector of [4 x float].

__m128 _mm256_extractf128_ps(__m256 V, const int M);

This intrinsic corresponds to the VEXTRACTF128 instruction.

Parameters
V: A 256-bit vector of [8 x float].
M: An immediate integer. The least significant bit determines which bits are extracted from the first parameter:
If bit [0] of M is 0, bits [127:0] of V are copied to the result.
If bit [0] of M is 1, bits [255:128] of V are copied to the result.
Returns
A 128-bit vector of [4 x float] containing the extracted bits.

Definition at line 4796 of file avxintrin.h.

Referenced by _mm256_storeu2_m128().

◆ _mm256_extractf128_si256

#define _mm256_extractf128_si256(V, M)
Value:
__extension__ ({ \
(__m128i)__builtin_shufflevector( \
(__v4di)(__m256i)(V), \
(__v4di)(_mm256_undefined_si256()), \
(((M) & 1) ? 2 : 0), \
(((M) & 1) ? 3 : 1) );})
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition: avxintrin.h:3673

Extracts either the upper or the lower 128 bits from a 256-bit integer vector, as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit integer vector.

__m128i _mm256_extractf128_si256(__m256i V, const int M);

This intrinsic corresponds to the VEXTRACTF128 instruction.

Parameters
VA 256-bit integer vector.
MAn immediate integer. The least significant bit determines which bits are extracted from the first parameter:
If bit [0] of M is 0, bits [127:0] of V are copied to the result.
If bit [0] of M is 1, bits [255:128] of V are copied to the result.
Returns
A 128-bit integer vector containing the extracted bits.

Definition at line 4854 of file avxintrin.h.

Referenced by _mm256_storeu2_m128i().

◆ _mm256_floor_pd

#define _mm256_floor_pd (   V)    _mm256_round_pd((V), _MM_FROUND_FLOOR)

Rounds down the values stored in a 256-bit vector of [4 x double].

The source values are rounded down to integer values and returned as 64-bit double-precision floating-point values.

__m256d _mm256_floor_pd(__m256d V);

This intrinsic corresponds to the VROUNDPD instruction.

Parameters
VA 256-bit vector of [4 x double].
Returns
A 256-bit vector of [4 x double] containing the rounded down values.

Definition at line 479 of file avxintrin.h.

◆ _mm256_floor_ps

#define _mm256_floor_ps (   V)    _mm256_round_ps((V), _MM_FROUND_FLOOR)

Rounds down the values stored in a 256-bit vector of [8 x float].

The source values are rounded down to integer values and returned as floating-point values.

__m256 _mm256_floor_ps(__m256 V);

This intrinsic corresponds to the VROUNDPS instruction.

Parameters
VA 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the rounded down values.

Definition at line 513 of file avxintrin.h.

◆ _mm256_insertf128_pd

#define _mm256_insertf128_pd (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256d)__builtin_shufflevector( \
(__v4df)(__m256d)(V1), \
(__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
(((M) & 1) ? 0 : 4), \
(((M) & 1) ? 1 : 5), \
(((M) & 1) ? 4 : 2), \
(((M) & 1) ? 5 : 3) );})
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castpd128_pd256(__m128d __a)
Constructs a 256-bit floating-point vector of [4 x double] from a 128-bit floating-point vector of [2...
Definition: avxintrin.h:4532

Constructs a new 256-bit vector of [4 x double] by first duplicating a 256-bit vector of [4 x double] given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit vector of [2 x double] in the second parameter.

The immediate integer parameter determines whether the upper or the lower 128 bits are replaced.

__m256d _mm256_insertf128_pd(__m256d V1, __m128d V2, const int M);

This intrinsic corresponds to the VINSERTF128 instruction.

Parameters
V1A 256-bit vector of [4 x double]. This vector is copied to the result first, and then either the upper or the lower 128 bits of the result will be replaced by the contents of V2.
V2A 128-bit vector of [2 x double]. The contents of this parameter are written to either the upper or the lower 128 bits of the result depending on the value of parameter M.
MAn immediate integer. The least significant bit determines how the values from the two parameters are interleaved:
If bit [0] of M is 0, the contents of V2 are copied to bits [127:0] of the result, and bits [255:128] of V1 are copied to bits [255:128] of the result.
If bit [0] of M is 1, the contents of V2 are copied to bits [255:128] of the result, and bits [127:0] of V1 are copied to bits [127:0] of the result.
Returns
A 256-bit vector of [4 x double] containing the interleaved values.

Definition at line 4718 of file avxintrin.h.

Referenced by _mm256_loadu2_m128d().

◆ _mm256_insertf128_ps

#define _mm256_insertf128_ps (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256)__builtin_shufflevector( \
(__v8sf)(__m256)(V1), \
(__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
(((M) & 1) ? 0 : 8), \
(((M) & 1) ? 1 : 9), \
(((M) & 1) ? 2 : 10), \
(((M) & 1) ? 3 : 11), \
(((M) & 1) ? 8 : 4), \
(((M) & 1) ? 9 : 5), \
(((M) & 1) ? 10 : 6), \
(((M) & 1) ? 11 : 7) );})
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castps128_ps256(__m128 __a)
Constructs a 256-bit floating-point vector of [8 x float] from a 128-bit floating-point vector of [4 ...
Definition: avxintrin.h:4553

Constructs a new 256-bit vector of [8 x float] by first duplicating a 256-bit vector of [8 x float] given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit vector of [4 x float] in the second parameter.

The immediate integer parameter determines whether the upper or the lower 128 bits are replaced.

__m256 _mm256_insertf128_ps(__m256 V1, __m128 V2, const int M);

This intrinsic corresponds to the VINSERTF128 instruction.

Parameters
V1A 256-bit vector of [8 x float]. This vector is copied to the result first, and then either the upper or the lower 128 bits of the result will be replaced by the contents of V2.
V2A 128-bit vector of [4 x float]. The contents of this parameter are written to either the upper or the lower 128 bits of the result depending on the value of parameter M.
MAn immediate integer. The least significant bit determines how the values from the two parameters are interleaved:
If bit [0] of M is 0, the contents of V2 are copied to bits [127:0] of the result, and bits [255:128] of V1 are copied to bits [255:128] of the result.
If bit [0] of M is 1, the contents of V2 are copied to bits [255:128] of the result, and bits [127:0] of V1 are copied to bits [127:0] of the result.
Returns
A 256-bit vector of [8 x float] containing the interleaved values.

Definition at line 4671 of file avxintrin.h.

Referenced by _mm256_loadu2_m128().

◆ _mm256_insertf128_si256

#define _mm256_insertf128_si256 (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256i)__builtin_shufflevector( \
(__v4di)(__m256i)(V1), \
(__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
(((M) & 1) ? 0 : 4), \
(((M) & 1) ? 1 : 5), \
(((M) & 1) ? 4 : 2), \
(((M) & 1) ? 5 : 3) );})
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castsi128_si256(__m128i __a)
Constructs a 256-bit integer vector from a 128-bit integer vector.
Definition: avxintrin.h:4572

Constructs a new 256-bit integer vector by first duplicating a 256-bit integer vector given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit integer vector in the second parameter.

The immediate integer parameter determines whether the upper or the lower 128 bits are replaced.

__m256i _mm256_insertf128_si256(__m256i V1, __m128i V2, const int M);

This intrinsic corresponds to the VINSERTF128 instruction.

Parameters
V1A 256-bit integer vector. This vector is copied to the result first, and then either the upper or the lower 128 bits of the result will be replaced by the contents of V2.
V2A 128-bit integer vector. The contents of this parameter are written to either the upper or the lower 128 bits of the result depending on the value of parameter M.
MAn immediate integer. The least significant bit determines how the values from the two parameters are interleaved:
If bit [0] of M is 0, the contents of V2 are copied to bits [127:0] of the result, and bits [255:128] of V1 are copied to bits [255:128] of the result.
If bit [0] of M is 1, the contents of V2 are copied to bits [255:128] of the result, and bits [127:0] of V1 are copied to bits [127:0] of the result.
Returns
A 256-bit integer vector containing the interleaved values.

Definition at line 4761 of file avxintrin.h.

Referenced by _mm256_loadu2_m128i().

◆ _mm256_permute2f128_pd

#define _mm256_permute2f128_pd (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
(__v4df)(__m256d)(V2), (M)); })

Permutes 128-bit data values stored in two 256-bit vectors of [4 x double], as specified by the immediate integer operand.

__m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);

This intrinsic corresponds to the VPERM2F128 instruction.

Parameters
V1A 256-bit vector of [4 x double].
V2A 256-bit vector of [4 x double].
MAn immediate integer operand specifying how the values are to be permuted.
Bits [1:0]:
00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination.
01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination.
10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination.
11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination.
Bits [5:4]:
00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination.
01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination.
10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination.
11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination.
Returns
A 256-bit vector of [4 x double] containing the copied values.

Definition at line 1247 of file avxintrin.h.

◆ _mm256_permute2f128_ps

#define _mm256_permute2f128_ps (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), (M)); })

Permutes 128-bit data values stored in two 256-bit vectors of [8 x float], as specified by the immediate integer operand.

__m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);

This intrinsic corresponds to the VPERM2F128 instruction.

Parameters
V1A 256-bit vector of [8 x float].
V2A 256-bit vector of [8 x float].
MAn immediate integer operand specifying how the values are to be permuted.
Bits [1:0]:
00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination.
01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination.
10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination.
11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination.
Bits [5:4]:
00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination.
01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination.
10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination.
11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination.
Returns
A 256-bit vector of [8 x float] containing the copied values.

Definition at line 1288 of file avxintrin.h.

◆ _mm256_permute2f128_si256

#define _mm256_permute2f128_si256 (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
(__v8si)(__m256i)(V2), (M)); })

Permutes 128-bit data values stored in two 256-bit integer vectors, as specified by the immediate integer operand.

__m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);

This intrinsic corresponds to the VPERM2F128 instruction.

Parameters
V1A 256-bit integer vector.
V2A 256-bit integer vector.
MAn immediate integer operand specifying how the values are to be copied.
Bits [1:0]:
00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination.
01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination.
10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination.
11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination.
Bits [5:4]:
00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination.
01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination.
10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination.
11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination.
Returns
A 256-bit integer vector containing the copied values.

Definition at line 1328 of file avxintrin.h.

◆ _mm256_permute_pd

#define _mm256_permute_pd (   A,
  C 
)
Value:
__extension__ ({ \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
(__v4df)_mm256_undefined_pd(), \
0 + (((C) >> 0) & 0x1), \
0 + (((C) >> 1) & 0x1), \
2 + (((C) >> 2) & 0x1), \
2 + (((C) >> 3) & 0x1)); })
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void)
Create a 256-bit vector of [4 x double] with undefined values.
Definition: avxintrin.h:3647

Copies the values in a 256-bit vector of [4 x double] as specified by the immediate integer operand.

__m256d _mm256_permute_pd(__m256d A, const int C);

This intrinsic corresponds to the VPERMILPD instruction.

Parameters
AA 256-bit vector of [4 x double].
CAn immediate integer operand specifying how the values are to be copied.
Bit [0]:
0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector.
1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector.
Bit [1]:
0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector.
1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector.
Bit [2]:
0: Bits [191:128] of the source are copied to bits [191:128] of the returned vector.
1: Bits [255:192] of the source are copied to bits [191:128] of the returned vector.
Bit [3]:
0: Bits [191:128] of the source are copied to bits [255:192] of the returned vector.
1: Bits [255:192] of the source are copied to bits [255:192] of the returned vector.
Returns
A 256-bit vector of [4 x double] containing the copied values.

Definition at line 1042 of file avxintrin.h.

◆ _mm256_permute_ps

#define _mm256_permute_ps (   A,
  C 
)
Value:
__extension__ ({ \
(__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
(__v8sf)_mm256_undefined_ps(), \
0 + (((C) >> 0) & 0x3), \
0 + (((C) >> 2) & 0x3), \
0 + (((C) >> 4) & 0x3), \
0 + (((C) >> 6) & 0x3), \
4 + (((C) >> 0) & 0x3), \
4 + (((C) >> 2) & 0x3), \
4 + (((C) >> 4) & 0x3), \
4 + (((C) >> 6) & 0x3)); })
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition: avxintrin.h:3660

Copies the values in a 256-bit vector of [8 x float] as specified by the immediate integer operand.

__m256 _mm256_permute_ps(__m256 A, const int C);

This intrinsic corresponds to the VPERMILPS instruction.

Parameters
AA 256-bit vector of [8 x float].
CAn immediate integer operand specifying how the values are to be copied.
Bits [1:0]:
00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector.
01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector.
10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector.
11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector.
Bits [3:2]:
00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector.
01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector.
10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector.
11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector.
Bits [5:4]:
00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector.
01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector.
10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector.
11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector.
Bits [7:6]:
00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector.
01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector.
10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector.
11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector.
Bits [1:0]:
00: Bits [159:128] of the source are copied to bits [159:128] of the returned vector.
01: Bits [191:160] of the source are copied to bits [159:128] of the returned vector.
10: Bits [223:192] of the source are copied to bits [159:128] of the returned vector.
11: Bits [255:224] of the source are copied to bits [159:128] of the returned vector.
Bits [3:2]:
00: Bits [159:128] of the source are copied to bits [191:160] of the returned vector.
01: Bits [191:160] of the source are copied to bits [191:160] of the returned vector.
10: Bits [223:192] of the source are copied to bits [191:160] of the returned vector.
11: Bits [255:224] of the source are copied to bits [191:160] of the returned vector.
Bits [5:4]:
00: Bits [159:128] of the source are copied to bits [223:192] of the returned vector.
01: Bits [191:160] of the source are copied to bits [223:192] of the returned vector.
10: Bits [223:192] of the source are copied to bits [223:192] of the returned vector.
11: Bits [255:224] of the source are copied to bits [223:192] of the returned vector.
Bits [7:6]:
00: Bits [159:128] of the source are copied to bits [255:224] of the returned vector.
01: Bits [191:160] of the source are copied to bits [255:224] of the returned vector.
10: Bits [223:192] of the source are copied to bits [255:224] of the returned vector.
11: Bits [255:224] of the source are copied to bits [255:224] of the returned vector.
Returns
A 256-bit vector of [8 x float] containing the copied values.

Definition at line 1198 of file avxintrin.h.

◆ _mm256_round_pd

#define _mm256_round_pd (   V,
  M 
)
Value:
__extension__ ({ \
(__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)); })

Rounds the values in a 256-bit vector of [4 x double] as specified by the byte operand.

The source values are rounded to integer values and returned as 64-bit double-precision floating-point values.

__m256d _mm256_round_pd(__m256d V, const int M);

This intrinsic corresponds to the VROUNDPD instruction.

Parameters
VA 256-bit vector of [4 x double].
MAn integer value that specifies the rounding operation.
Bits [7:4] are reserved.
Bit [3] is a precision exception value:
0: A normal PE exception is used.
1: The PE field is not updated.
Bit [2] is the rounding control source:
0: Use bits [1:0] of M.
1: Use the current MXCSR setting.
Bits [1:0] contain the rounding control definition:
00: Nearest.
01: Downward (toward negative infinity).
10: Upward (toward positive infinity).
11: Truncated.
Returns
A 256-bit vector of [4 x double] containing the rounded values.

Definition at line 411 of file avxintrin.h.

◆ _mm256_round_ps

#define _mm256_round_ps (   V,
  M 
)
Value:
__extension__ ({ \
(__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)); })

Rounds the values stored in a 256-bit vector of [8 x float] as specified by the byte operand.

The source values are rounded to integer values and returned as floating-point values.

__m256 _mm256_round_ps(__m256 V, const int M);

This intrinsic corresponds to the VROUNDPS instruction.

Parameters
VA 256-bit vector of [8 x float].
MAn integer value that specifies the rounding operation.
Bits [7:4] are reserved.
Bit [3] is a precision exception value:
0: A normal PE exception is used.
1: The PE field is not updated.
Bit [2] is the rounding control source:
0: Use bits [1:0] of M.
1: Use the current MXCSR setting.
Bits [1:0] contain the rounding control definition:
00: Nearest.
01: Downward (toward negative infinity).
10: Upward (toward positive infinity).
11: Truncated.
Returns
A 256-bit vector of [8 x float] containing the rounded values.

Definition at line 443 of file avxintrin.h.

◆ _mm256_shuffle_pd

#define _mm256_shuffle_pd (   a,
  b,
  mask 
)
Value:
__extension__ ({ \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
(__v4df)(__m256d)(b), \
0 + (((mask) >> 0) & 0x1), \
4 + (((mask) >> 1) & 0x1), \
2 + (((mask) >> 2) & 0x1), \
6 + (((mask) >> 3) & 0x1)); })

Selects four double-precision values from the 256-bit operands of [4 x double], as specified by the immediate value operand.

The selected elements from the first 256-bit operand are copied to bits [63:0] and bits [191:128] in the destination, and the selected elements from the second 256-bit operand are copied to bits [127:64] and bits [255:192] in the destination. For example, if bits [3:0] of the immediate operand contain a value of 0xF, the 256-bit destination vector would contain the following values: b[3], a[3], b[1], a[1].

__m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);

This intrinsic corresponds to the VSHUFPD instruction.

Parameters
aA 256-bit vector of [4 x double].
bA 256-bit vector of [4 x double].
maskAn immediate value containing 8-bit values specifying which elements to copy from a and b:
Bit [0]=0: Bits [63:0] are copied from a to bits [63:0] of the destination.
Bit [0]=1: Bits [127:64] are copied from a to bits [63:0] of the destination.
Bit [1]=0: Bits [63:0] are copied from b to bits [127:64] of the destination.
Bit [1]=1: Bits [127:64] are copied from b to bits [127:64] of the destination.
Bit [2]=0: Bits [191:128] are copied from a to bits [191:128] of the destination.
Bit [2]=1: Bits [255:192] are copied from a to bits [191:128] of the destination.
Bit [3]=0: Bits [191:128] are copied from b to bits [255:192] of the destination.
Bit [3]=1: Bits [255:192] are copied from b to bits [255:192] of the destination.
Returns
A 256-bit vector of [4 x double] containing the shuffled values.

Definition at line 1603 of file avxintrin.h.

◆ _mm256_shuffle_ps

#define _mm256_shuffle_ps (   a,
  b,
  mask 
)
Value:
__extension__ ({ \
(__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), \
0 + (((mask) >> 0) & 0x3), \
0 + (((mask) >> 2) & 0x3), \
8 + (((mask) >> 4) & 0x3), \
8 + (((mask) >> 6) & 0x3), \
4 + (((mask) >> 0) & 0x3), \
4 + (((mask) >> 2) & 0x3), \
12 + (((mask) >> 4) & 0x3), \
12 + (((mask) >> 6) & 0x3)); })

Selects 8 float values from the 256-bit operands of [8 x float], as specified by the immediate value operand.

The four selected elements in each operand are copied to the destination according to the bits specified in the immediate operand. The selected elements from the first 256-bit operand are copied to bits [63:0] and bits [191:128] of the destination, and the selected elements from the second 256-bit operand are copied to bits [127:64] and bits [255:192] of the destination. For example, if bits [7:0] of the immediate operand contain a value of 0xFF, the 256-bit destination vector would contain the following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].

__m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);

This intrinsic corresponds to the VSHUFPS instruction.

Parameters
aA 256-bit vector of [8 x float]. The four selected elements in this operand are copied to bits [63:0] and bits [191:128] in the destination, according to the bits specified in the immediate operand.
bA 256-bit vector of [8 x float]. The four selected elements in this operand are copied to bits [127:64] and bits [255:192] in the destination, according to the bits specified in the immediate operand.
maskAn immediate value containing an 8-bit value specifying which elements to copy from a and b.
Bits [3:0] specify the values copied from operand a.
Bits [7:4] specify the values copied from operand b.
The destinations within the 256-bit destination are assigned values as follows, according to the bit value assignments described below:
Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the destination.
Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the destination.
Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the destination.
Bits [7:6] are used to assign values to bits [127:96] and [255:224] in the destination.
Bit value assignments:
00: Bits [31:0] and [159:128] are copied from the selected operand.
01: Bits [63:32] and [191:160] are copied from the selected operand.
10: Bits [95:64] and [223:192] are copied from the selected operand.
11: Bits [127:96] and [255:224] are copied from the selected operand.
Returns
A 256-bit vector of [8 x float] containing the shuffled values.

Definition at line 1549 of file avxintrin.h.

◆ _mm_cmp_pd

#define _mm_cmp_pd (   a,
  b,
  c 
)
Value:
__extension__ ({ \
(__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)); })

Compares each of the corresponding double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand.

Returns a [2 x double] vector consisting of two doubles corresponding to the two comparison results: zero if the comparison is false, and all 1's if the comparison is true.

__m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);

This intrinsic corresponds to the VCMPPD instruction.

Parameters
aA 128-bit vector of [2 x double].
bA 128-bit vector of [2 x double].
cAn immediate integer operand, with bits [4:0] specifying which comparison operation to use:
0x00 : Equal (ordered, non-signaling) 0x01 : Less-than (ordered, signaling) 0x02 : Less-than-or-equal (ordered, signaling) 0x03 : Unordered (non-signaling) 0x04 : Not-equal (unordered, non-signaling) 0x05 : Not-less-than (unordered, signaling) 0x06 : Not-less-than-or-equal (unordered, signaling) 0x07 : Ordered (non-signaling) 0x08 : Equal (unordered, non-signaling) 0x09 : Not-greater-than-or-equal (unordered, signaling) 0x0a : Not-greater-than (unordered, signaling) 0x0b : False (ordered, non-signaling) 0x0c : Not-equal (ordered, non-signaling) 0x0d : Greater-than-or-equal (ordered, signaling) 0x0e : Greater-than (ordered, signaling) 0x0f : True (unordered, non-signaling) 0x10 : Equal (ordered, signaling) 0x11 : Less-than (ordered, non-signaling) 0x12 : Less-than-or-equal (ordered, non-signaling) 0x13 : Unordered (signaling) 0x14 : Not-equal (unordered, signaling) 0x15 : Not-less-than (unordered, non-signaling) 0x16 : Not-less-than-or-equal (unordered, non-signaling) 0x17 : Ordered (signaling) 0x18 : Equal (unordered, signaling) 0x19 : Not-greater-than-or-equal (unordered, non-signaling) 0x1a : Not-greater-than (unordered, non-signaling) 0x1b : False (ordered, signaling) 0x1c : Not-equal (ordered, signaling) 0x1d : Greater-than-or-equal (ordered, non-signaling) 0x1e : Greater-than (ordered, non-signaling) 0x1f : True (unordered, signaling)
Returns
A 128-bit vector of [2 x double] containing the comparison results.

Definition at line 1701 of file avxintrin.h.

◆ _mm_cmp_ps

#define _mm_cmp_ps (   a,
  b,
  c 
)
Value:
__extension__ ({ \
(__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)); })

Compares each of the corresponding values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand.

Returns a [4 x float] vector consisting of four floats corresponding to the four comparison results: zero if the comparison is false, and all 1's if the comparison is true.

__m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);

This intrinsic corresponds to the VCMPPS instruction.

Parameters
aA 128-bit vector of [4 x float].
bA 128-bit vector of [4 x float].
cAn immediate integer operand, with bits [4:0] specifying which comparison operation to use:
0x00 : Equal (ordered, non-signaling) 0x01 : Less-than (ordered, signaling) 0x02 : Less-than-or-equal (ordered, signaling) 0x03 : Unordered (non-signaling) 0x04 : Not-equal (unordered, non-signaling) 0x05 : Not-less-than (unordered, signaling) 0x06 : Not-less-than-or-equal (unordered, signaling) 0x07 : Ordered (non-signaling) 0x08 : Equal (unordered, non-signaling) 0x09 : Not-greater-than-or-equal (unordered, signaling) 0x0a : Not-greater-than (unordered, signaling) 0x0b : False (ordered, non-signaling) 0x0c : Not-equal (ordered, non-signaling) 0x0d : Greater-than-or-equal (ordered, signaling) 0x0e : Greater-than (ordered, signaling) 0x0f : True (unordered, non-signaling) 0x10 : Equal (ordered, signaling) 0x11 : Less-than (ordered, non-signaling) 0x12 : Less-than-or-equal (ordered, non-signaling) 0x13 : Unordered (signaling) 0x14 : Not-equal (unordered, signaling) 0x15 : Not-less-than (unordered, non-signaling) 0x16 : Not-less-than-or-equal (unordered, non-signaling) 0x17 : Ordered (signaling) 0x18 : Equal (unordered, signaling) 0x19 : Not-greater-than-or-equal (unordered, non-signaling) 0x1a : Not-greater-than (unordered, non-signaling) 0x1b : False (ordered, signaling) 0x1c : Not-equal (ordered, signaling) 0x1d : Greater-than-or-equal (ordered, non-signaling) 0x1e : Greater-than (ordered, non-signaling) 0x1f : True (unordered, signaling)
Returns
A 128-bit vector of [4 x float] containing the comparison results.

Definition at line 1761 of file avxintrin.h.

◆ _mm_cmp_sd

#define _mm_cmp_sd (   a,
  b,
  c 
)
Value:
__extension__ ({ \
(__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)); })

Compares each of the corresponding scalar double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand.

If the result is true, all 64 bits of the destination vector are set; otherwise they are cleared.

__m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);

This intrinsic corresponds to the VCMPSD instruction.

Parameters
aA 128-bit vector of [2 x double].
bA 128-bit vector of [2 x double].
cAn immediate integer operand, with bits [4:0] specifying which comparison operation to use:
0x00 : Equal (ordered, non-signaling) 0x01 : Less-than (ordered, signaling) 0x02 : Less-than-or-equal (ordered, signaling) 0x03 : Unordered (non-signaling) 0x04 : Not-equal (unordered, non-signaling) 0x05 : Not-less-than (unordered, signaling) 0x06 : Not-less-than-or-equal (unordered, signaling) 0x07 : Ordered (non-signaling) 0x08 : Equal (unordered, non-signaling) 0x09 : Not-greater-than-or-equal (unordered, signaling) 0x0a : Not-greater-than (unordered, signaling) 0x0b : False (ordered, non-signaling) 0x0c : Not-equal (ordered, non-signaling) 0x0d : Greater-than-or-equal (ordered, signaling) 0x0e : Greater-than (ordered, signaling) 0x0f : True (unordered, non-signaling) 0x10 : Equal (ordered, signaling) 0x11 : Less-than (ordered, non-signaling) 0x12 : Less-than-or-equal (ordered, non-signaling) 0x13 : Unordered (signaling) 0x14 : Not-equal (unordered, signaling) 0x15 : Not-less-than (unordered, non-signaling) 0x16 : Not-less-than-or-equal (unordered, non-signaling) 0x17 : Ordered (signaling) 0x18 : Equal (unordered, signaling) 0x19 : Not-greater-than-or-equal (unordered, non-signaling) 0x1a : Not-greater-than (unordered, non-signaling) 0x1b : False (ordered, signaling) 0x1c : Not-equal (ordered, signaling) 0x1d : Greater-than-or-equal (ordered, non-signaling) 0x1e : Greater-than (ordered, non-signaling) 0x1f : True (unordered, signaling)
Returns
A 128-bit vector of [2 x double] containing the comparison results.

Definition at line 1940 of file avxintrin.h.

◆ _mm_cmp_ss

#define _mm_cmp_ss (   a,
  b,
  c 
)
Value:
__extension__ ({ \
(__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)); })

Compares each of the corresponding scalar values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand.

If the result is true, all 32 bits of the destination vector are set; otherwise they are cleared.

__m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);

This intrinsic corresponds to the VCMPSS instruction.

Parameters
aA 128-bit vector of [4 x float].
bA 128-bit vector of [4 x float].
cAn immediate integer operand, with bits [4:0] specifying which comparison operation to use:
0x00 : Equal (ordered, non-signaling) 0x01 : Less-than (ordered, signaling) 0x02 : Less-than-or-equal (ordered, signaling) 0x03 : Unordered (non-signaling) 0x04 : Not-equal (unordered, non-signaling) 0x05 : Not-less-than (unordered, signaling) 0x06 : Not-less-than-or-equal (unordered, signaling) 0x07 : Ordered (non-signaling) 0x08 : Equal (unordered, non-signaling) 0x09 : Not-greater-than-or-equal (unordered, signaling) 0x0a : Not-greater-than (unordered, signaling) 0x0b : False (ordered, non-signaling) 0x0c : Not-equal (ordered, non-signaling) 0x0d : Greater-than-or-equal (ordered, signaling) 0x0e : Greater-than (ordered, signaling) 0x0f : True (unordered, non-signaling) 0x10 : Equal (ordered, signaling) 0x11 : Less-than (ordered, non-signaling) 0x12 : Less-than-or-equal (ordered, non-signaling) 0x13 : Unordered (signaling) 0x14 : Not-equal (unordered, signaling) 0x15 : Not-less-than (unordered, non-signaling) 0x16 : Not-less-than-or-equal (unordered, non-signaling) 0x17 : Ordered (signaling) 0x18 : Equal (unordered, signaling) 0x19 : Not-greater-than-or-equal (unordered, non-signaling) 0x1a : Not-greater-than (unordered, non-signaling) 0x1b : False (ordered, signaling) 0x1c : Not-equal (ordered, signaling) 0x1d : Greater-than-or-equal (ordered, non-signaling) 0x1e : Greater-than (ordered, non-signaling) 0x1f : True (unordered, signaling)
Returns
A 128-bit vector of [4 x float] containing the comparison results.

Definition at line 1999 of file avxintrin.h.

◆ _mm_permute_pd

#define _mm_permute_pd(A, C)
Value:
__extension__ ({ \
(__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
(__v2df)_mm_undefined_pd(), \
((C) >> 0) & 0x1, ((C) >> 1) & 0x1); })
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
Definition: emmintrin.h:1756

Copies the values in a 128-bit vector of [2 x double] as specified by the immediate integer operand.

__m128d _mm_permute_pd(__m128d A, const int C);

This intrinsic corresponds to the VPERMILPD instruction.

Parameters
A: A 128-bit vector of [2 x double].
C: An immediate integer operand specifying how the values are to be copied.
Bit [0]:
0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector.
1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector.
Bit [1]:
0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector.
1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector.
Returns
A 128-bit vector of [2 x double] containing the copied values.

Definition at line 1000 of file avxintrin.h.

◆ _mm_permute_ps

#define _mm_permute_ps(A, C)
Value:
__extension__ ({ \
(__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
(__v4sf)_mm_undefined_ps(), \
((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
Definition: xmmintrin.h:1754

Copies the values in a 128-bit vector of [4 x float] as specified by the immediate integer operand.

__m128 _mm_permute_ps(__m128 A, const int C);

This intrinsic corresponds to the VPERMILPS instruction.

Parameters
A: A 128-bit vector of [4 x float].
C: An immediate integer operand specifying how the values are to be copied.
Bits [1:0]:
00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector.
01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector.
10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector.
11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector.
Bits [3:2]:
00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector.
01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector.
10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector.
11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector.
Bits [5:4]:
00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector.
01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector.
10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector.
11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector.
Bits [7:6]:
00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector.
01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector.
10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector.
11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector.
Returns
A 128-bit vector of [4 x float] containing the copied values.

Definition at line 1103 of file avxintrin.h.

Typedef Documentation

◆ __attribute__

typedef long long __m256i __attribute__((__vector_size__(32)))

Definition at line 31 of file avxintrin.h.

Function Documentation

◆ _mm256_add_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd ( __m256d  __a,
__m256d  __b 
)
static

Adds two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VADDPD instruction.

Parameters
__a: A 256-bit vector of [4 x double] containing one of the source operands.
__b: A 256-bit vector of [4 x double] containing one of the source operands.
Returns
A 256-bit vector of [4 x double] containing the sums of both operands.

Definition at line 69 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_add_pd(), and _mm256_maskz_add_pd().

◆ _mm256_add_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps ( __m256  __a,
__m256  __b 
)
static

Adds two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VADDPS instruction.

Parameters
__a: A 256-bit vector of [8 x float] containing one of the source operands.
__b: A 256-bit vector of [8 x float] containing one of the source operands.
Returns
A 256-bit vector of [8 x float] containing the sums of both operands.

Definition at line 87 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_add_ps(), and _mm256_maskz_add_ps().

◆ _mm256_addsub_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_addsub_pd ( __m256d  __a,
__m256d  __b 
)
static

Adds the even-indexed values and subtracts the odd-indexed values of two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VADDSUBPD instruction.

Parameters
__a: A 256-bit vector of [4 x double] containing the left source operand.
__b: A 256-bit vector of [4 x double] containing the right source operand.
Returns
A 256-bit vector of [4 x double] containing the alternating sums and differences between both operands.

Definition at line 142 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_addsub_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_addsub_ps ( __m256  __a,
__m256  __b 
)
static

Adds the even-indexed values and subtracts the odd-indexed values of two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VADDSUBPS instruction.

Parameters
__a: A 256-bit vector of [8 x float] containing the left source operand.
__b: A 256-bit vector of [8 x float] containing the right source operand.
Returns
A 256-bit vector of [8 x float] containing the alternating sums and differences between both operands.

Definition at line 161 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_and_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_and_pd ( __m256d  __a,
__m256d  __b 
)
static

Performs a bitwise AND of two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VANDPD instruction.

Parameters
__a: A 256-bit vector of [4 x double] containing one of the source operands.
__b: A 256-bit vector of [4 x double] containing one of the source operands.
Returns
A 256-bit vector of [4 x double] containing the bitwise AND of the values between both operands.

Definition at line 529 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_and_pd(), and _mm256_maskz_and_pd().

◆ _mm256_and_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_and_ps ( __m256  __a,
__m256  __b 
)
static

Performs a bitwise AND of two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VANDPS instruction.

Parameters
__a: A 256-bit vector of [8 x float] containing one of the source operands.
__b: A 256-bit vector of [8 x float] containing one of the source operands.
Returns
A 256-bit vector of [8 x float] containing the bitwise AND of the values between both operands.

Definition at line 547 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_and_ps(), and _mm256_maskz_and_ps().

◆ _mm256_andnot_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_andnot_pd ( __m256d  __a,
__m256d  __b 
)
static

Performs a bitwise AND of two 256-bit vectors of [4 x double], using the one's complement of the values contained in the first source operand.

This intrinsic corresponds to the VANDNPD instruction.

Parameters
__a: A 256-bit vector of [4 x double] containing the left source operand. The one's complement of this value is used in the bitwise AND.
__b: A 256-bit vector of [4 x double] containing the right source operand.
Returns
A 256-bit vector of [4 x double] containing the bitwise AND of the values of the second operand and the one's complement of the first operand.

Definition at line 568 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_andnot_pd(), and _mm256_maskz_andnot_pd().

◆ _mm256_andnot_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_andnot_ps ( __m256  __a,
__m256  __b 
)
static

Performs a bitwise AND of two 256-bit vectors of [8 x float], using the one's complement of the values contained in the first source operand.

This intrinsic corresponds to the VANDNPS instruction.

Parameters
__a: A 256-bit vector of [8 x float] containing the left source operand. The one's complement of this value is used in the bitwise AND.
__b: A 256-bit vector of [8 x float] containing the right source operand.
Returns
A 256-bit vector of [8 x float] containing the bitwise AND of the values of the second operand and the one's complement of the first operand.

Definition at line 589 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_andnot_ps(), and _mm256_maskz_andnot_ps().

◆ _mm256_blendv_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_blendv_pd ( __m256d  __a,
__m256d  __b,
__m256d  __c 
)
static

Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the 256-bit vector operand.

This intrinsic corresponds to the VBLENDVPD instruction.

Parameters
__a: A 256-bit vector of [4 x double].
__b: A 256-bit vector of [4 x double].
__c: A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying how the values are to be copied. The position of the mask bit corresponds to the most significant bit of a copied value. When a mask bit is 0, the corresponding 64-bit element in operand __a is copied to the same position in the destination. When a mask bit is 1, the corresponding 64-bit element in operand __b is copied to the same position in the destination.
Returns
A 256-bit vector of [4 x double] containing the copied values.

Definition at line 1423 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_blendv_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_blendv_ps ( __m256  __a,
__m256  __b,
__m256  __c 
)
static

Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the 256-bit vector operand.

This intrinsic corresponds to the VBLENDVPS instruction.

Parameters
__a: A 256-bit vector of [8 x float].
__b: A 256-bit vector of [8 x float].
__c: A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63, and 31 specifying how the values are to be copied. The position of the mask bit corresponds to the most significant bit of a copied value. When a mask bit is 0, the corresponding 32-bit element in operand __a is copied to the same position in the destination. When a mask bit is 1, the corresponding 32-bit element in operand __b is copied to the same position in the destination.
Returns
A 256-bit vector of [8 x float] containing the copied values.

Definition at line 1451 of file avxintrin.h.

◆ _mm256_broadcast_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_pd ( __m128d const *  __a)
static

Loads the data from a 128-bit vector of [2 x double] from the specified address pointed to by __a and broadcasts it to 128-bit elements in a 256-bit vector of [4 x double].

This intrinsic corresponds to the VBROADCASTF128 instruction.

Parameters
__a: The 128-bit vector of [2 x double] to be broadcast.
Returns
A 256-bit vector of [4 x double] whose 128-bit elements are set equal to the broadcast value.

Definition at line 3113 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_broadcast_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ps ( __m128 const *  __a)
static

Loads the data from a 128-bit vector of [4 x float] from the specified address pointed to by __a and broadcasts it to 128-bit elements in a 256-bit vector of [8 x float].

This intrinsic corresponds to the VBROADCASTF128 instruction.

Parameters
__a: The 128-bit vector of [4 x float] to be broadcast.
Returns
A 256-bit vector of [8 x float] whose 128-bit elements are set equal to the broadcast value.

Definition at line 3131 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_broadcast_sd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_sd ( double const *  __a)
static

Loads a scalar double-precision floating-point value from the specified address pointed to by __a and broadcasts it to the elements of a [4 x double] vector.

This intrinsic corresponds to the VBROADCASTSD instruction.

Parameters
__a: The double-precision floating-point value to be broadcast.
Returns
A 256-bit vector of [4 x double] whose 64-bit elements are set equal to the broadcast value.

Definition at line 3075 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_broadcast_ss()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ss ( float const *  __a)
static

Loads a scalar single-precision floating-point value from the specified address pointed to by __a and broadcasts it to the elements of a [8 x float] vector.

This intrinsic corresponds to the VBROADCASTSS instruction.

Parameters
__a: The single-precision floating-point value to be broadcast.
Returns
A 256-bit vector of [8 x float] whose 32-bit elements are set equal to the broadcast value.

Definition at line 3094 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_castpd128_pd256()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castpd128_pd256 ( __m128d  __a)
static

Constructs a 256-bit floating-point vector of [4 x double] from a 128-bit floating-point vector of [2 x double].

The lower 128 bits contain the value of the source vector. The contents of the upper 128 bits are undefined.

This intrinsic has no corresponding instruction.

Parameters
__a: A 128-bit vector of [2 x double].
Returns
A 256-bit floating-point vector of [4 x double]. The lower 128 bits contain the value of the parameter. The contents of the upper 128 bits are undefined.

Definition at line 4532 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_loadu2_m128d().

◆ _mm256_castpd256_pd128()

static __inline __m128d __DEFAULT_FN_ATTRS _mm256_castpd256_pd128 ( __m256d  __a)
static

Returns the lower 128 bits of a 256-bit floating-point vector of [4 x double] as a 128-bit floating-point vector of [2 x double].

This intrinsic has no corresponding instruction.

Parameters
__a: A 256-bit floating-point vector of [4 x double].
Returns
A 128-bit floating-point vector of [2 x double] containing the lower 128 bits of the parameter.

Definition at line 4478 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_storeu2_m128d().

◆ _mm256_castpd_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castpd_ps ( __m256d  __a)
static

Casts a 256-bit floating-point vector of [4 x double] into a 256-bit floating-point vector of [8 x float].

This intrinsic has no corresponding instruction.

Parameters
__a: A 256-bit floating-point vector of [4 x double].
Returns
A 256-bit floating-point vector of [8 x float] containing the same bitwise pattern as the parameter.

Definition at line 4376 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_castpd_si256()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castpd_si256 ( __m256d  __a)
static

Casts a 256-bit floating-point vector of [4 x double] into a 256-bit integer vector.

This intrinsic has no corresponding instruction.

Parameters
__a: A 256-bit floating-point vector of [4 x double].
Returns
A 256-bit integer vector containing the same bitwise pattern as the parameter.

Definition at line 4393 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_castps128_ps256()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castps128_ps256 ( __m128  __a)
static

Constructs a 256-bit floating-point vector of [8 x float] from a 128-bit floating-point vector of [4 x float].

The lower 128 bits contain the value of the source vector. The contents of the upper 128 bits are undefined.

This intrinsic has no corresponding instruction.

Parameters
__a: A 128-bit vector of [4 x float].
Returns
A 256-bit floating-point vector of [8 x float]. The lower 128 bits contain the value of the parameter. The contents of the upper 128 bits are undefined.

Definition at line 4553 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_loadu2_m128().

◆ _mm256_castps256_ps128()

static __inline __m128 __DEFAULT_FN_ATTRS _mm256_castps256_ps128 ( __m256  __a)
static

Returns the lower 128 bits of a 256-bit floating-point vector of [8 x float] as a 128-bit floating-point vector of [4 x float].

This intrinsic has no corresponding instruction.

Parameters
__a: A 256-bit floating-point vector of [8 x float].
Returns
A 128-bit floating-point vector of [4 x float] containing the lower 128 bits of the parameter.

Definition at line 4495 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_storeu2_m128().

◆ _mm256_castps_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castps_pd ( __m256  __a)
static

Casts a 256-bit floating-point vector of [8 x float] into a 256-bit floating-point vector of [4 x double].

This intrinsic has no corresponding instruction.

Parameters
__a: A 256-bit floating-point vector of [8 x float].
Returns
A 256-bit floating-point vector of [4 x double] containing the same bitwise pattern as the parameter.

Definition at line 4410 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_castps_si256()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castps_si256 ( __m256  __a)
static

Casts a 256-bit floating-point vector of [8 x float] into a 256-bit integer vector.

This intrinsic has no corresponding instruction.

Parameters
__a: A 256-bit floating-point vector of [8 x float].
Returns
A 256-bit integer vector containing the same bitwise pattern as the parameter.

Definition at line 4427 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_castsi128_si256()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castsi128_si256 ( __m128i  __a)
static

Constructs a 256-bit integer vector from a 128-bit integer vector.

The lower 128 bits contain the value of the source vector. The contents of the upper 128 bits are undefined.

This intrinsic has no corresponding instruction.

Parameters
__a: A 128-bit integer vector.
Returns
A 256-bit integer vector. The lower 128 bits contain the value of the parameter. The contents of the upper 128 bits are undefined.

Definition at line 4572 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_loadu2_m128i().

◆ _mm256_castsi256_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castsi256_pd ( __m256i  __a)
static

Casts a 256-bit integer vector into a 256-bit floating-point vector of [4 x double].

This intrinsic has no corresponding instruction.

Parameters
__a: A 256-bit integer vector.
Returns
A 256-bit floating-point vector of [4 x double] containing the same bitwise pattern as the parameter.

Definition at line 4461 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_castsi256_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castsi256_ps ( __m256i  __a)
static

Casts a 256-bit integer vector into a 256-bit floating-point vector of [8 x float].

This intrinsic has no corresponding instruction.

Parameters
__a: A 256-bit integer vector.
Returns
A 256-bit floating-point vector of [8 x float] containing the same bitwise pattern as the parameter.

Definition at line 4444 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_castsi256_si128()

static __inline __m128i __DEFAULT_FN_ATTRS _mm256_castsi256_si128 ( __m256i  __a)
static

Truncates a 256-bit integer vector into a 128-bit integer vector.

This intrinsic has no corresponding instruction.

Parameters
__a: A 256-bit integer vector.
Returns
A 128-bit integer vector containing the lower 128 bits of the parameter.

Definition at line 4511 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_storeu2_m128i().

◆ _mm256_cvtepi32_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd ( __m128i  __a)
static

Converts a vector of [4 x i32] into a vector of [4 x double].

This intrinsic corresponds to the VCVTDQ2PD instruction.

Parameters
__a: A 128-bit integer vector of [4 x i32].
Returns
A 256-bit vector of [4 x double] containing the converted values.

Definition at line 2211 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_cvtepi32_pd(), and _mm256_maskz_cvtepi32_pd().

◆ _mm256_cvtepi32_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps ( __m256i  __a)
static

Converts a vector of [8 x i32] into a vector of [8 x float].

This intrinsic corresponds to the VCVTDQ2PS instruction.

Parameters
__a: A 256-bit integer vector.
Returns
A 256-bit vector of [8 x float] containing the converted values.

Definition at line 2226 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_cvtpd_epi32()

static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32 ( __m256d  __a)
static

Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32].

When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register.

This intrinsic corresponds to the VCVTPD2DQ instruction.

Parameters
__a: A 256-bit vector of [4 x double].
Returns
A 128-bit integer vector containing the converted values.

Definition at line 2307 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_cvtpd_ps()

static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps ( __m256d  __a)
static

Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].

This intrinsic corresponds to the VCVTPD2PS instruction.

Parameters
__a: A 256-bit vector of [4 x double].
Returns
A 128-bit vector of [4 x float] containing the converted values.

Definition at line 2242 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_cvtps_epi32()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32 ( __m256  __a)
static

Converts a vector of [8 x float] into a vector of [8 x i32].

This intrinsic corresponds to the VCVTPS2DQ instruction.

Parameters
__a: A 256-bit vector of [8 x float].
Returns
A 256-bit integer vector containing the converted values.

Definition at line 2257 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_cvtps_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd ( __m128  __a)
static

Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double].

This intrinsic corresponds to the VCVTPS2PD instruction.

Parameters
__a: A 128-bit vector of [4 x float].
Returns
A 256-bit vector of [4 x double] containing the converted values.

Definition at line 2273 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_cvtsd_f64()

static __inline double __DEFAULT_FN_ATTRS _mm256_cvtsd_f64 ( __m256d  __a)
static

Returns the first element of the input vector of [4 x double].

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__a: A 256-bit vector of [4 x double].
Returns
A 64-bit double containing the first element of the input vector.

Definition at line 2339 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_cvtsi256_si32()

static __inline int __DEFAULT_FN_ATTRS _mm256_cvtsi256_si32 ( __m256i  __a)
static

Returns the first element of the input vector of [8 x i32].

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__a: A 256-bit vector of [8 x i32].
Returns
A 32-bit integer containing the first element of the input vector.

Definition at line 2355 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_cvtss_f32()

static __inline float __DEFAULT_FN_ATTRS _mm256_cvtss_f32 ( __m256  __a)
static

Returns the first element of the input vector of [8 x float].

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__a: A 256-bit vector of [8 x float].
Returns
A 32-bit float containing the first element of the input vector.

Definition at line 2372 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_cvttpd_epi32()

static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32 ( __m256d  __a)
static

Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32], truncating the result by rounding towards zero when it is inexact.

This intrinsic corresponds to the VCVTTPD2DQ instruction.

Parameters
__a: A 256-bit vector of [4 x double].
Returns
A 128-bit integer vector containing the converted values.

Definition at line 2290 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_cvttps_epi32()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32 ( __m256  __a)
static

Converts a vector of [8 x float] into a vector of [8 x i32], truncating the result by rounding towards zero when it is inexact.

This intrinsic corresponds to the VCVTTPS2DQ instruction.

Parameters
__a: A 256-bit vector of [8 x float].
Returns
A 256-bit integer vector containing the converted values.

Definition at line 2323 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_div_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd ( __m256d  __a,
__m256d  __b 
)
static

Divides two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VDIVPD instruction.

Parameters
__a: A 256-bit vector of [4 x double] containing the dividend.
__b: A 256-bit vector of [4 x double] containing the divisor.
Returns
A 256-bit vector of [4 x double] containing the quotients of both operands.

Definition at line 179 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_div_pd(), and _mm256_maskz_div_pd().

◆ _mm256_div_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps ( __m256  __a,
__m256  __b 
)
static

Divides two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VDIVPS instruction.

Parameters
__a: A 256-bit vector of [8 x float] containing the dividend.
__b: A 256-bit vector of [8 x float] containing the divisor.
Returns
A 256-bit vector of [8 x float] containing the quotients of both operands.

Definition at line 197 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_div_ps(), and _mm256_maskz_div_ps().

◆ _mm256_extract_epi16()

static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi16 ( __m256i  __a,
const int  __imm 
)
static

Takes a [16 x i16] vector and returns the vector element value indexed by the immediate constant operand.

This intrinsic corresponds to the VEXTRACTF128+COMPOSITE instruction.

Parameters
__a: A 256-bit integer vector of [16 x i16].
__imm: An immediate integer operand with bits [3:0] determining which vector element is extracted and returned.
Returns
A 32-bit integer containing the extracted 16 bits of zero-extended packed data.

Definition at line 2041 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

◆ _mm256_extract_epi32()

static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi32 ( __m256i  __a,
const int  __imm 
)
static

Takes a [8 x i32] vector and returns the vector element value indexed by the immediate constant operand.

This intrinsic corresponds to the VEXTRACTF128+COMPOSITE instruction.

Parameters
__a: A 256-bit vector of [8 x i32].
__imm: An immediate integer operand with bits [2:0] determining which vector element is extracted and returned.
Returns
A 32-bit integer containing the extracted 32 bits of packed data.

Definition at line 2019 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

◆ _mm256_extract_epi8()

static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi8 ( __m256i  __a,
const int  __imm 
)
static

Takes a [32 x i8] vector and returns the vector element value indexed by the immediate constant operand.

This intrinsic corresponds to the VEXTRACTF128+COMPOSITE instruction.

Parameters
__a: A 256-bit integer vector of [32 x i8].
__imm: An immediate integer operand with bits [4:0] determining which vector element is extracted and returned.
Returns
A 32-bit integer containing the extracted 8 bits of zero-extended packed data.

Definition at line 2063 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

◆ _mm256_hadd_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hadd_pd ( __m256d  __a,
__m256d  __b 
)
static

Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VHADDPD instruction.

Parameters
__a: A 256-bit vector of [4 x double] containing one of the source operands. The horizontal sums of the values are returned in the even-indexed elements of a vector of [4 x double].
__b: A 256-bit vector of [4 x double] containing one of the source operands. The horizontal sums of the values are returned in the odd-indexed elements of a vector of [4 x double].
Returns
A 256-bit vector of [4 x double] containing the horizontal sums of both operands.

Definition at line 685 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_hadd_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hadd_ps ( __m256  __a,
__m256  __b 
)
static

Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VHADDPS instruction.

Parameters
__a: A 256-bit vector of [8 x float] containing one of the source operands. The horizontal sums of the values are returned in the elements with index 0, 1, 4, 5 of a vector of [8 x float].
__b: A 256-bit vector of [8 x float] containing one of the source operands. The horizontal sums of the values are returned in the elements with index 2, 3, 6, 7 of a vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the horizontal sums of both operands.

Definition at line 708 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_hsub_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hsub_pd ( __m256d  __a,
__m256d  __b 
)
static

Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VHSUBPD instruction.

Parameters
__a: A 256-bit vector of [4 x double] containing one of the source operands. The horizontal differences between the values are returned in the even-indexed elements of a vector of [4 x double].
__b: A 256-bit vector of [4 x double] containing one of the source operands. The horizontal differences between the values are returned in the odd-indexed elements of a vector of [4 x double].
Returns
A 256-bit vector of [4 x double] containing the horizontal differences of both operands.

Definition at line 731 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_hsub_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hsub_ps ( __m256  __a,
__m256  __b 
)
static

Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VHSUBPS instruction.

Parameters
__a: A 256-bit vector of [8 x float] containing one of the source operands. The horizontal differences between the values are returned in the elements with index 0, 1, 4, 5 of a vector of [8 x float].
__b: A 256-bit vector of [8 x float] containing one of the source operands. The horizontal differences between the values are returned in the elements with index 2, 3, 6, 7 of a vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the horizontal differences of both operands.

Definition at line 754 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_insert_epi16()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi16 ( __m256i  __a,
int  __b,
int const  __imm 
)
static

Takes a [16 x i16] vector and replaces the vector element value indexed by the immediate constant operand with a new value.

Returns the modified vector.

This intrinsic corresponds to the VINSERTF128+COMPOSITE instruction.

Parameters
__a: A vector of [16 x i16] to be used by the insert operation.
__b: An i16 integer value. The replacement value for the insert operation.
__imm: An immediate integer specifying the index of the vector element to be replaced.
Returns
A copy of vector __a, after replacing its element indexed by __imm with __b.

Definition at line 2139 of file avxintrin.h.

References __b, __c, and __DEFAULT_FN_ATTRS.

◆ _mm256_insert_epi32()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi32 ( __m256i  __a,
int  __b,
int const  __imm 
)
static

Takes a [8 x i32] vector and replaces the vector element value indexed by the immediate constant operand with a new value.

Returns the modified vector.

This intrinsic corresponds to the VINSERTF128+COMPOSITE instruction.

Parameters
__aA vector of [8 x i32] to be used by the insert operation.
__bAn integer value. The replacement value for the insert operation.
__immAn immediate integer specifying the index of the vector element to be replaced.
Returns
A copy of vector __a, after replacing its element indexed by __imm with __b.

Definition at line 2112 of file avxintrin.h.

References __b, __c, and __DEFAULT_FN_ATTRS.

◆ _mm256_insert_epi8()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi8 ( __m256i  __a,
int  __b,
int const  __imm 
)
static

Takes a [32 x i8] vector and replaces the vector element value indexed by the immediate constant operand with a new value.

Returns the modified vector.

This intrinsic corresponds to the VINSERTF128+COMPOSITE instruction.

Parameters
__aA vector of [32 x i8] to be used by the insert operation.
__bAn i8 integer value. The replacement value for the insert operation.
__immAn immediate integer specifying the index of the vector element to be replaced.
Returns
A copy of vector __a, after replacing its element indexed by __imm with __b.

Definition at line 2165 of file avxintrin.h.

References __b, __c, and __DEFAULT_FN_ATTRS.

◆ _mm256_lddqu_si256()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_lddqu_si256 ( __m256i const *  __p)
static

Loads 256 bits of integer data from an unaligned memory location pointed to by __p into a 256-bit integer vector.

This intrinsic may perform better than _mm256_loadu_si256 when the data crosses a cache line boundary.

This intrinsic corresponds to the VLDDQU instruction.

Parameters
__pA pointer to a 256-bit integer vector containing integer values.
Returns
A 256-bit integer vector containing the moved values.

Definition at line 3259 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_load_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_load_pd ( double const *  __p)
static

Loads 4 double-precision floating point values from a 32-byte aligned memory location pointed to by __p into a vector of [4 x double].

This intrinsic corresponds to the VMOVAPD instruction.

Parameters
__pA 32-byte aligned pointer to a memory location containing double-precision floating point values.
Returns
A 256-bit vector of [4 x double] containing the moved values.

Definition at line 3149 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_load_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_load_ps ( float const *  __p)
static

Loads 8 single-precision floating point values from a 32-byte aligned memory location pointed to by __p into a vector of [8 x float].

This intrinsic corresponds to the VMOVAPS instruction.

Parameters
__pA 32-byte aligned pointer to a memory location containing float values.
Returns
A 256-bit vector of [8 x float] containing the moved values.

Definition at line 3165 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_load_si256()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_load_si256 ( __m256i const *  __p)
static

Loads 256 bits of integer data from a 32-byte aligned memory location pointed to by __p into elements of a 256-bit integer vector.

This intrinsic corresponds to the VMOVDQA instruction.

Parameters
__pA 32-byte aligned pointer to a 256-bit integer vector containing integer values.
Returns
A 256-bit integer vector containing the moved values.

Definition at line 3222 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, and __p.

◆ _mm256_loadu2_m128()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu2_m128 ( float const *  __addr_hi,
float const *  __addr_lo 
)
static

Loads two 128-bit floating-point vectors of [4 x float] from unaligned memory locations and constructs a 256-bit floating-point vector of [8 x float] by concatenating the two 128-bit vectors.

This intrinsic corresponds to load instructions followed by the VINSERTF128 instruction.

Parameters
__addr_hiA pointer to a 128-bit memory location containing 4 consecutive single-precision floating-point values. These values are to be copied to bits[255:128] of the result. The address of the memory location does not have to be aligned.
__addr_loA pointer to a 128-bit memory location containing 4 consecutive single-precision floating-point values. These values are to be copied to bits[127:0] of the result. The address of the memory location does not have to be aligned.
Returns
A 256-bit floating-point vector of [8 x float] containing the concatenated result.

Definition at line 4884 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, _mm256_castps128_ps256(), _mm256_insertf128_ps, and _mm_loadu_ps().

◆ _mm256_loadu2_m128d()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu2_m128d ( double const *  __addr_hi,
double const *  __addr_lo 
)
static

Loads two 128-bit floating-point vectors of [2 x double] from unaligned memory locations and constructs a 256-bit floating-point vector of [4 x double] by concatenating the two 128-bit vectors.

This intrinsic corresponds to load instructions followed by the VINSERTF128 instruction.

Parameters
__addr_hiA pointer to a 128-bit memory location containing two consecutive double-precision floating-point values. These values are to be copied to bits[255:128] of the result. The address of the memory location does not have to be aligned.
__addr_loA pointer to a 128-bit memory location containing two consecutive double-precision floating-point values. These values are to be copied to bits[127:0] of the result. The address of the memory location does not have to be aligned.
Returns
A 256-bit floating-point vector of [4 x double] containing the concatenated result.

Definition at line 4912 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, _mm256_castpd128_pd256(), _mm256_insertf128_pd, and _mm_loadu_pd().

◆ _mm256_loadu2_m128i()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu2_m128i ( __m128i const *  __addr_hi,
__m128i const *  __addr_lo 
)
static

Loads two 128-bit integer vectors from unaligned memory locations and constructs a 256-bit integer vector by concatenating the two 128-bit vectors.

This intrinsic corresponds to load instructions followed by the VINSERTF128 instruction.

Parameters
__addr_hiA pointer to a 128-bit memory location containing a 128-bit integer vector. This vector is to be copied to bits[255:128] of the result. The address of the memory location does not have to be aligned.
__addr_loA pointer to a 128-bit memory location containing a 128-bit integer vector. This vector is to be copied to bits[127:0] of the result. The address of the memory location does not have to be aligned.
Returns
A 256-bit integer vector containing the concatenated result.

Definition at line 4937 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, _mm256_castsi128_si256(), _mm256_insertf128_si256, and _mm_loadu_si128().

◆ _mm256_loadu_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu_pd ( double const *  __p)
static

Loads 4 double-precision floating point values from an unaligned memory location pointed to by __p into a vector of [4 x double].

This intrinsic corresponds to the VMOVUPD instruction.

Parameters
__pA pointer to a memory location containing double-precision floating point values.
Returns
A 256-bit vector of [4 x double] containing the moved values.

Definition at line 3182 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_loadu_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu_ps ( float const *  __p)
static

Loads 8 single-precision floating point values from an unaligned memory location pointed to by __p into a vector of [8 x float].

This intrinsic corresponds to the VMOVUPS instruction.

Parameters
__pA pointer to a memory location containing single-precision floating point values.
Returns
A 256-bit vector of [8 x float] containing the moved values.

Definition at line 3202 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_loadu_si256()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu_si256 ( __m256i const *  __p)
static

Loads 256 bits of integer data from an unaligned memory location pointed to by __p into a 256-bit integer vector.

This intrinsic corresponds to the VMOVDQU instruction.

Parameters
__pA pointer to a 256-bit integer vector containing integer values.
Returns
A 256-bit integer vector containing the moved values.

Definition at line 3238 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_maskload_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_maskload_pd ( double const *  __p,
__m256i  __m 
)
static

Conditionally loads double-precision floating point elements from a memory location pointed to by __p into a 256-bit vector of [4 x double], depending on the mask bits associated with each data element.

This intrinsic corresponds to the VMASKMOVPD instruction.

Parameters
__pA pointer to a memory location that contains the double-precision floating point values.
__mA 256-bit integer vector of [4 x quadword] containing the mask. The most significant bit of each quadword element represents the mask bits. If a mask bit is zero, the corresponding value in the memory location is not loaded and the corresponding field in the return value is set to zero.
Returns
A 256-bit vector of [4 x double] containing the loaded values.

Definition at line 3425 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_maskload_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_maskload_ps ( float const *  __p,
__m256i  __m 
)
static

Conditionally loads single-precision floating point elements from a memory location pointed to by __p into a 256-bit vector of [8 x float], depending on the mask bits associated with each data element.

This intrinsic corresponds to the VMASKMOVPS instruction.

Parameters
__pA pointer to a memory location that contains the single-precision floating point values.
__mA 256-bit integer vector of [8 x dword] containing the mask. The most significant bit of each dword element represents the mask bits. If a mask bit is zero, the corresponding value in the memory location is not loaded and the corresponding field in the return value is set to zero.
Returns
A 256-bit vector of [8 x float] containing the loaded values.

Definition at line 3474 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_maskstore_pd()

static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_pd ( double *  __p,
__m256i  __m,
__m256d  __a 
)
static

Moves double-precision values from a 256-bit vector of [4 x double] to a memory location pointed to by __p, according to the specified mask.

This intrinsic corresponds to the VMASKMOVPD instruction.

Parameters
__pA pointer to a memory location that will receive the double-precision floating point values.
__mA 256-bit integer vector of [4 x quadword] containing the mask. The most significant bit of each quadword element in the mask vector represents the mask bits. If a mask bit is zero, the corresponding value from vector __a is not stored and the corresponding field in the memory location pointed to by __p is not changed.
__aA 256-bit vector of [4 x double] containing the values to be stored.

Definition at line 3547 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_maskstore_ps()

static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_ps ( float *  __p,
__m256i  __m,
__m256  __a 
)
static

Moves single-precision floating point values from a 256-bit vector of [8 x float] to a memory location pointed to by __p, according to the specified mask.

This intrinsic corresponds to the VMASKMOVPS instruction.

Parameters
__pA pointer to a memory location that will receive the float values.
__mA 256-bit integer vector of [8 x dword] containing the mask. The most significant bit of each dword element in the mask vector represents the mask bits. If a mask bit is zero, the corresponding value from vector __a is not stored and the corresponding field in the memory location pointed to by __p is not changed.
__aA 256-bit vector of [8 x float] containing the values to be stored.

Definition at line 3499 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_max_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd ( __m256d  __a,
__m256d  __b 
)
static

Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.

This intrinsic corresponds to the VMAXPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the operands.
__bA 256-bit vector of [4 x double] containing one of the operands.
Returns
A 256-bit vector of [4 x double] containing the maximum values between both operands.

Definition at line 216 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_max_pd(), and _mm256_maskz_max_pd().

◆ _mm256_max_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps ( __m256  __a,
__m256  __b 
)
static

Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.

This intrinsic corresponds to the VMAXPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the operands.
__bA 256-bit vector of [8 x float] containing one of the operands.
Returns
A 256-bit vector of [8 x float] containing the maximum values between both operands.

Definition at line 235 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_max_ps(), and _mm256_maskz_max_ps().

◆ _mm256_min_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd ( __m256d  __a,
__m256d  __b 
)
static

Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.

This intrinsic corresponds to the VMINPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the operands.
__bA 256-bit vector of [4 x double] containing one of the operands.
Returns
A 256-bit vector of [4 x double] containing the minimum values between both operands.

Definition at line 254 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_min_pd(), and _mm256_maskz_min_pd().

◆ _mm256_min_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps ( __m256  __a,
__m256  __b 
)
static

Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.

This intrinsic corresponds to the VMINPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the operands.
__bA 256-bit vector of [8 x float] containing one of the operands.
Returns
A 256-bit vector of [8 x float] containing the minimum values between both operands.

Definition at line 273 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_min_ps(), and _mm256_maskz_min_ps().

◆ _mm256_movedup_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd ( __m256d  __a)
static

Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to double-precision values in a 256-bit vector of [4 x double].

This intrinsic corresponds to the VMOVDDUP instruction.

Parameters
__aA 256-bit vector of [4 x double].
Bits [63:0] of __a are written to bits [127:64] and [63:0] of the return value.
Bits [191:128] of __a are written to bits [255:192] and [191:128] of the return value.
Returns
A 256-bit vector of [4 x double] containing the moved and duplicated values.

Definition at line 2446 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_movedup_pd(), and _mm256_maskz_movedup_pd().

◆ _mm256_movehdup_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps ( __m256  __a)
static

Moves and duplicates high-order (odd-indexed) values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float].

This intrinsic corresponds to the VMOVSHDUP instruction.

Parameters
__aA 256-bit vector of [8 x float].
Bits [255:224] of __a are written to bits [255:224] and [223:192] of the return value.
Bits [191:160] of __a are written to bits [191:160] and [159:128] of the return value.
Bits [127:96] of __a are written to bits [127:96] and [95:64] of the return value.
Bits [63:32] of __a are written to bits [63:32] and [31:0] of the return value.
Returns
A 256-bit vector of [8 x float] containing the moved and duplicated values.

Definition at line 2399 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_movehdup_ps(), and _mm256_maskz_movehdup_ps().

◆ _mm256_moveldup_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps ( __m256  __a)
static

Moves and duplicates low-order (even-indexed) values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float].

This intrinsic corresponds to the VMOVSLDUP instruction.

Parameters
__aA 256-bit vector of [8 x float].
Bits [223:192] of __a are written to bits [255:224] and [223:192] of the return value.
Bits [159:128] of __a are written to bits [191:160] and [159:128] of the return value.
Bits [95:64] of __a are written to bits [127:96] and [95:64] of the return value.
Bits [31:0] of __a are written to bits [63:32] and [31:0] of the return value.
Returns
A 256-bit vector of [8 x float] containing the moved and duplicated values.

Definition at line 2424 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_moveldup_ps(), and _mm256_maskz_moveldup_ps().

◆ _mm256_movemask_pd()

static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_pd ( __m256d  __a)
static

Extracts the sign bits of double-precision floating point elements in a 256-bit vector of [4 x double] and writes them to the lower order bits of the return value.

This intrinsic corresponds to the VMOVMSKPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing the double-precision floating point values with sign bits to be extracted.
Returns
The sign bits from the operand, written to bits [3:0].

Definition at line 2996 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_movemask_ps()

static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_ps ( __m256  __a)
static

Extracts the sign bits of single-precision floating point elements in a 256-bit vector of [8 x float] and writes them to the lower order bits of the return value.

This intrinsic corresponds to the VMOVMSKPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing the single-precision floating point values with sign bits to be extracted.
Returns
The sign bits from the operand, written to bits [7:0].

Definition at line 3014 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_mul_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd ( __m256d  __a,
__m256d  __b 
)
static

Multiplies two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VMULPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the operands.
__bA 256-bit vector of [4 x double] containing one of the operands.
Returns
A 256-bit vector of [4 x double] containing the products of both operands.

Definition at line 291 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_mul_pd(), and _mm256_maskz_mul_pd().

◆ _mm256_mul_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps ( __m256  __a,
__m256  __b 
)
static

Multiplies two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VMULPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the operands.
__bA 256-bit vector of [8 x float] containing one of the operands.
Returns
A 256-bit vector of [8 x float] containing the products of both operands.

Definition at line 309 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_mul_ps(), and _mm256_maskz_mul_ps().

◆ _mm256_or_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_or_pd ( __m256d  __a,
__m256d  __b 
)
static

Performs a bitwise OR of two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VORPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the source operands.
__bA 256-bit vector of [4 x double] containing one of the source operands.
Returns
A 256-bit vector of [4 x double] containing the bitwise OR of the values between both operands.

Definition at line 607 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_or_pd(), and _mm256_maskz_or_pd().

◆ _mm256_or_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_or_ps ( __m256  __a,
__m256  __b 
)
static

Performs a bitwise OR of two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VORPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the source operands.
__bA 256-bit vector of [8 x float] containing one of the source operands.
Returns
A 256-bit vector of [8 x float] containing the bitwise OR of the values between both operands.

Definition at line 625 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_or_ps(), and _mm256_maskz_or_ps().

◆ _mm256_permutevar_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd ( __m256d  __a,
__m256i  __c 
)
static

Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector operand.

This intrinsic corresponds to the VPERMILPD instruction.

Parameters
__aA 256-bit vector of [4 x double].
__cA 256-bit integer vector operand specifying how the values are to be copied.
Bit [1]:
0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector.
1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector.
Bit [65]:
0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector.
1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector.
Bit [129]:
0: Bits [191:128] of the source are copied to bits [191:128] of the returned vector.
1: Bits [255:192] of the source are copied to bits [191:128] of the returned vector.
Bit [193]:
0: Bits [191:128] of the source are copied to bits [255:192] of the returned vector.
1: Bits [255:192] of the source are copied to bits [255:192] of the returned vector.
Returns
A 256-bit vector of [4 x double] containing the copied values.

Definition at line 823 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_permutevar_pd(), and _mm256_maskz_permutevar_pd().

◆ _mm256_permutevar_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps ( __m256  __a,
__m256i  __c 
)
static

Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand.

This intrinsic corresponds to the VPERMILPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
__cA 256-bit integer vector operand specifying how the values are to be copied.
Bits [1:0]:
00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector.
01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector.
10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector.
11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector.
Bits [33:32]:
00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector.
01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector.
10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector.
11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector.
Bits [65:64]:
00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector.
01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector.
10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector.
11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector.
Bits [97:96]:
00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector.
01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector.
10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector.
11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector.
Bits [129:128]:
00: Bits [159:128] of the source are copied to bits [159:128] of the returned vector.
01: Bits [191:160] of the source are copied to bits [159:128] of the returned vector.
10: Bits [223:192] of the source are copied to bits [159:128] of the returned vector.
11: Bits [255:224] of the source are copied to bits [159:128] of the returned vector.
Bits [161:160]:
00: Bits [159:128] of the source are copied to bits [191:160] of the returned vector.
01: Bits [191:160] of the source are copied to bits [191:160] of the returned vector.
10: Bits [223:192] of the source are copied to bits [191:160] of the returned vector.
11: Bits [255:224] of the source are copied to bits [191:160] of the returned vector.
Bits [193:192]:
00: Bits [159:128] of the source are copied to bits [223:192] of the returned vector.
01: Bits [191:160] of the source are copied to bits [223:192] of the returned vector.
10: Bits [223:192] of the source are copied to bits [223:192] of the returned vector.
11: Bits [255:224] of the source are copied to bits [223:192] of the returned vector.
Bits [225:224]:
00: Bits [159:128] of the source are copied to bits [255:224] of the returned vector.
01: Bits [191:160] of the source are copied to bits [255:224] of the returned vector.
10: Bits [223:192] of the source are copied to bits [255:224] of the returned vector.
11: Bits [255:224] of the source are copied to bits [255:224] of the returned vector.
Returns
A 256-bit vector of [8 x float] containing the copied values.

Definition at line 968 of file avxintrin.h.

Referenced by _mm256_mask_permutevar_ps(), and _mm256_maskz_permutevar_ps().

◆ _mm256_rcp_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rcp_ps ( __m256  __a)
static

Calculates the reciprocals of the values in a 256-bit vector of [8 x float].

This intrinsic corresponds to the VRCPPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the reciprocals of the values in the operand.

Definition at line 377 of file avxintrin.h.

◆ _mm256_rsqrt_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rsqrt_ps ( __m256  __a)
static

Calculates the reciprocal square roots of the values in a 256-bit vector of [8 x float].

This intrinsic corresponds to the VRSQRTPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the reciprocal square roots of the values in the operand.

Definition at line 360 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_set1_epi16()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16 ( short  __w)
static

Constructs a 256-bit integer vector of [16 x i16], with each of the 16-bit integral vector elements set to the specified 16-bit integral value.

This intrinsic corresponds to the VPSHUFB+VINSERTF128 instruction.

Parameters
__wA 16-bit integral value used to initialize each vector element of the result.
Returns
An initialized 256-bit integer vector of [16 x i16].

Definition at line 4278 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_set1_epi16(), and _mm256_maskz_set1_epi16().

◆ _mm256_set1_epi32()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32 ( int  __i)
static

Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements set to the specified 32-bit integral value.

This intrinsic corresponds to the VPERMILPS+VINSERTF128 instruction.

Parameters
__iA 32-bit integral value used to initialize each vector element of the result.
Returns
An initialized 256-bit integer vector of [8 x i32].

Definition at line 4260 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_broadcastmw_epi32(), _mm256_mask_set1_epi32(), and _mm256_maskz_set1_epi32().

◆ _mm256_set1_epi64x()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x ( long long  __q)
static

Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements set to the specified 64-bit integral value.

This intrinsic corresponds to the VMOVDDUP+VINSERTF128 instruction.

Parameters
__qA 64-bit integral value used to initialize each vector element of the result.
Returns
An initialized 256-bit integer vector of [4 x i64].

Definition at line 4316 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_broadcastmb_epi64(), and _mm256_maskz_set1_epi32().

◆ _mm256_set1_epi8()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8 ( char  __b)
static

Constructs a 256-bit integer vector of [32 x i8], with each of the 8-bit integral vector elements set to the specified 8-bit integral value.

This intrinsic corresponds to the VPSHUFB+VINSERTF128 instruction.

Parameters
__bAn 8-bit integral value used to initialize each vector element of the result.
Returns
An initialized 256-bit integer vector of [32 x i8].

Definition at line 4296 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_set1_epi8(), and _mm256_maskz_set1_epi8().

◆ _mm256_set1_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set1_pd ( double  __w)
static

Constructs a 256-bit floating-point vector of [4 x double], with each of the four double-precision floating-point vector elements set to the specified double-precision floating-point value.

This intrinsic corresponds to the VMOVDDUP+VINSERTF128 instruction.

Parameters
__wA double-precision floating-point value used to initialize each vector element of the result.
Returns
An initialized 256-bit floating-point vector of [4 x double].

Definition at line 4222 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_set1_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set1_ps ( float  __w)
static

Constructs a 256-bit floating-point vector of [8 x float], with each of the eight single-precision floating-point vector elements set to the specified single-precision floating-point value.

This intrinsic corresponds to the VPERMILPS+VINSERTF128 instruction.

Parameters
__wA single-precision floating-point value used to initialize each vector element of the result.
Returns
An initialized 256-bit floating-point vector of [8 x float].

Definition at line 4241 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_set_epi16()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi16 ( short  __w15,
short  __w14,
short  __w13,
short  __w12,
short  __w11,
short  __w10,
short  __w09,
short  __w08,
short  __w07,
short  __w06,
short  __w05,
short  __w04,
short  __w03,
short  __w02,
short  __w01,
short  __w00 
)
static

Constructs a 256-bit integer vector initialized with the specified 16-bit integral values.

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__w15A 16-bit integral value used to initialize bits [255:240] of the result.
__w14A 16-bit integral value used to initialize bits [239:224] of the result.
__w13A 16-bit integral value used to initialize bits [223:208] of the result.
__w12A 16-bit integral value used to initialize bits [207:192] of the result.
__w11A 16-bit integral value used to initialize bits [191:176] of the result.
__w10A 16-bit integral value used to initialize bits [175:160] of the result.
__w09A 16-bit integral value used to initialize bits [159:144] of the result.
__w08A 16-bit integral value used to initialize bits [143:128] of the result.
__w07A 16-bit integral value used to initialize bits [127:112] of the result.
__w06A 16-bit integral value used to initialize bits [111:96] of the result.
__w05A 16-bit integral value used to initialize bits [95:80] of the result.
__w04A 16-bit integral value used to initialize bits [79:64] of the result.
__w03A 16-bit integral value used to initialize bits [63:48] of the result.
__w02A 16-bit integral value used to initialize bits [47:32] of the result.
__w01A 16-bit integral value used to initialize bits [31:16] of the result.
__w00A 16-bit integral value used to initialize bits [15:0] of the result.
Returns
An initialized 256-bit integer vector.

Definition at line 3819 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_set_epi32()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi32 ( int  __i0,
int  __i1,
int  __i2,
int  __i3,
int  __i4,
int  __i5,
int  __i6,
int  __i7 
)
static

Constructs a 256-bit integer vector initialized with the specified 32-bit integral values.

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__i0A 32-bit integral value used to initialize bits [255:224] of the result.
__i1A 32-bit integral value used to initialize bits [223:192] of the result.
__i2A 32-bit integral value used to initialize bits [191:160] of the result.
__i3A 32-bit integral value used to initialize bits [159:128] of the result.
__i4A 32-bit integral value used to initialize bits [127:96] of the result.
__i5A 32-bit integral value used to initialize bits [95:64] of the result.
__i6A 32-bit integral value used to initialize bits [63:32] of the result.
__i7A 32-bit integral value used to initialize bits [31:0] of the result.
Returns
An initialized 256-bit integer vector.

Definition at line 3771 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_set_epi64x()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi64x ( long long  __a,
long long  __b,
long long  __c,
long long  __d 
)
static

Constructs a 256-bit integer vector initialized with the specified 64-bit integral values.

This intrinsic corresponds to the VPUNPCKLQDQ+VINSERTF128 instruction.

Parameters
__aA 64-bit integral value used to initialize bits [255:192] of the result.
__bA 64-bit integral value used to initialize bits [191:128] of the result.
__cA 64-bit integral value used to initialize bits [127:64] of the result.
__dA 64-bit integral value used to initialize bits [63:0] of the result.
Returns
An initialized 256-bit integer vector.

Definition at line 3937 of file avxintrin.h.

References __b, __c, and __DEFAULT_FN_ATTRS.

◆ _mm256_set_epi8()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi8 ( char  __b31,
char  __b30,
char  __b29,
char  __b28,
char  __b27,
char  __b26,
char  __b25,
char  __b24,
char  __b23,
char  __b22,
char  __b21,
char  __b20,
char  __b19,
char  __b18,
char  __b17,
char  __b16,
char  __b15,
char  __b14,
char  __b13,
char  __b12,
char  __b11,
char  __b10,
char  __b09,
char  __b08,
char  __b07,
char  __b06,
char  __b05,
char  __b04,
char  __b03,
char  __b02,
char  __b01,
char  __b00 
)
static

Constructs a 256-bit integer vector initialized with the specified 8-bit integral values.

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__b31An 8-bit integral value used to initialize bits [255:248] of the result.
__b30An 8-bit integral value used to initialize bits [247:240] of the result.
__b29An 8-bit integral value used to initialize bits [239:232] of the result.
__b28An 8-bit integral value used to initialize bits [231:224] of the result.
__b27An 8-bit integral value used to initialize bits [223:216] of the result.
__b26An 8-bit integral value used to initialize bits [215:208] of the result.
__b25An 8-bit integral value used to initialize bits [207:200] of the result.
__b24An 8-bit integral value used to initialize bits [199:192] of the result.
__b23An 8-bit integral value used to initialize bits [191:184] of the result.
__b22An 8-bit integral value used to initialize bits [183:176] of the result.
__b21An 8-bit integral value used to initialize bits [175:168] of the result.
__b20An 8-bit integral value used to initialize bits [167:160] of the result.
__b19An 8-bit integral value used to initialize bits [159:152] of the result.
__b18An 8-bit integral value used to initialize bits [151:144] of the result.
__b17An 8-bit integral value used to initialize bits [143:136] of the result.
__b16An 8-bit integral value used to initialize bits [135:128] of the result.
__b15An 8-bit integral value used to initialize bits [127:120] of the result.
__b14An 8-bit integral value used to initialize bits [119:112] of the result.
__b13An 8-bit integral value used to initialize bits [111:104] of the result.
__b12An 8-bit integral value used to initialize bits [103:96] of the result.
__b11An 8-bit integral value used to initialize bits [95:88] of the result.
__b10An 8-bit integral value used to initialize bits [87:80] of the result.
__b09An 8-bit integral value used to initialize bits [79:72] of the result.
__b08An 8-bit integral value used to initialize bits [71:64] of the result.
__b07An 8-bit integral value used to initialize bits [63:56] of the result.
__b06An 8-bit integral value used to initialize bits [55:48] of the result.
__b05An 8-bit integral value used to initialize bits [47:40] of the result.
__b04An 8-bit integral value used to initialize bits [39:32] of the result.
__b03An 8-bit integral value used to initialize bits [31:24] of the result.
__b02An 8-bit integral value used to initialize bits [23:16] of the result.
__b01An 8-bit integral value used to initialize bits [15:8] of the result.
__b00An 8-bit integral value used to initialize bits [7:0] of the result.
Returns
An initialized 256-bit integer vector.

Definition at line 3902 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_set_m128()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_m128 ( __m128  __hi,
__m128  __lo 
)
static

Constructs a 256-bit floating-point vector of [8 x float] by concatenating two 128-bit floating-point vectors of [4 x float].

This intrinsic corresponds to the VINSERTF128 instruction.

Parameters
__hiA 128-bit floating-point vector of [4 x float] to be copied to the upper 128 bits of the result.
__loA 128-bit floating-point vector of [4 x float] to be copied to the lower 128 bits of the result.
Returns
A 256-bit floating-point vector of [8 x float] containing the concatenated result.

Definition at line 5047 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_set_m128d(), _mm256_set_m128i(), _mm256_setr_m128(), _mm256_setr_m128d(), and _mm256_setr_m128i().

◆ _mm256_set_m128d()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_m128d ( __m128d  __hi,
__m128d  __lo 
)
static

Constructs a 256-bit floating-point vector of [4 x double] by concatenating two 128-bit floating-point vectors of [2 x double].

This intrinsic corresponds to the VINSERTF128 instruction.

Parameters
__hiA 128-bit floating-point vector of [2 x double] to be copied to the upper 128 bits of the result.
__loA 128-bit floating-point vector of [2 x double] to be copied to the lower 128 bits of the result.
Returns
A 256-bit floating-point vector of [4 x double] containing the concatenated result.

Definition at line 5068 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, and _mm256_set_m128().

◆ _mm256_set_m128i()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_m128i ( __m128i  __hi,
__m128i  __lo 
)
static

Constructs a 256-bit integer vector by concatenating two 128-bit integer vectors.

This intrinsic corresponds to the VINSERTF128 instruction.

Parameters
__hiA 128-bit integer vector to be copied to the upper 128 bits of the result.
__loA 128-bit integer vector to be copied to the lower 128 bits of the result.
Returns
A 256-bit integer vector containing the concatenated result.

Definition at line 5088 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, and _mm256_set_m128().

◆ _mm256_set_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_pd ( double  __a,
double  __b,
double  __c,
double  __d 
)
static

Constructs a 256-bit floating-point vector of [4 x double] initialized with the specified double-precision floating-point values.

This intrinsic corresponds to the VUNPCKLPD+VINSERTF128 instruction.

Parameters
__aA double-precision floating-point value used to initialize bits [255:192] of the result.
__bA double-precision floating-point value used to initialize bits [191:128] of the result.
__cA double-precision floating-point value used to initialize bits [127:64] of the result.
__dA double-precision floating-point value used to initialize bits [63:0] of the result.
Returns
An initialized 256-bit floating-point vector of [4 x double].

Definition at line 3700 of file avxintrin.h.

References __b, __c, and __DEFAULT_FN_ATTRS.

◆ _mm256_set_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_ps ( float  __a,
float  __b,
float  __c,
float  __d,
float  __e,
float  __f,
float  __g,
float  __h 
)
static

Constructs a 256-bit floating-point vector of [8 x float] initialized with the specified single-precision floating-point values.

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__aA single-precision floating-point value used to initialize bits [255:224] of the result.
__bA single-precision floating-point value used to initialize bits [223:192] of the result.
__cA single-precision floating-point value used to initialize bits [191:160] of the result.
__dA single-precision floating-point value used to initialize bits [159:128] of the result.
__eA single-precision floating-point value used to initialize bits [127:96] of the result.
__fA single-precision floating-point value used to initialize bits [95:64] of the result.
__gA single-precision floating-point value used to initialize bits [63:32] of the result.
__hA single-precision floating-point value used to initialize bits [31:0] of the result.
Returns
An initialized 256-bit floating-point vector of [8 x float].

Definition at line 3739 of file avxintrin.h.

References __b, __c, and __DEFAULT_FN_ATTRS.

◆ _mm256_setr_epi16()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi16 ( short  __w15,
short  __w14,
short  __w13,
short  __w12,
short  __w11,
short  __w10,
short  __w09,
short  __w08,
short  __w07,
short  __w06,
short  __w05,
short  __w04,
short  __w03,
short  __w02,
short  __w01,
short  __w00 
)
static

Constructs a 256-bit integer vector, initialized in reverse order with the specified 16-bit integral values.

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__w15A 16-bit integral value used to initialize bits [15:0] of the result.
__w14A 16-bit integral value used to initialize bits [31:16] of the result.
__w13A 16-bit integral value used to initialize bits [47:32] of the result.
__w12A 16-bit integral value used to initialize bits [63:48] of the result.
__w11A 16-bit integral value used to initialize bits [79:64] of the result.
__w10A 16-bit integral value used to initialize bits [95:80] of the result.
__w09A 16-bit integral value used to initialize bits [111:96] of the result.
__w08A 16-bit integral value used to initialize bits [127:112] of the result.
__w07A 16-bit integral value used to initialize bits [143:128] of the result.
__w06A 16-bit integral value used to initialize bits [159:144] of the result.
__w05A 16-bit integral value used to initialize bits [175:160] of the result.
__w04A 16-bit integral value used to initialize bits [191:176] of the result.
__w03A 16-bit integral value used to initialize bits [207:192] of the result.
__w02A 16-bit integral value used to initialize bits [223:208] of the result.
__w01A 16-bit integral value used to initialize bits [239:224] of the result.
__w00A 16-bit integral value used to initialize bits [255:240] of the result.
Returns
An initialized 256-bit integer vector.

Definition at line 4086 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_setr_epi32()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi32 ( int  __i0,
int  __i1,
int  __i2,
int  __i3,
int  __i4,
int  __i5,
int  __i6,
int  __i7 
)
static

Constructs a 256-bit integer vector, initialized in reverse order with the specified 32-bit integral values.

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__i0A 32-bit integral value used to initialize bits [31:0] of the result.
__i1A 32-bit integral value used to initialize bits [63:32] of the result.
__i2A 32-bit integral value used to initialize bits [95:64] of the result.
__i3A 32-bit integral value used to initialize bits [127:96] of the result.
__i4A 32-bit integral value used to initialize bits [159:128] of the result.
__i5A 32-bit integral value used to initialize bits [191:160] of the result.
__i6A 32-bit integral value used to initialize bits [223:192] of the result.
__i7A 32-bit integral value used to initialize bits [255:224] of the result.
Returns
An initialized 256-bit integer vector.

Definition at line 4038 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_setr_epi64x()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi64x ( long long  __a,
long long  __b,
long long  __c,
long long  __d 
)
static

Constructs a 256-bit integer vector, initialized in reverse order with the specified 64-bit integral values.

This intrinsic corresponds to the VPUNPCKLQDQ+VINSERTF128 instruction.

Parameters
__aA 64-bit integral value used to initialize bits [63:0] of the result.
__bA 64-bit integral value used to initialize bits [127:64] of the result.
__cA 64-bit integral value used to initialize bits [191:128] of the result.
__dA 64-bit integral value used to initialize bits [255:192] of the result.
Returns
An initialized 256-bit integer vector.

Definition at line 4203 of file avxintrin.h.

References __b, __c, and __DEFAULT_FN_ATTRS.

◆ _mm256_setr_epi8()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi8 ( char  __b31,
char  __b30,
char  __b29,
char  __b28,
char  __b27,
char  __b26,
char  __b25,
char  __b24,
char  __b23,
char  __b22,
char  __b21,
char  __b20,
char  __b19,
char  __b18,
char  __b17,
char  __b16,
char  __b15,
char  __b14,
char  __b13,
char  __b12,
char  __b11,
char  __b10,
char  __b09,
char  __b08,
char  __b07,
char  __b06,
char  __b05,
char  __b04,
char  __b03,
char  __b02,
char  __b01,
char  __b00 
)
static

Constructs a 256-bit integer vector, initialized in reverse order with the specified 8-bit integral values.

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__b31An 8-bit integral value used to initialize bits [7:0] of the result.
__b30An 8-bit integral value used to initialize bits [15:8] of the result.
__b29An 8-bit integral value used to initialize bits [23:16] of the result.
__b28An 8-bit integral value used to initialize bits [31:24] of the result.
__b27An 8-bit integral value used to initialize bits [39:32] of the result.
__b26An 8-bit integral value used to initialize bits [47:40] of the result.
__b25An 8-bit integral value used to initialize bits [55:48] of the result.
__b24An 8-bit integral value used to initialize bits [63:56] of the result.
__b23An 8-bit integral value used to initialize bits [71:64] of the result.
__b22An 8-bit integral value used to initialize bits [79:72] of the result.
__b21An 8-bit integral value used to initialize bits [87:80] of the result.
__b20An 8-bit integral value used to initialize bits [95:88] of the result.
__b19An 8-bit integral value used to initialize bits [103:96] of the result.
__b18An 8-bit integral value used to initialize bits [111:104] of the result.
__b17An 8-bit integral value used to initialize bits [119:112] of the result.
__b16An 8-bit integral value used to initialize bits [127:120] of the result.
__b15An 8-bit integral value used to initialize bits [135:128] of the result.
__b14An 8-bit integral value used to initialize bits [143:136] of the result.
__b13An 8-bit integral value used to initialize bits [151:144] of the result.
__b12An 8-bit integral value used to initialize bits [159:152] of the result.
__b11An 8-bit integral value used to initialize bits [167:160] of the result.
__b10An 8-bit integral value used to initialize bits [175:168] of the result.
__b09An 8-bit integral value used to initialize bits [183:176] of the result.
__b08An 8-bit integral value used to initialize bits [191:184] of the result.
__b07An 8-bit integral value used to initialize bits [199:192] of the result.
__b06An 8-bit integral value used to initialize bits [207:200] of the result.
__b05An 8-bit integral value used to initialize bits [215:208] of the result.
__b04An 8-bit integral value used to initialize bits [223:216] of the result.
__b03An 8-bit integral value used to initialize bits [231:224] of the result.
__b02An 8-bit integral value used to initialize bits [239:232] of the result.
__b01An 8-bit integral value used to initialize bits [247:240] of the result.
__b00An 8-bit integral value used to initialize bits [255:248] of the result.
Returns
An initialized 256-bit integer vector.

Definition at line 4169 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_setr_m128()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_m128 ( __m128  __lo,
__m128  __hi 
)
static

Constructs a 256-bit floating-point vector of [8 x float] by concatenating two 128-bit floating-point vectors of [4 x float].

This is similar to _mm256_set_m128, but the order of the input parameters is swapped.

This intrinsic corresponds to the VINSERTF128 instruction.

Parameters
__loA 128-bit floating-point vector of [4 x float] to be copied to the lower 128 bits of the result.
__hiA 128-bit floating-point vector of [4 x float] to be copied to the upper 128 bits of the result.
Returns
A 256-bit floating-point vector of [8 x float] containing the concatenated result.

Definition at line 5111 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, and _mm256_set_m128().

◆ _mm256_setr_m128d()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_m128d ( __m128d  __lo,
__m128d  __hi 
)
static

Constructs a 256-bit floating-point vector of [4 x double] by concatenating two 128-bit floating-point vectors of [2 x double].

This is similar to _mm256_set_m128d, but the order of the input parameters is swapped.

This intrinsic corresponds to the VINSERTF128 instruction.

Parameters
__loA 128-bit floating-point vector of [2 x double] to be copied to the lower 128 bits of the result.
__hiA 128-bit floating-point vector of [2 x double] to be copied to the upper 128 bits of the result.
Returns
A 256-bit floating-point vector of [4 x double] containing the concatenated result.

Definition at line 5134 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, and _mm256_set_m128().

◆ _mm256_setr_m128i()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_m128i ( __m128i  __lo,
__m128i  __hi 
)
static

Constructs a 256-bit integer vector by concatenating two 128-bit integer vectors.

This is similar to _mm256_set_m128i, but the order of the input parameters is swapped.

This intrinsic corresponds to the VINSERTF128 instruction.

Parameters
__loA 128-bit integer vector to be copied to the lower 128 bits of the result.
__hiA 128-bit integer vector to be copied to the upper 128 bits of the result.
Returns
A 256-bit integer vector containing the concatenated result.

Definition at line 5155 of file avxintrin.h.

References _mm256_set_m128().

◆ _mm256_setr_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_pd ( double  __a,
double  __b,
double  __c,
double  __d 
)
static

Constructs a 256-bit floating-point vector of [4 x double], initialized in reverse order with the specified double-precision floating-point values.

This intrinsic corresponds to the VUNPCKLPD+VINSERTF128 instruction.

Parameters
__aA double-precision floating-point value used to initialize bits [63:0] of the result.
__bA double-precision floating-point value used to initialize bits [127:64] of the result.
__cA double-precision floating-point value used to initialize bits [191:128] of the result.
__dA double-precision floating-point value used to initialize bits [255:192] of the result.
Returns
An initialized 256-bit floating-point vector of [4 x double].

Definition at line 3966 of file avxintrin.h.

References __b, __c, and __DEFAULT_FN_ATTRS.

◆ _mm256_setr_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_ps ( float  __a,
float  __b,
float  __c,
float  __d,
float  __e,
float  __f,
float  __g,
float  __h 
)
static

Constructs a 256-bit floating-point vector of [8 x float], initialized in reverse order with the specified single-precision floating-point values.

This intrinsic is a utility function and does not correspond to a specific instruction.

Parameters
__aA single-precision floating-point value used to initialize bits [31:0] of the result.
__bA single-precision floating-point value used to initialize bits [63:32] of the result.
__cA single-precision floating-point value used to initialize bits [95:64] of the result.
__dA single-precision floating-point value used to initialize bits [127:96] of the result.
__eA single-precision floating-point value used to initialize bits [159:128] of the result.
__fA single-precision floating-point value used to initialize bits [191:160] of the result.
__gA single-precision floating-point value used to initialize bits [223:192] of the result.
__hA single-precision floating-point value used to initialize bits [255:224] of the result.
Returns
An initialized 256-bit floating-point vector of [8 x float].

Definition at line 4006 of file avxintrin.h.

References __b, __c, and __DEFAULT_FN_ATTRS.

◆ _mm256_setzero_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd ( void  )
static

◆ _mm256_setzero_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps ( void  )
static

◆ _mm256_setzero_si256()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256 ( void  )
static

Constructs a 256-bit integer vector initialized to zero.

This intrinsic corresponds to the VXORPS instruction.

Returns
A 256-bit integer vector initialized to zero.

Definition at line 4358 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_abs_epi64(), _mm256_cvtpd_epi64(), _mm256_cvtpd_epu64(), _mm256_cvtps_epi64(), _mm256_cvtps_epu32(), _mm256_cvtps_epu64(), _mm256_cvttpd_epi64(), _mm256_cvttpd_epu64(), _mm256_cvttps_epi64(), _mm256_cvttps_epu32(), _mm256_cvttps_epu64(), _mm256_lzcnt_epi32(), _mm256_lzcnt_epi64(), _mm256_mask_test_epi16_mask(), _mm256_mask_test_epi32_mask(), _mm256_mask_test_epi64_mask(), _mm256_mask_test_epi8_mask(), _mm256_mask_testn_epi16_mask(), _mm256_mask_testn_epi32_mask(), _mm256_mask_testn_epi64_mask(), _mm256_mask_testn_epi8_mask(), _mm256_maskz_abs_epi16(), _mm256_maskz_abs_epi32(), _mm256_maskz_abs_epi64(), _mm256_maskz_abs_epi8(), _mm256_maskz_add_epi16(), _mm256_maskz_add_epi32(), _mm256_maskz_add_epi64(), _mm256_maskz_add_epi8(), _mm256_maskz_adds_epi16(), _mm256_maskz_adds_epi8(), _mm256_maskz_adds_epu16(), _mm256_maskz_adds_epu8(), _mm256_maskz_and_epi32(), _mm256_maskz_and_epi64(), _mm256_maskz_andnot_epi32(), _mm256_maskz_andnot_epi64(), _mm256_maskz_avg_epu16(), _mm256_maskz_avg_epu8(), _mm256_maskz_broadcast_i32x2(), _mm256_maskz_broadcast_i32x4(), _mm256_maskz_broadcast_i64x2(), _mm256_maskz_broadcastb_epi8(), _mm256_maskz_broadcastd_epi32(), _mm256_maskz_broadcastq_epi64(), _mm256_maskz_broadcastw_epi16(), _mm256_maskz_compress_epi32(), _mm256_maskz_compress_epi64(), _mm256_maskz_conflict_epi32(), _mm256_maskz_conflict_epi64(), _mm256_maskz_cvtepi16_epi32(), _mm256_maskz_cvtepi16_epi64(), _mm256_maskz_cvtepi32_epi64(), _mm256_maskz_cvtepi8_epi16(), _mm256_maskz_cvtepi8_epi32(), _mm256_maskz_cvtepi8_epi64(), _mm256_maskz_cvtepu16_epi32(), _mm256_maskz_cvtepu16_epi64(), _mm256_maskz_cvtepu32_epi64(), _mm256_maskz_cvtepu8_epi16(), _mm256_maskz_cvtepu8_epi32(), _mm256_maskz_cvtepu8_epi64(), _mm256_maskz_cvtpd_epi64(), _mm256_maskz_cvtpd_epu64(), _mm256_maskz_cvtps_epi32(), _mm256_maskz_cvtps_epi64(), _mm256_maskz_cvtps_epu32(), _mm256_maskz_cvtps_epu64(), _mm256_maskz_cvttpd_epi64(), _mm256_maskz_cvttpd_epu64(), _mm256_maskz_cvttps_epi32(), 
_mm256_maskz_cvttps_epi64(), _mm256_maskz_cvttps_epu32(), _mm256_maskz_cvttps_epu64(), _mm256_maskz_expand_epi32(), _mm256_maskz_expand_epi64(), _mm256_maskz_expandloadu_epi32(), _mm256_maskz_expandloadu_epi64(), _mm256_maskz_load_epi32(), _mm256_maskz_load_epi64(), _mm256_maskz_loadu_epi16(), _mm256_maskz_loadu_epi32(), _mm256_maskz_loadu_epi64(), _mm256_maskz_loadu_epi8(), _mm256_maskz_lzcnt_epi32(), _mm256_maskz_lzcnt_epi64(), _mm256_maskz_madd_epi16(), _mm256_maskz_maddubs_epi16(), _mm256_maskz_max_epi16(), _mm256_maskz_max_epi32(), _mm256_maskz_max_epi64(), _mm256_maskz_max_epi8(), _mm256_maskz_max_epu16(), _mm256_maskz_max_epu32(), _mm256_maskz_max_epu64(), _mm256_maskz_max_epu8(), _mm256_maskz_min_epi16(), _mm256_maskz_min_epi32(), _mm256_maskz_min_epi64(), _mm256_maskz_min_epi8(), _mm256_maskz_min_epu16(), _mm256_maskz_min_epu32(), _mm256_maskz_min_epu64(), _mm256_maskz_min_epu8(), _mm256_maskz_mov_epi16(), _mm256_maskz_mov_epi32(), _mm256_maskz_mov_epi64(), _mm256_maskz_mov_epi8(), _mm256_maskz_mul_epi32(), _mm256_maskz_mul_epu32(), _mm256_maskz_mulhi_epi16(), _mm256_maskz_mulhi_epu16(), _mm256_maskz_mulhrs_epi16(), _mm256_maskz_mullo_epi16(), _mm256_maskz_mullo_epi32(), _mm256_maskz_mullo_epi64(), _mm256_maskz_multishift_epi64_epi8(), _mm256_maskz_or_epi32(), _mm256_maskz_or_epi64(), _mm256_maskz_packs_epi16(), _mm256_maskz_packs_epi32(), _mm256_maskz_packus_epi16(), _mm256_maskz_packus_epi32(), _mm256_maskz_permutexvar_epi16(), _mm256_maskz_permutexvar_epi32(), _mm256_maskz_permutexvar_epi64(), _mm256_maskz_permutexvar_epi8(), _mm256_maskz_rolv_epi32(), _mm256_maskz_rolv_epi64(), _mm256_maskz_rorv_epi32(), _mm256_maskz_rorv_epi64(), _mm256_maskz_set1_epi16(), _mm256_maskz_set1_epi32(), _mm256_maskz_set1_epi8(), _mm256_maskz_shuffle_epi8(), _mm256_maskz_sll_epi16(), _mm256_maskz_sll_epi32(), _mm256_maskz_sll_epi64(), _mm256_maskz_slli_epi16(), _mm256_maskz_slli_epi32(), _mm256_maskz_slli_epi64(), _mm256_maskz_sllv_epi16(), _mm256_maskz_sllv_epi32(), 
_mm256_maskz_sllv_epi64(), _mm256_maskz_sra_epi16(), _mm256_maskz_sra_epi32(), _mm256_maskz_sra_epi64(), _mm256_maskz_srai_epi16(), _mm256_maskz_srai_epi32(), _mm256_maskz_srai_epi64(), _mm256_maskz_srav_epi16(), _mm256_maskz_srav_epi32(), _mm256_maskz_srav_epi64(), _mm256_maskz_srl_epi16(), _mm256_maskz_srl_epi32(), _mm256_maskz_srl_epi64(), _mm256_maskz_srli_epi16(), _mm256_maskz_srli_epi32(), _mm256_maskz_srli_epi64(), _mm256_maskz_srlv_epi16(), _mm256_maskz_srlv_epi32(), _mm256_maskz_srlv_epi64(), _mm256_maskz_sub_epi16(), _mm256_maskz_sub_epi32(), _mm256_maskz_sub_epi64(), _mm256_maskz_sub_epi8(), _mm256_maskz_subs_epi16(), _mm256_maskz_subs_epi8(), _mm256_maskz_subs_epu16(), _mm256_maskz_subs_epu8(), _mm256_maskz_unpackhi_epi16(), _mm256_maskz_unpackhi_epi32(), _mm256_maskz_unpackhi_epi64(), _mm256_maskz_unpackhi_epi8(), _mm256_maskz_unpacklo_epi16(), _mm256_maskz_unpacklo_epi32(), _mm256_maskz_unpacklo_epi64(), _mm256_maskz_unpacklo_epi8(), _mm256_maskz_xor_epi32(), _mm256_maskz_xor_epi64(), _mm256_max_epi64(), _mm256_max_epu64(), _mm256_min_epi64(), _mm256_min_epu64(), _mm256_rolv_epi32(), _mm256_rolv_epi64(), _mm256_rorv_epi32(), _mm256_rorv_epi64(), _mm256_test_epi16_mask(), _mm256_test_epi32_mask(), _mm256_test_epi64_mask(), _mm256_test_epi8_mask(), _mm256_testn_epi16_mask(), _mm256_testn_epi32_mask(), _mm256_testn_epi64_mask(), _mm256_testn_epi8_mask(), _mm512_cvtepi16_epi8(), _mm512_cvtsepi16_epi8(), _mm512_cvttpd_epi32(), _mm512_cvtusepi16_epi8(), _mm512_maskz_cvtepi16_epi8(), _mm512_maskz_cvtepi32_epi16(), _mm512_maskz_cvtepi64_epi32(), _mm512_maskz_cvtpd_epi32(), _mm512_maskz_cvtpd_epu32(), _mm512_maskz_cvtsepi16_epi8(), _mm512_maskz_cvtsepi32_epi16(), _mm512_maskz_cvtsepi64_epi32(), _mm512_maskz_cvttpd_epi32(), _mm512_maskz_cvttpd_epu32(), _mm512_maskz_cvtusepi16_epi8(), _mm512_maskz_cvtusepi32_epi16(), _mm512_maskz_cvtusepi64_epi32(), and _mm512_zextsi256_si512().

◆ _mm256_sqrt_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd ( __m256d  __a)
static

Calculates the square roots of the values in a 256-bit vector of [4 x double].

This intrinsic corresponds to the VSQRTPD instruction.

Parameters
__aA 256-bit vector of [4 x double].
Returns
A 256-bit vector of [4 x double] containing the square roots of the values in the operand.

Definition at line 326 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_sqrt_pd(), and _mm256_maskz_sqrt_pd().

◆ _mm256_sqrt_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps ( __m256  __a)
static

Calculates the square roots of the values in a 256-bit vector of [8 x float].

This intrinsic corresponds to the VSQRTPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the square roots of the values in the operand.

Definition at line 343 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_sqrt_ps(), and _mm256_maskz_sqrt_ps().

◆ _mm256_store_pd()

static __inline void __DEFAULT_FN_ATTRS _mm256_store_pd ( double *  __p,
__m256d  __a 
)
static

Stores double-precision floating point values from a 256-bit vector of [4 x double] to a 32-byte aligned memory location pointed to by __p.

This intrinsic corresponds to the VMOVAPD instruction.

Parameters
__pA 32-byte aligned pointer to a memory location that will receive the double-precision floating point values.
__aA 256-bit vector of [4 x double] containing the values to be moved.

Definition at line 3279 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_store_ps()

static __inline void __DEFAULT_FN_ATTRS _mm256_store_ps ( float *  __p,
__m256  __a 
)
static

Stores single-precision floating point values from a 256-bit vector of [8 x float] to a 32-byte aligned memory location pointed to by __p.

This intrinsic corresponds to the VMOVAPS instruction.

Parameters
__pA 32-byte aligned pointer to a memory location that will receive the float values.
__aA 256-bit vector of [8 x float] containing the values to be moved.

Definition at line 3297 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_store_si256()

static __inline void __DEFAULT_FN_ATTRS _mm256_store_si256 ( __m256i *  __p,
__m256i  __a 
)
static

Stores integer values from a 256-bit integer vector to a 32-byte aligned memory location pointed to by __p.

This intrinsic corresponds to the VMOVDQA instruction.

Parameters
__pA 32-byte aligned pointer to a memory location that will receive the integer values.
__aA 256-bit integer vector containing the values to be moved.

Definition at line 3356 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_storeu2_m128()

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128 ( float *  __addr_hi,
float *  __addr_lo,
__m256  __a 
)
static

Stores the upper and lower 128 bits of a 256-bit floating-point vector of [8 x float] into two different unaligned memory locations.

This intrinsic corresponds to the VEXTRACTF128 instruction and the store instructions.

Parameters
__addr_hiA pointer to a 128-bit memory location. Bits[255:128] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned.
__addr_loA pointer to a 128-bit memory location. Bits[127:0] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned.
__aA 256-bit floating-point vector of [8 x float].

Definition at line 4963 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, _mm256_castps256_ps128(), _mm256_extractf128_ps, and _mm_storeu_ps().

◆ _mm256_storeu2_m128d()

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128d ( double *  __addr_hi,
double *  __addr_lo,
__m256d  __a 
)
static

Stores the upper and lower 128 bits of a 256-bit floating-point vector of [4 x double] into two different unaligned memory locations.

This intrinsic corresponds to the VEXTRACTF128 instruction and the store instructions.

Parameters
__addr_hiA pointer to a 128-bit memory location. Bits[255:128] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned.
__addr_loA pointer to a 128-bit memory location. Bits[127:0] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned.
__aA 256-bit floating-point vector of [4 x double].

Definition at line 4992 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, _mm256_castpd256_pd128(), _mm256_extractf128_pd, and _mm_storeu_pd().

◆ _mm256_storeu2_m128i()

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128i ( __m128i *  __addr_hi,
__m128i *  __addr_lo,
__m256i  __a 
)
static

Stores the upper and lower 128 bits of a 256-bit integer vector into two different unaligned memory locations.

This intrinsic corresponds to the VEXTRACTF128 instruction and the store instructions.

Parameters
__addr_hiA pointer to a 128-bit memory location. Bits[255:128] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned.
__addr_loA pointer to a 128-bit memory location. Bits[127:0] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned.
__aA 256-bit integer vector.

Definition at line 5021 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, _mm256_castsi256_si128(), _mm256_extractf128_si256, and _mm_storeu_si128().

◆ _mm256_storeu_pd()

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_pd ( double *  __p,
__m256d  __a 
)
static

Stores double-precision floating point values from a 256-bit vector of [4 x double] to an unaligned memory location pointed to by __p.

This intrinsic corresponds to the VMOVUPD instruction.

Parameters
__pA pointer to a memory location that will receive the double-precision floating point values.
__aA 256-bit vector of [4 x double] containing the values to be moved.

Definition at line 3315 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_storeu_ps()

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_ps ( float *  __p,
__m256  __a 
)
static

Stores single-precision floating point values from a 256-bit vector of [8 x float] to an unaligned memory location pointed to by __p.

This intrinsic corresponds to the VMOVUPS instruction.

Parameters
__pA pointer to a memory location that will receive the float values.
__aA 256-bit vector of [8 x float] containing the values to be moved.

Definition at line 3335 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_storeu_si256()

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_si256 ( __m256i *  __p,
__m256i  __a 
)
static

Stores integer values from a 256-bit integer vector to an unaligned memory location pointed to by __p.

This intrinsic corresponds to the VMOVDQU instruction.

Parameters
__pA pointer to a memory location that will receive the integer values.
__aA 256-bit integer vector containing the values to be moved.

Definition at line 3373 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_stream_pd()

static __inline void __DEFAULT_FN_ATTRS _mm256_stream_pd ( double *  __a,
__m256d  __b 
)
static

Moves double-precision values from a 256-bit vector of [4 x double] to a 32-byte aligned memory location.

To minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).

This intrinsic corresponds to the VMOVNTPD instruction.

Parameters
__aA pointer to a 32-byte aligned memory location that will receive the double-precision floating-point values.
__bA 256-bit vector of [4 x double] containing the values to be moved.

Definition at line 3611 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_stream_ps()

static __inline void __DEFAULT_FN_ATTRS _mm256_stream_ps ( float *  __p,
__m256  __a 
)
static

Moves single-precision floating point values from a 256-bit vector of [8 x float] to a 32-byte aligned memory location.

To minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).

This intrinsic corresponds to the VMOVNTPS instruction.

Parameters
__pA pointer to a 32-byte aligned memory location that will receive the single-precision floating point values.
__aA 256-bit vector of [8 x float] containing the values to be moved.

Definition at line 3632 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_stream_si256()

static __inline void __DEFAULT_FN_ATTRS _mm256_stream_si256 ( __m256i *  __a,
__m256i  __b 
)
static

Moves integer data from a 256-bit integer vector to a 32-byte aligned memory location.

To minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).

This intrinsic corresponds to the VMOVNTDQ instruction.

Parameters
__aA pointer to a 32-byte aligned memory location that will receive the integer values.
__bA 256-bit integer vector containing the values to be moved.

Definition at line 3591 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_sub_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd ( __m256d  __a,
__m256d  __b 
)
static

Subtracts two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VSUBPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing the minuend.
__bA 256-bit vector of [4 x double] containing the subtrahend.
Returns
A 256-bit vector of [4 x double] containing the differences between both operands.

Definition at line 105 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_sub_pd(), and _mm256_maskz_sub_pd().

◆ _mm256_sub_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps ( __m256  __a,
__m256  __b 
)
static

Subtracts two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VSUBPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing the minuend.
__bA 256-bit vector of [8 x float] containing the subtrahend.
Returns
A 256-bit vector of [8 x float] containing the differences between both operands.

Definition at line 123 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_sub_ps(), and _mm256_maskz_sub_ps().

◆ _mm256_testc_pd()

static __inline int __DEFAULT_FN_ATTRS _mm256_testc_pd ( __m256d  __a,
__m256d  __b 
)
static

Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the CF flag.

This intrinsic corresponds to the VTESTPD instruction.

Parameters
__aA 256-bit vector of [4 x double].
__bA 256-bit vector of [4 x double].
Returns
the CF flag.

Definition at line 2780 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_testc_ps()

static __inline int __DEFAULT_FN_ATTRS _mm256_testc_ps ( __m256  __a,
__m256  __b 
)
static

Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision elements in the first source vector and the corresponding elements in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the CF flag.

This intrinsic corresponds to the VTESTPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
__bA 256-bit vector of [8 x float].
Returns
the CF flag.

Definition at line 2868 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_testc_si256()

static __inline int __DEFAULT_FN_ATTRS _mm256_testc_si256 ( __m256i  __a,
__m256i  __b 
)
static

Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors.

The EFLAGS register is updated as follows:
If there is at least one pair of bits where both bits are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of bits where the bit from the first source vector is 0 and the bit from the second source vector is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the CF flag.

This intrinsic corresponds to the VPTEST instruction.

Parameters
__aA 256-bit integer vector.
__bA 256-bit integer vector.
Returns
the CF flag.

Definition at line 2950 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_testnzc_pd()

static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_pd ( __m256d  __a,
__m256d  __b 
)
static

Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns 1 if both the ZF and CF flags are set to 0, otherwise it returns 0.

This intrinsic corresponds to the VTESTPD instruction.

Parameters
__aA 256-bit vector of [4 x double].
__bA 256-bit vector of [4 x double].
Returns
1 if both the ZF and CF flags are set to 0, otherwise returns 0.

Definition at line 2810 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_testnzc_ps()

static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_ps ( __m256  __a,
__m256  __b 
)
static

Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision elements in the first source vector and the corresponding elements in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns 1 if both the ZF and CF flags are set to 0, otherwise it returns 0.

This intrinsic corresponds to the VTESTPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
__bA 256-bit vector of [8 x float].
Returns
1 if both the ZF and CF flags are set to 0, otherwise returns 0.

Definition at line 2898 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_testnzc_si256()

static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_si256 ( __m256i  __a,
__m256i  __b 
)
static

Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors.

The EFLAGS register is updated as follows:
If there is at least one pair of bits where both bits are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of bits where the bit from the first source vector is 0 and the bit from the second source vector is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns 1 if both the ZF and CF flags are set to 0, otherwise it returns 0.

This intrinsic corresponds to the VPTEST instruction.

Parameters
__aA 256-bit integer vector.
__bA 256-bit integer vector.
Returns
1 if both the ZF and CF flags are set to 0, otherwise returns 0.

Definition at line 2977 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_testz_pd()

static __inline int __DEFAULT_FN_ATTRS _mm256_testz_pd ( __m256d  __a,
__m256d  __b 
)
static

Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the ZF flag.

This intrinsic corresponds to the VTESTPD instruction.

Parameters
__aA 256-bit vector of [4 x double].
__bA 256-bit vector of [4 x double].
Returns
the ZF flag.

Definition at line 2751 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_testz_ps()

static __inline int __DEFAULT_FN_ATTRS _mm256_testz_ps ( __m256  __a,
__m256  __b 
)
static

Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision elements in the first source vector and the corresponding elements in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the ZF flag.

This intrinsic corresponds to the VTESTPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
__bA 256-bit vector of [8 x float].
Returns
the ZF flag.

Definition at line 2839 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_testz_si256()

static __inline int __DEFAULT_FN_ATTRS _mm256_testz_si256 ( __m256i  __a,
__m256i  __b 
)
static

Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors.

The EFLAGS register is updated as follows:
If there is at least one pair of bits where both bits are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of bits where the bit from the first source vector is 0 and the bit from the second source vector is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the ZF flag.

This intrinsic corresponds to the VPTEST instruction.

Parameters
__aA 256-bit integer vector.
__bA 256-bit integer vector.
Returns
the ZF flag.

Definition at line 2924 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_undefined_pd()

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd ( void  )
static

Create a 256-bit vector of [4 x double] with undefined values.

This intrinsic has no corresponding instruction.

Returns
A 256-bit vector of [4 x double] containing undefined values.

Definition at line 3647 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_undefined_ps()

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps ( void  )
static

Create a 256-bit vector of [8 x float] with undefined values.

This intrinsic has no corresponding instruction.

Returns
A 256-bit vector of [8 x float] containing undefined values.

Definition at line 3660 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm512_cvtpd_ps().

◆ _mm256_undefined_si256()

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256 ( void  )
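static

Create a 256-bit integer vector with undefined values.

This intrinsic has no corresponding instruction.

Returns
A 256-bit integer vector containing undefined values.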

◆ _mm256_unpackhi_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd ( __m256d  __a,
__m256d  __b 
)
static

Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double].

This intrinsic corresponds to the VUNPCKHPD instruction.

Parameters
__aA 256-bit floating-point vector of [4 x double].
Bits [127:64] are written to bits [63:0] of the return value.
Bits [255:192] are written to bits [191:128] of the return value.
__bA 256-bit floating-point vector of [4 x double].
Bits [127:64] are written to bits [127:64] of the return value.
Bits [255:192] are written to bits [255:192] of the return value.
Returns
A 256-bit vector of [4 x double] containing the interleaved values.

Definition at line 2469 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_unpackhi_pd(), and _mm256_maskz_unpackhi_pd().

◆ _mm256_unpackhi_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps ( __m256  __a,
__m256  __b 
)
static

Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] and interleaves them into a 256-bit vector of [8 x float].

This intrinsic corresponds to the VUNPCKHPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
Bits [95:64] are written to bits [31:0] of the return value.
Bits [127:96] are written to bits [95:64] of the return value.
Bits [223:192] are written to bits [159:128] of the return value.
Bits [255:224] are written to bits [223:192] of the return value.
__bA 256-bit vector of [8 x float].
Bits [95:64] are written to bits [63:32] of the return value.
Bits [127:96] are written to bits [127:96] of the return value.
Bits [223:192] are written to bits [191:160] of the return value.
Bits [255:224] are written to bits [255:224] of the return value.
Returns
A 256-bit vector of [8 x float] containing the interleaved values.

Definition at line 2518 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_unpackhi_ps(), and _mm256_maskz_unpackhi_ps().

◆ _mm256_unpacklo_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd ( __m256d  __a,
__m256d  __b 
)
static

Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double].

This intrinsic corresponds to the VUNPCKLPD instruction.

Parameters
__aA 256-bit floating-point vector of [4 x double].
Bits [63:0] are written to bits [63:0] of the return value.
Bits [191:128] are written to bits [191:128] of the return value.
__bA 256-bit floating-point vector of [4 x double].
Bits [63:0] are written to bits [127:64] of the return value.
Bits [191:128] are written to bits [255:192] of the return value.
Returns
A 256-bit vector of [4 x double] containing the interleaved values.

Definition at line 2491 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_unpacklo_pd(), and _mm256_maskz_unpacklo_pd().

◆ _mm256_unpacklo_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps ( __m256  __a,
__m256  __b 
)
static

Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] and interleaves them into a 256-bit vector of [8 x float].

This intrinsic corresponds to the VUNPCKLPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
Bits [31:0] are written to bits [31:0] of the return value.
Bits [63:32] are written to bits [95:64] of the return value.
Bits [159:128] are written to bits [159:128] of the return value.
Bits [191:160] are written to bits [223:192] of the return value.
__bA 256-bit vector of [8 x float].
Bits [31:0] are written to bits [63:32] of the return value.
Bits [63:32] are written to bits [127:96] of the return value.
Bits [159:128] are written to bits [191:160] of the return value.
Bits [191:160] are written to bits [255:224] of the return value.
Returns
A 256-bit vector of [8 x float] containing the interleaved values.

Definition at line 2545 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_unpacklo_ps(), and _mm256_maskz_unpacklo_ps().

◆ _mm256_xor_pd()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_xor_pd ( __m256d  __a,
__m256d  __b 
)
static

Performs a bitwise XOR of two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VXORPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the source operands.
__bA 256-bit vector of [4 x double] containing one of the source operands.
Returns
A 256-bit vector of [4 x double] containing the bitwise XOR of the two operands.

Definition at line 643 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_xor_pd(), and _mm256_maskz_xor_pd().

◆ _mm256_xor_ps()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_xor_ps ( __m256  __a,
__m256  __b 
)
static

Performs a bitwise XOR of two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VXORPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the source operands.
__bA 256-bit vector of [8 x float] containing one of the source operands.
Returns
A 256-bit vector of [8 x float] containing the bitwise XOR of the two operands.

Definition at line 661 of file avxintrin.h.

References __b, and __DEFAULT_FN_ATTRS.

Referenced by _mm256_mask_xor_ps(), and _mm256_maskz_xor_ps().

◆ _mm256_zeroall()

static __inline void __DEFAULT_FN_ATTRS _mm256_zeroall ( void  )
static

Zeroes the contents of all XMM or YMM registers.

This intrinsic corresponds to the VZEROALL instruction.

Definition at line 3026 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_zeroupper()

static __inline void __DEFAULT_FN_ATTRS _mm256_zeroupper ( void  )
static

Zeroes the upper 128 bits (bits 255:128) of all YMM registers.

This intrinsic corresponds to the VZEROUPPER instruction.

Definition at line 3037 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm256_zextpd128_pd256()

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_zextpd128_pd256 ( __m128d  __a)
static

Constructs a 256-bit floating-point vector of [4 x double] from a 128-bit floating-point vector of [2 x double].

The lower 128 bits contain the value of the source vector. The upper 128 bits are set to zero.

This intrinsic has no corresponding instruction.

Parameters
__aA 128-bit vector of [2 x double].
Returns
A 256-bit floating-point vector of [4 x double]. The lower 128 bits contain the value of the parameter. The upper 128 bits are set to zero.

Definition at line 4591 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, and _mm_setzero_pd().

◆ _mm256_zextps128_ps256()

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_zextps128_ps256 ( __m128  __a)
static

Constructs a 256-bit floating-point vector of [8 x float] from a 128-bit floating-point vector of [4 x float].

The lower 128 bits contain the value of the source vector. The upper 128 bits are set to zero.

This intrinsic has no corresponding instruction.

Parameters
__aA 128-bit vector of [4 x float].
Returns
A 256-bit floating-point vector of [8 x float]. The lower 128 bits contain the value of the parameter. The upper 128 bits are set to zero.

Definition at line 4609 of file avxintrin.h.

References __DEFAULT_FN_ATTRS, and _mm_setzero_ps().

◆ _mm256_zextsi128_si256()

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_zextsi128_si256 ( __m128i  __a)
static

Constructs a 256-bit integer vector from a 128-bit integer vector.

The lower 128 bits contain the value of the source vector. The upper 128 bits are set to zero.

This intrinsic has no corresponding instruction.

Parameters
__aA 128-bit integer vector.
Returns
A 256-bit integer vector. The lower 128 bits contain the value of the parameter. The upper 128 bits are set to zero.

Definition at line 4627 of file avxintrin.h.

References _mm_setzero_si128().

◆ _mm_broadcast_ss()

static __inline __m128 __DEFAULT_FN_ATTRS _mm_broadcast_ss ( float const *  __a)
static

Loads a scalar single-precision floating point value from the specified address pointed to by __a and broadcasts it to the elements of a [4 x float] vector.

This intrinsic corresponds to the VBROADCASTSS instruction.

Parameters
__aA pointer to the single-precision floating point value to be broadcast.
Returns
A 128-bit vector of [4 x float] whose 32-bit elements are set equal to the broadcast value.

Definition at line 3056 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm_maskload_pd()

static __inline __m128d __DEFAULT_FN_ATTRS _mm_maskload_pd ( double const *  __p,
__m128i  __m 
)
static

Conditionally loads double-precision floating point elements from a memory location pointed to by __p into a 128-bit vector of [2 x double], depending on the mask bits associated with each data element.

This intrinsic corresponds to the VMASKMOVPD instruction.

Parameters
__pA pointer to a memory location that contains the double-precision floating point values.
__mA 128-bit integer vector containing the mask. The most significant bit of each data element is the mask bit for that element. If a mask bit is zero, the corresponding value in the memory location is not loaded and the corresponding field in the return value is set to zero.
Returns
A 128-bit vector of [2 x double] containing the loaded values.

Definition at line 3401 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm_maskload_ps()

static __inline __m128 __DEFAULT_FN_ATTRS _mm_maskload_ps ( float const *  __p,
__m128i  __m 
)
static

Conditionally loads single-precision floating point elements from a memory location pointed to by __p into a 128-bit vector of [4 x float], depending on the mask bits associated with each data element.

This intrinsic corresponds to the VMASKMOVPS instruction.

Parameters
__pA pointer to a memory location that contains the single-precision floating point values.
__mA 128-bit integer vector containing the mask. The most significant bit of each data element is the mask bit for that element. If a mask bit is zero, the corresponding value in the memory location is not loaded and the corresponding field in the return value is set to zero.
Returns
A 128-bit vector of [4 x float] containing the loaded values.

Definition at line 3450 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm_maskstore_pd()

static __inline void __DEFAULT_FN_ATTRS _mm_maskstore_pd ( double *  __p,
__m128i  __m,
__m128d  __a 
)
static

Moves double-precision values from a 128-bit vector of [2 x double] to a memory location pointed to by __p, according to the specified mask.

This intrinsic corresponds to the VMASKMOVPD instruction.

Parameters
__pA pointer to a memory location that will receive the double-precision values.
__mA 128-bit integer vector containing the mask. The most significant bit of each field in the mask vector is the mask bit for that field. If a mask bit is zero, the corresponding value from vector __a is not stored and the corresponding field in the memory location pointed to by __p is not changed.
__aA 128-bit vector of [2 x double] containing the values to be stored.

Definition at line 3523 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm_maskstore_ps()

static __inline void __DEFAULT_FN_ATTRS _mm_maskstore_ps ( float *  __p,
__m128i  __m,
__m128  __a 
)
static

Moves single-precision floating point values from a 128-bit vector of [4 x float] to a memory location pointed to by __p, according to the specified mask.

This intrinsic corresponds to the VMASKMOVPS instruction.

Parameters
__pA pointer to a memory location that will receive the float values.
__mA 128-bit integer vector containing the mask. The most significant bit of each field in the mask vector is the mask bit for that field. If a mask bit is zero, the corresponding value from vector __a is not stored and the corresponding field in the memory location pointed to by __p is not changed.
__aA 128-bit vector of [4 x float] containing the values to be stored.

Definition at line 3571 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm_permutevar_pd()

static __inline __m128d __DEFAULT_FN_ATTRS _mm_permutevar_pd ( __m128d  __a,
__m128i  __c 
)
static

Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector operand.

This intrinsic corresponds to the VPERMILPD instruction.

Parameters
__aA 128-bit vector of [2 x double].
__cA 128-bit integer vector operand specifying how the values are to be copied.
Bit [1]:
0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector.
1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector.
Bit [65]:
0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector.
1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector.
Returns
A 128-bit vector of [2 x double] containing the copied values.

Definition at line 784 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm_mask_permutevar_pd(), and _mm_maskz_permutevar_pd().

◆ _mm_permutevar_ps()

static __inline __m128 __DEFAULT_FN_ATTRS _mm_permutevar_ps ( __m128  __a,
__m128i  __c 
)
static

Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand.

This intrinsic corresponds to the VPERMILPS instruction.

Parameters
__aA 128-bit vector of [4 x float].
__cA 128-bit integer vector operand specifying how the values are to be copied.
Bits [1:0]:
00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector.
01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector.
10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector.
11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector.
Bits [33:32]:
00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector.
01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector.
10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector.
11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector.
Bits [65:64]:
00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector.
01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector.
10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector.
11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector.
Bits [97:96]:
00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector.
01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector.
10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector.
11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector.
Returns
A 128-bit vector of [4 x float] containing the copied values.

Definition at line 877 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

Referenced by _mm_mask_permutevar_ps(), and _mm_maskz_permutevar_ps().

◆ _mm_testc_pd()

static __inline int __DEFAULT_FN_ATTRS _mm_testc_pd ( __m128d  __a,
__m128d  __b 
)
static

Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the CF flag.

This intrinsic corresponds to the VTESTPD instruction.

Parameters
__aA 128-bit vector of [2 x double].
__bA 128-bit vector of [2 x double].
Returns
the CF flag in the EFLAGS register.

Definition at line 2604 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm_testc_ps()

static __inline int __DEFAULT_FN_ATTRS _mm_testc_ps ( __m128  __a,
__m128  __b 
)
static

Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the CF flag.

This intrinsic corresponds to the VTESTPS instruction.

Parameters
__a: A 128-bit vector of [4 x float].
__b: A 128-bit vector of [4 x float].
Returns
the CF flag in the EFLAGS register.

Definition at line 2692 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm_testnzc_pd()

static __inline int __DEFAULT_FN_ATTRS _mm_testnzc_pd ( __m128d  __a,
__m128d  __b 
)
static

Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns 1 if both the ZF and CF flags are set to 0, otherwise it returns 0.

This intrinsic corresponds to the VTESTPD instruction.

Parameters
__a: A 128-bit vector of [2 x double].
__b: A 128-bit vector of [2 x double].
Returns
1 if both the ZF and CF flags are set to 0, otherwise returns 0.

Definition at line 2634 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm_testnzc_ps()

static __inline int __DEFAULT_FN_ATTRS _mm_testnzc_ps ( __m128  __a,
__m128  __b 
)
static

Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns 1 if both the ZF and CF flags are set to 0, otherwise it returns 0.

This intrinsic corresponds to the VTESTPS instruction.

Parameters
__a: A 128-bit vector of [4 x float].
__b: A 128-bit vector of [4 x float].
Returns
1 if both the ZF and CF flags are set to 0, otherwise returns 0.

Definition at line 2722 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm_testz_pd()

static __inline int __DEFAULT_FN_ATTRS _mm_testz_pd ( __m128d  __a,
__m128d  __b 
)
static

Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the ZF flag.

This intrinsic corresponds to the VTESTPD instruction.

Parameters
__a: A 128-bit vector of [2 x double].
__b: A 128-bit vector of [2 x double].
Returns
the ZF flag in the EFLAGS register.

Definition at line 2575 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.

◆ _mm_testz_ps()

static __inline int __DEFAULT_FN_ATTRS _mm_testz_ps ( __m128  __a,
__m128  __b 
)
static

Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector.

The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the ZF flag.

This intrinsic corresponds to the VTESTPS instruction.

Parameters
__a: A 128-bit vector of [4 x float].
__b: A 128-bit vector of [4 x float].
Returns
the ZF flag in the EFLAGS register.

Definition at line 2663 of file avxintrin.h.

References __DEFAULT_FN_ATTRS.