clang 20.0.0git
|
Go to the source code of this file.
Macros | |
#define | __DEFAULT_FN_ATTRS |
#define | __DEFAULT_FN_ATTRS128 |
#define | _mm256_round_pd(V, M) ((__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M))) |
Rounds the values in a 256-bit vector of [4 x double] as specified by the byte operand. | |
#define | _mm256_round_ps(V, M) ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M))) |
Rounds the values stored in a 256-bit vector of [8 x float] as specified by the byte operand. | |
#define | _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL) |
Rounds up the values stored in a 256-bit vector of [4 x double]. | |
#define | _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR) |
Rounds down the values stored in a 256-bit vector of [4 x double]. | |
#define | _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL) |
Rounds up the values stored in a 256-bit vector of [8 x float]. | |
#define | _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) |
Rounds down the values stored in a 256-bit vector of [8 x float]. | |
#define | _mm_permute_pd(A, C) ((__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))) |
Copies the values in a 128-bit vector of [2 x double] as specified by the immediate integer operand. | |
#define | _mm256_permute_pd(A, C) ((__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))) |
Copies the values in a 256-bit vector of [4 x double] as specified by the immediate integer operand. | |
#define | _mm_permute_ps(A, C) ((__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))) |
Copies the values in a 128-bit vector of [4 x float] as specified by the immediate integer operand. | |
#define | _mm256_permute_ps(A, C) ((__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))) |
Copies the values in a 256-bit vector of [8 x float] as specified by the immediate integer operand. | |
#define | _mm256_permute2f128_pd(V1, V2, M) |
Permutes 128-bit data values stored in two 256-bit vectors of [4 x double], as specified by the immediate integer operand. | |
#define | _mm256_permute2f128_ps(V1, V2, M) |
Permutes 128-bit data values stored in two 256-bit vectors of [8 x float], as specified by the immediate integer operand. | |
#define | _mm256_permute2f128_si256(V1, V2, M) |
Permutes 128-bit data values stored in two 256-bit integer vectors, as specified by the immediate integer operand. | |
#define | _mm256_blend_pd(V1, V2, M) |
Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the immediate integer operand. | |
#define | _mm256_blend_ps(V1, V2, M) |
Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the immediate integer operand. | |
#define | _mm256_dp_ps(V1, V2, M) |
Computes two dot products in parallel, using the lower and upper halves of two [8 x float] vectors as input to the two computations, and returning the two dot products in the lower and upper halves of the [8 x float] result. | |
#define | _mm256_shuffle_ps(a, b, mask) |
Selects eight single-precision values from the 256-bit operands of [8 x float], as specified by the immediate value operand. | |
#define | _mm256_shuffle_pd(a, b, mask) |
Selects four double-precision values from the 256-bit operands of [4 x double], as specified by the immediate value operand. | |
#define | _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ |
#define | _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */ |
#define | _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ |
#define | _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */ |
#define | _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */ |
#define | _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */ |
#define | _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */ |
#define | _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */ |
#define | _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */ |
#define | _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ |
#define | _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */ |
#define | _CMP_UNORD_S 0x13 /* Unordered (signaling) */ |
#define | _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */ |
#define | _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */ |
#define | _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */ |
#define | _CMP_ORD_S 0x17 /* Ordered (signaling) */ |
#define | _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */ |
#define | _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */ |
#define | _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */ |
#define | _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */ |
#define | _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */ |
#define | _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */ |
#define | _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ |
#define | _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ |
#define | _mm256_cmp_pd(a, b, c) |
Compares each of the corresponding double-precision values of two 256-bit vectors of [4 x double], using the operation specified by the immediate integer operand. | |
#define | _mm256_cmp_ps(a, b, c) |
Compares each of the corresponding values of two 256-bit vectors of [8 x float], using the operation specified by the immediate integer operand. | |
#define | _mm256_extract_epi32(X, N) ((int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))) |
Takes an [8 x i32] vector and returns the vector element value indexed by the immediate constant operand. | |
#define | _mm256_extract_epi16(X, N) |
Takes a [16 x i16] vector and returns the vector element value indexed by the immediate constant operand. | |
#define | _mm256_extract_epi8(X, N) |
Takes a [32 x i8] vector and returns the vector element value indexed by the immediate constant operand. | |
#define | _mm256_insert_epi32(X, I, N) |
Takes an [8 x i32] vector and replaces the vector element value indexed by the immediate constant operand with a new value. | |
#define | _mm256_insert_epi16(X, I, N) |
Takes a [16 x i16] vector and replaces the vector element value indexed by the immediate constant operand with a new value. | |
#define | _mm256_insert_epi8(X, I, N) |
Takes a [32 x i8] vector and replaces the vector element value indexed by the immediate constant operand with a new value. | |
#define | _mm256_insertf128_ps(V1, V2, M) |
Constructs a new 256-bit vector of [8 x float] by first duplicating a 256-bit vector of [8 x float] given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit vector of [4 x float] in the second parameter. | |
#define | _mm256_insertf128_pd(V1, V2, M) |
Constructs a new 256-bit vector of [4 x double] by first duplicating a 256-bit vector of [4 x double] given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit vector of [2 x double] in the second parameter. | |
#define | _mm256_insertf128_si256(V1, V2, M) |
Constructs a new 256-bit integer vector by first duplicating a 256-bit integer vector given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit integer vector in the second parameter. | |
#define | _mm256_extractf128_ps(V, M) ((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))) |
Extracts either the upper or the lower 128 bits from a 256-bit vector of [8 x float], as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit vector of [4 x float]. | |
#define | _mm256_extractf128_pd(V, M) ((__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))) |
Extracts either the upper or the lower 128 bits from a 256-bit vector of [4 x double], as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit vector of [2 x double]. | |
#define | _mm256_extractf128_si256(V, M) ((__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))) |
Extracts either the upper or the lower 128 bits from a 256-bit integer vector, as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit integer vector. | |
Typedefs | |
typedef double __v4df | __attribute__((__vector_size__(32))) |
Functions | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_add_pd (__m256d __a, __m256d __b) |
Adds two 256-bit vectors of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_add_ps (__m256 __a, __m256 __b) |
Adds two 256-bit vectors of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_sub_pd (__m256d __a, __m256d __b) |
Subtracts two 256-bit vectors of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_sub_ps (__m256 __a, __m256 __b) |
Subtracts two 256-bit vectors of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_addsub_pd (__m256d __a, __m256d __b) |
Adds the even-indexed values and subtracts the odd-indexed values of two 256-bit vectors of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_addsub_ps (__m256 __a, __m256 __b) |
Adds the even-indexed values and subtracts the odd-indexed values of two 256-bit vectors of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_div_pd (__m256d __a, __m256d __b) |
Divides two 256-bit vectors of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_div_ps (__m256 __a, __m256 __b) |
Divides two 256-bit vectors of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_max_pd (__m256d __a, __m256d __b) |
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_max_ps (__m256 __a, __m256 __b) |
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_min_pd (__m256d __a, __m256d __b) |
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_min_ps (__m256 __a, __m256 __b) |
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_mul_pd (__m256d __a, __m256d __b) |
Multiplies two 256-bit vectors of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_mul_ps (__m256 __a, __m256 __b) |
Multiplies two 256-bit vectors of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_sqrt_pd (__m256d __a) |
Calculates the square roots of the values in a 256-bit vector of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_sqrt_ps (__m256 __a) |
Calculates the square roots of the values in a 256-bit vector of [8 x float]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_rsqrt_ps (__m256 __a) |
Calculates the reciprocal square roots of the values in a 256-bit vector of [8 x float]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_rcp_ps (__m256 __a) |
Calculates the reciprocals of the values in a 256-bit vector of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_and_pd (__m256d __a, __m256d __b) |
Performs a bitwise AND of two 256-bit vectors of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_and_ps (__m256 __a, __m256 __b) |
Performs a bitwise AND of two 256-bit vectors of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_andnot_pd (__m256d __a, __m256d __b) |
Performs a bitwise AND of two 256-bit vectors of [4 x double], using the one's complement of the values contained in the first source operand. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_andnot_ps (__m256 __a, __m256 __b) |
Performs a bitwise AND of two 256-bit vectors of [8 x float], using the one's complement of the values contained in the first source operand. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_or_pd (__m256d __a, __m256d __b) |
Performs a bitwise OR of two 256-bit vectors of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_or_ps (__m256 __a, __m256 __b) |
Performs a bitwise OR of two 256-bit vectors of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_xor_pd (__m256d __a, __m256d __b) |
Performs a bitwise XOR of two 256-bit vectors of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_xor_ps (__m256 __a, __m256 __b) |
Performs a bitwise XOR of two 256-bit vectors of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_hadd_pd (__m256d __a, __m256d __b) |
Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_hadd_ps (__m256 __a, __m256 __b) |
Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_hsub_pd (__m256d __a, __m256d __b) |
Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_hsub_ps (__m256 __a, __m256 __b) |
Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [8 x float]. | |
static __inline __m128d __DEFAULT_FN_ATTRS128 | _mm_permutevar_pd (__m128d __a, __m128i __c) |
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector operand. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_permutevar_pd (__m256d __a, __m256i __c) |
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector operand. | |
static __inline __m128 __DEFAULT_FN_ATTRS128 | _mm_permutevar_ps (__m128 __a, __m128i __c) |
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_permutevar_ps (__m256 __a, __m256i __c) |
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_blendv_pd (__m256d __a, __m256d __b, __m256d __c) |
Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the 256-bit vector operand. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_blendv_ps (__m256 __a, __m256 __b, __m256 __c) |
Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the 256-bit vector operand. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_cvtepi32_pd (__m128i __a) |
Converts a vector of [4 x i32] into a vector of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_cvtepi32_ps (__m256i __a) |
Converts a vector of [8 x i32] into a vector of [8 x float]. | |
static __inline __m128 __DEFAULT_FN_ATTRS | _mm256_cvtpd_ps (__m256d __a) |
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float]. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_cvtps_epi32 (__m256 __a) |
Converts a vector of [8 x float] into a vector of [8 x i32]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_cvtps_pd (__m128 __a) |
Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double]. | |
static __inline __m128i __DEFAULT_FN_ATTRS | _mm256_cvttpd_epi32 (__m256d __a) |
Converts a 256-bit vector of [4 x double] into four signed truncated (rounded toward zero) 32-bit integers returned in a 128-bit vector of [4 x i32]. | |
static __inline __m128i __DEFAULT_FN_ATTRS | _mm256_cvtpd_epi32 (__m256d __a) |
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32]. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_cvttps_epi32 (__m256 __a) |
Converts a vector of [8 x float] into eight signed truncated (rounded toward zero) 32-bit integers returned in a vector of [8 x i32]. | |
static __inline double __DEFAULT_FN_ATTRS | _mm256_cvtsd_f64 (__m256d __a) |
Returns the first element of the input vector of [4 x double]. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_cvtsi256_si32 (__m256i __a) |
Returns the first element of the input vector of [8 x i32]. | |
static __inline float __DEFAULT_FN_ATTRS | _mm256_cvtss_f32 (__m256 __a) |
Returns the first element of the input vector of [8 x float]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_movehdup_ps (__m256 __a) |
Moves and duplicates odd-indexed values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_moveldup_ps (__m256 __a) |
Moves and duplicates even-indexed values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_movedup_pd (__m256d __a) |
Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to double-precision values in a 256-bit vector of [4 x double]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_unpackhi_pd (__m256d __a, __m256d __b) |
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_unpacklo_pd (__m256d __a, __m256d __b) |
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_unpackhi_ps (__m256 __a, __m256 __b) |
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] and interleaves them into a 256-bit vector of [8 x float]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_unpacklo_ps (__m256 __a, __m256 __b) |
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] and interleaves them into a 256-bit vector of [8 x float]. | |
static __inline int __DEFAULT_FN_ATTRS128 | _mm_testz_pd (__m128d __a, __m128d __b) |
Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS128 | _mm_testc_pd (__m128d __a, __m128d __b) |
Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS128 | _mm_testnzc_pd (__m128d __a, __m128d __b) |
Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS128 | _mm_testz_ps (__m128 __a, __m128 __b) |
Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS128 | _mm_testc_ps (__m128 __a, __m128 __b) |
Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS128 | _mm_testnzc_ps (__m128 __a, __m128 __b) |
Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testz_pd (__m256d __a, __m256d __b) |
Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testc_pd (__m256d __a, __m256d __b) |
Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testnzc_pd (__m256d __a, __m256d __b) |
Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testz_ps (__m256 __a, __m256 __b) |
Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testc_ps (__m256 __a, __m256 __b) |
Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testnzc_ps (__m256 __a, __m256 __b) |
Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision elements in the first source vector and the corresponding elements in the second source vector. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testz_si256 (__m256i __a, __m256i __b) |
Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testc_si256 (__m256i __a, __m256i __b) |
Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testnzc_si256 (__m256i __a, __m256i __b) |
Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_movemask_pd (__m256d __a) |
Extracts the sign bits of double-precision floating point elements in a 256-bit vector of [4 x double] and writes them to the lower order bits of the return value. | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_movemask_ps (__m256 __a) |
Extracts the sign bits of single-precision floating point elements in a 256-bit vector of [8 x float] and writes them to the lower order bits of the return value. | |
static __inline void | __attribute__ ((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroall(void) |
Zeroes the contents of all XMM or YMM registers. | |
static __inline __m128 __DEFAULT_FN_ATTRS128 | _mm_broadcast_ss (float const *__a) |
Loads a scalar single-precision floating point value from the specified address pointed to by __a and broadcasts it to the elements of a [4 x float] vector. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_broadcast_sd (double const *__a) |
Loads a scalar double-precision floating point value from the specified address pointed to by __a and broadcasts it to the elements of a [4 x double] vector. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_broadcast_ss (float const *__a) |
Loads a scalar single-precision floating point value from the specified address pointed to by __a and broadcasts it to the elements of an [8 x float] vector. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_broadcast_pd (__m128d const *__a) |
Loads the data from a 128-bit vector of [2 x double] from the specified address pointed to by __a and broadcasts it to 128-bit elements in a 256-bit vector of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_broadcast_ps (__m128 const *__a) |
Loads the data from a 128-bit vector of [4 x float] from the specified address pointed to by __a and broadcasts it to 128-bit elements in a 256-bit vector of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_load_pd (double const *__p) |
Loads 4 double-precision floating point values from a 32-byte aligned memory location pointed to by __p into a vector of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_load_ps (float const *__p) |
Loads 8 single-precision floating point values from a 32-byte aligned memory location pointed to by __p into a vector of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_loadu_pd (double const *__p) |
Loads 4 double-precision floating point values from an unaligned memory location pointed to by __p into a vector of [4 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_loadu_ps (float const *__p) |
Loads 8 single-precision floating point values from an unaligned memory location pointed to by __p into a vector of [8 x float]. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_load_si256 (__m256i const *__p) |
Loads 256 bits of integer data from a 32-byte aligned memory location pointed to by __p into elements of a 256-bit integer vector. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_loadu_si256 (__m256i_u const *__p) |
Loads 256 bits of integer data from an unaligned memory location pointed to by __p into a 256-bit integer vector. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_lddqu_si256 (__m256i_u const *__p) |
Loads 256 bits of integer data from an unaligned memory location pointed to by __p into a 256-bit integer vector. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_store_pd (double *__p, __m256d __a) |
Stores double-precision floating point values from a 256-bit vector of [4 x double] to a 32-byte aligned memory location pointed to by __p. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_store_ps (float *__p, __m256 __a) |
Stores single-precision floating point values from a 256-bit vector of [8 x float] to a 32-byte aligned memory location pointed to by __p. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu_pd (double *__p, __m256d __a) |
Stores double-precision floating point values from a 256-bit vector of [4 x double] to an unaligned memory location pointed to by __p. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu_ps (float *__p, __m256 __a) |
Stores single-precision floating point values from a 256-bit vector of [8 x float] to an unaligned memory location pointed to by __p. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_store_si256 (__m256i *__p, __m256i __a) |
Stores integer values from a 256-bit integer vector to a 32-byte aligned memory location pointed to by __p. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu_si256 (__m256i_u *__p, __m256i __a) |
Stores integer values from a 256-bit integer vector to an unaligned memory location pointed to by __p. | |
static __inline __m128d __DEFAULT_FN_ATTRS128 | _mm_maskload_pd (double const *__p, __m128i __m) |
Conditionally loads double-precision floating point elements from a memory location pointed to by __p into a 128-bit vector of [2 x double], depending on the mask bits associated with each data element. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_maskload_pd (double const *__p, __m256i __m) |
Conditionally loads double-precision floating point elements from a memory location pointed to by __p into a 256-bit vector of [4 x double], depending on the mask bits associated with each data element. | |
static __inline __m128 __DEFAULT_FN_ATTRS128 | _mm_maskload_ps (float const *__p, __m128i __m) |
Conditionally loads single-precision floating point elements from a memory location pointed to by __p into a 128-bit vector of [4 x float], depending on the mask bits associated with each data element. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_maskload_ps (float const *__p, __m256i __m) |
Conditionally loads single-precision floating point elements from a memory location pointed to by __p into a 256-bit vector of [8 x float], depending on the mask bits associated with each data element. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_maskstore_ps (float *__p, __m256i __m, __m256 __a) |
Moves single-precision floating point values from a 256-bit vector of [8 x float] to a memory location pointed to by __p, according to the specified mask. | |
static __inline void __DEFAULT_FN_ATTRS128 | _mm_maskstore_pd (double *__p, __m128i __m, __m128d __a) |
Moves double-precision values from a 128-bit vector of [2 x double] to a memory location pointed to by __p, according to the specified mask. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_maskstore_pd (double *__p, __m256i __m, __m256d __a) |
Moves double-precision values from a 256-bit vector of [4 x double] to a memory location pointed to by __p, according to the specified mask. | |
static __inline void __DEFAULT_FN_ATTRS128 | _mm_maskstore_ps (float *__p, __m128i __m, __m128 __a) |
Moves single-precision floating point values from a 128-bit vector of [4 x float] to a memory location pointed to by __p, according to the specified mask. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_stream_si256 (void *__a, __m256i __b) |
Moves integer data from a 256-bit integer vector to a 32-byte aligned memory location. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_stream_pd (void *__a, __m256d __b) |
Moves double-precision values from a 256-bit vector of [4 x double] to a 32-byte aligned memory location. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_stream_ps (void *__p, __m256 __a) |
Moves single-precision floating point values from a 256-bit vector of [8 x float] to a 32-byte aligned memory location. | |
static __inline__ __m256d __DEFAULT_FN_ATTRS | _mm256_undefined_pd (void) |
Create a 256-bit vector of [4 x double] with undefined values. | |
static __inline__ __m256 __DEFAULT_FN_ATTRS | _mm256_undefined_ps (void) |
Create a 256-bit vector of [8 x float] with undefined values. | |
static __inline__ __m256i __DEFAULT_FN_ATTRS | _mm256_undefined_si256 (void) |
Create a 256-bit integer vector with undefined values. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_set_pd (double __a, double __b, double __c, double __d) |
Constructs a 256-bit floating-point vector of [4 x double] initialized with the specified double-precision floating-point values. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_set_ps (float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) |
Constructs a 256-bit floating-point vector of [8 x float] initialized with the specified single-precision floating-point values. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set_epi32 (int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) |
Constructs a 256-bit integer vector initialized with the specified 32-bit integral values. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set_epi16 (short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00) |
Constructs a 256-bit integer vector initialized with the specified 16-bit integral values. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set_epi8 (char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00) |
Constructs a 256-bit integer vector initialized with the specified 8-bit integral values. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set_epi64x (long long __a, long long __b, long long __c, long long __d) |
Constructs a 256-bit integer vector initialized with the specified 64-bit integral values. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_setr_pd (double __a, double __b, double __c, double __d) |
Constructs a 256-bit floating-point vector of [4 x double], initialized in reverse order with the specified double-precision floating-point values. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_setr_ps (float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) |
Constructs a 256-bit floating-point vector of [8 x float], initialized in reverse order with the specified single-precision floating-point values. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setr_epi32 (int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) |
Constructs a 256-bit integer vector, initialized in reverse order with the specified 32-bit integral values. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setr_epi16 (short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00) |
Constructs a 256-bit integer vector, initialized in reverse order with the specified 16-bit integral values. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setr_epi8 (char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00) |
Constructs a 256-bit integer vector, initialized in reverse order with the specified 8-bit integral values. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setr_epi64x (long long __a, long long __b, long long __c, long long __d) |
Constructs a 256-bit integer vector, initialized in reverse order with the specified 64-bit integral values. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_set1_pd (double __w) |
Constructs a 256-bit floating-point vector of [4 x double], with each of the four double-precision floating-point vector elements set to the specified double-precision floating-point value. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_set1_ps (float __w) |
Constructs a 256-bit floating-point vector of [8 x float], with each of the eight single-precision floating-point vector elements set to the specified single-precision floating-point value. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set1_epi32 (int __i) |
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements set to the specified 32-bit integral value. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set1_epi16 (short __w) |
Constructs a 256-bit integer vector of [16 x i16], with each of the 16-bit integral vector elements set to the specified 16-bit integral value. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set1_epi8 (char __b) |
Constructs a 256-bit integer vector of [32 x i8], with each of the 8-bit integral vector elements set to the specified 8-bit integral value. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set1_epi64x (long long __q) |
Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements set to the specified 64-bit integral value. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_setzero_pd (void) |
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_setzero_ps (void) |
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setzero_si256 (void) |
Constructs a 256-bit integer vector initialized to zero. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_castpd_ps (__m256d __a) |
Casts a 256-bit floating-point vector of [4 x double] into a 256-bit floating-point vector of [8 x float]. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_castpd_si256 (__m256d __a) |
Casts a 256-bit floating-point vector of [4 x double] into a 256-bit integer vector. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_castps_pd (__m256 __a) |
Casts a 256-bit floating-point vector of [8 x float] into a 256-bit floating-point vector of [4 x double]. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_castps_si256 (__m256 __a) |
Casts a 256-bit floating-point vector of [8 x float] into a 256-bit integer vector. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_castsi256_ps (__m256i __a) |
Casts a 256-bit integer vector into a 256-bit floating-point vector of [8 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_castsi256_pd (__m256i __a) |
Casts a 256-bit integer vector into a 256-bit floating-point vector of [4 x double]. | |
static __inline __m128d __DEFAULT_FN_ATTRS | _mm256_castpd256_pd128 (__m256d __a) |
Returns the lower 128 bits of a 256-bit floating-point vector of [4 x double] as a 128-bit floating-point vector of [2 x double]. | |
static __inline __m128 __DEFAULT_FN_ATTRS | _mm256_castps256_ps128 (__m256 __a) |
Returns the lower 128 bits of a 256-bit floating-point vector of [8 x float] as a 128-bit floating-point vector of [4 x float]. | |
static __inline __m128i __DEFAULT_FN_ATTRS | _mm256_castsi256_si128 (__m256i __a) |
Truncates a 256-bit integer vector into a 128-bit integer vector. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_castpd128_pd256 (__m128d __a) |
Constructs a 256-bit floating-point vector of [4 x double] from a 128-bit floating-point vector of [2 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_castps128_ps256 (__m128 __a) |
Constructs a 256-bit floating-point vector of [8 x float] from a 128-bit floating-point vector of [4 x float]. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_castsi128_si256 (__m128i __a) |
Constructs a 256-bit integer vector from a 128-bit integer vector. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_zextpd128_pd256 (__m128d __a) |
Constructs a 256-bit floating-point vector of [4 x double] from a 128-bit floating-point vector of [2 x double]. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_zextps128_ps256 (__m128 __a) |
Constructs a 256-bit floating-point vector of [8 x float] from a 128-bit floating-point vector of [4 x float]. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_zextsi128_si256 (__m128i __a) |
Constructs a 256-bit integer vector from a 128-bit integer vector. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_set_m128 (__m128 __hi, __m128 __lo) |
Constructs a 256-bit floating-point vector of [8 x float] by concatenating two 128-bit floating-point vectors of [4 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_set_m128d (__m128d __hi, __m128d __lo) |
Constructs a 256-bit floating-point vector of [4 x double] by concatenating two 128-bit floating-point vectors of [2 x double]. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set_m128i (__m128i __hi, __m128i __lo) |
Constructs a 256-bit integer vector by concatenating two 128-bit integer vectors. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_setr_m128 (__m128 __lo, __m128 __hi) |
Constructs a 256-bit floating-point vector of [8 x float] by concatenating two 128-bit floating-point vectors of [4 x float]. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_setr_m128d (__m128d __lo, __m128d __hi) |
Constructs a 256-bit floating-point vector of [4 x double] by concatenating two 128-bit floating-point vectors of [2 x double]. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setr_m128i (__m128i __lo, __m128i __hi) |
Constructs a 256-bit integer vector by concatenating two 128-bit integer vectors. | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_loadu2_m128 (float const *__addr_hi, float const *__addr_lo) |
Loads two 128-bit floating-point vectors of [4 x float] from unaligned memory locations and constructs a 256-bit floating-point vector of [8 x float] by concatenating the two 128-bit vectors. | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_loadu2_m128d (double const *__addr_hi, double const *__addr_lo) |
Loads two 128-bit floating-point vectors of [2 x double] from unaligned memory locations and constructs a 256-bit floating-point vector of [4 x double] by concatenating the two 128-bit vectors. | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_loadu2_m128i (__m128i_u const *__addr_hi, __m128i_u const *__addr_lo) |
Loads two 128-bit integer vectors from unaligned memory locations and constructs a 256-bit integer vector by concatenating the two 128-bit vectors. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu2_m128 (float *__addr_hi, float *__addr_lo, __m256 __a) |
Stores the upper and lower 128 bits of a 256-bit floating-point vector of [8 x float] into two different unaligned memory locations. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu2_m128d (double *__addr_hi, double *__addr_lo, __m256d __a) |
Stores the upper and lower 128 bits of a 256-bit floating-point vector of [4 x double] into two different unaligned memory locations. | |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu2_m128i (__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a) |
Stores the upper and lower 128 bits of a 256-bit integer vector into two different unaligned memory locations. | |
#define __DEFAULT_FN_ATTRS |
Definition at line 53 of file avxintrin.h.
#define __DEFAULT_FN_ATTRS128 |
Definition at line 56 of file avxintrin.h.
#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */ |
Definition at line 1594 of file avxintrin.h.
#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ |
Definition at line 1586 of file avxintrin.h.
#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */ |
Definition at line 1602 of file avxintrin.h.
#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */ |
Definition at line 1589 of file avxintrin.h.
#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */ |
Definition at line 1605 of file avxintrin.h.
#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */ |
Definition at line 1607 of file avxintrin.h.
#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */ |
Definition at line 1591 of file avxintrin.h.
#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ |
Definition at line 1608 of file avxintrin.h.
#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */ |
Definition at line 1592 of file avxintrin.h.
#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */ |
Definition at line 1596 of file avxintrin.h.
#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ |
Definition at line 1595 of file avxintrin.h.
#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */ |
Definition at line 1590 of file avxintrin.h.
#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */ |
Definition at line 1606 of file avxintrin.h.
#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */ |
Definition at line 1598 of file avxintrin.h.
#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */ |
Definition at line 1603 of file avxintrin.h.
#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */ |
Definition at line 1587 of file avxintrin.h.
#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */ |
Definition at line 1604 of file avxintrin.h.
#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ |
Definition at line 1588 of file avxintrin.h.
#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */ |
Definition at line 1600 of file avxintrin.h.
#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */ |
Definition at line 1599 of file avxintrin.h.
#define _CMP_ORD_S 0x17 /* Ordered (signaling) */ |
Definition at line 1601 of file avxintrin.h.
#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */ |
Definition at line 1593 of file avxintrin.h.
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ |
Definition at line 1609 of file avxintrin.h.
#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */ |
Definition at line 1597 of file avxintrin.h.
#define _mm256_blend_pd | ( | V1, | |
V2, | |||
M | |||
) |
Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the immediate integer operand.
This intrinsic corresponds to the VBLENDPD
instruction.
V1 | A 256-bit vector of [4 x double]. |
V2 | A 256-bit vector of [4 x double]. |
M | An immediate integer operand, with mask bits [3:0] specifying how the values are to be copied. The position of the mask bit corresponds to the index of a copied value. When a mask bit is 0, the corresponding 64-bit element in operand V1 is copied to the same position in the destination. When a mask bit is 1, the corresponding 64-bit element in operand V2 is copied to the same position in the destination. |
Definition at line 1352 of file avxintrin.h.
#define _mm256_blend_ps | ( | V1, | |
V2, | |||
M | |||
) |
Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the immediate integer operand.
This intrinsic corresponds to the VBLENDPS
instruction.
V1 | A 256-bit vector of [8 x float]. |
V2 | A 256-bit vector of [8 x float]. |
M | An immediate integer operand, with mask bits [7:0] specifying how the values are to be copied. The position of the mask bit corresponds to the index of a copied value. When a mask bit is 0, the corresponding 32-bit element in operand V1 is copied to the same position in the destination. When a mask bit is 1, the corresponding 32-bit element in operand V2 is copied to the same position in the destination. |
Definition at line 1380 of file avxintrin.h.
#define _mm256_ceil_pd | ( | V | ) | _mm256_round_pd((V), _MM_FROUND_CEIL) |
Rounds up the values stored in a 256-bit vector of [4 x double].
The source values are rounded up to integer values and returned as 64-bit double-precision floating-point values.
This intrinsic corresponds to the VROUNDPD
instruction.
V | A 256-bit vector of [4 x double]. |
Definition at line 474 of file avxintrin.h.
#define _mm256_ceil_ps | ( | V | ) | _mm256_round_ps((V), _MM_FROUND_CEIL) |
Rounds up the values stored in a 256-bit vector of [8 x float].
The source values are rounded up to integer values and returned as floating-point values.
This intrinsic corresponds to the VROUNDPS
instruction.
V | A 256-bit vector of [8 x float]. |
Definition at line 509 of file avxintrin.h.
#define _mm256_cmp_pd | ( | a, | |
b, | |||
c | |||
) |
Compares each of the corresponding double-precision values of two 256-bit vectors of [4 x double], using the operation specified by the immediate integer operand.
Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. If either value in a comparison is NaN, comparisons that are ordered return false, and comparisons that are unordered return true.
This intrinsic corresponds to the VCMPPD
instruction.
a | A 256-bit vector of [4 x double]. |
b | A 256-bit vector of [4 x double]. |
c | An immediate integer operand, with bits [4:0] specifying which comparison operation to use: 0x00: Equal (ordered, non-signaling) 0x01: Less-than (ordered, signaling) 0x02: Less-than-or-equal (ordered, signaling) 0x03: Unordered (non-signaling) 0x04: Not-equal (unordered, non-signaling) 0x05: Not-less-than (unordered, signaling) 0x06: Not-less-than-or-equal (unordered, signaling) 0x07: Ordered (non-signaling) 0x08: Equal (unordered, non-signaling) 0x09: Not-greater-than-or-equal (unordered, signaling) 0x0A: Not-greater-than (unordered, signaling) 0x0B: False (ordered, non-signaling) 0x0C: Not-equal (ordered, non-signaling) 0x0D: Greater-than-or-equal (ordered, signaling) 0x0E: Greater-than (ordered, signaling) 0x0F: True (unordered, non-signaling) 0x10: Equal (ordered, signaling) 0x11: Less-than (ordered, non-signaling) 0x12: Less-than-or-equal (ordered, non-signaling) 0x13: Unordered (signaling) 0x14: Not-equal (unordered, signaling) 0x15: Not-less-than (unordered, non-signaling) 0x16: Not-less-than-or-equal (unordered, non-signaling) 0x17: Ordered (signaling) 0x18: Equal (unordered, signaling) 0x19: Not-greater-than-or-equal (unordered, non-signaling) 0x1A: Not-greater-than (unordered, non-signaling) 0x1B: False (ordered, signaling) 0x1C: Not-equal (ordered, signaling) 0x1D: Greater-than-or-equal (ordered, non-signaling) 0x1E: Greater-than (ordered, non-signaling) 0x1F: True (unordered, signaling) |
Definition at line 1785 of file avxintrin.h.
#define _mm256_cmp_ps | ( | a, | |
b, | |||
c | |||
) |
Compares each of the corresponding values of two 256-bit vectors of [8 x float], using the operation specified by the immediate integer operand.
Each comparison returns 0x0 for false, 0xFFFFFFFF for true. If either value in a comparison is NaN, comparisons that are ordered return false, and comparisons that are unordered return true.
This intrinsic corresponds to the VCMPPS
instruction.
a | A 256-bit vector of [8 x float]. |
b | A 256-bit vector of [8 x float]. |
c | An immediate integer operand, with bits [4:0] specifying which comparison operation to use: 0x00: Equal (ordered, non-signaling) 0x01: Less-than (ordered, signaling) 0x02: Less-than-or-equal (ordered, signaling) 0x03: Unordered (non-signaling) 0x04: Not-equal (unordered, non-signaling) 0x05: Not-less-than (unordered, signaling) 0x06: Not-less-than-or-equal (unordered, signaling) 0x07: Ordered (non-signaling) 0x08: Equal (unordered, non-signaling) 0x09: Not-greater-than-or-equal (unordered, signaling) 0x0A: Not-greater-than (unordered, signaling) 0x0B: False (ordered, non-signaling) 0x0C: Not-equal (ordered, non-signaling) 0x0D: Greater-than-or-equal (ordered, signaling) 0x0E: Greater-than (ordered, signaling) 0x0F: True (unordered, non-signaling) 0x10: Equal (ordered, signaling) 0x11: Less-than (ordered, non-signaling) 0x12: Less-than-or-equal (ordered, non-signaling) 0x13: Unordered (signaling) 0x14: Not-equal (unordered, signaling) 0x15: Not-less-than (unordered, non-signaling) 0x16: Not-less-than-or-equal (unordered, non-signaling) 0x17: Ordered (signaling) 0x18: Equal (unordered, signaling) 0x19: Not-greater-than-or-equal (unordered, non-signaling) 0x1A: Not-greater-than (unordered, non-signaling) 0x1B: False (ordered, signaling) 0x1C: Not-equal (ordered, signaling) 0x1D: Greater-than-or-equal (ordered, non-signaling) 0x1E: Greater-than (ordered, non-signaling) 0x1F: True (unordered, signaling) |
Definition at line 1845 of file avxintrin.h.
#define _mm256_dp_ps | ( | V1, | |
V2, | |||
M | |||
) |
Computes two dot products in parallel, using the lower and upper halves of two [8 x float] vectors as input to the two computations, and returning the two dot products in the lower and upper halves of the [8 x float] result.
The immediate integer operand controls which input elements will contribute to the dot product, and where the final results are returned. In general, for each dot product, the four corresponding elements of the input vectors are multiplied; the first two and second two products are summed, then the two sums are added to form the final result.
This intrinsic corresponds to the VDPPS
instruction.
V1 | A vector of [8 x float] values, treated as two [4 x float] vectors. |
V2 | A vector of [8 x float] values, treated as two [4 x float] vectors. |
M | An immediate integer argument. Bits [7:4] determine which elements of the input vectors are used, with bit [4] corresponding to the lowest element and bit [7] corresponding to the highest element of each [4 x float] subvector. If a bit is set, the corresponding elements from the two input vectors are used as an input for dot product; otherwise that input is treated as zero. Bits [3:0] determine which elements of the result will receive a copy of the final dot product, with bit [0] corresponding to the lowest element and bit [3] corresponding to the highest element of each [4 x float] subvector. If a bit is set, the dot product is returned in the corresponding element; otherwise that element is set to zero. The bitmask is applied in the same way to each of the two parallel dot product computations. |
Definition at line 1478 of file avxintrin.h.
#define _mm256_extract_epi16 | ( | X, | |
N | |||
) |
Takes a [16 x i16] vector and returns the vector element value indexed by the immediate constant operand.
This intrinsic corresponds to the VEXTRACTF128+COMPOSITE
instruction.
X | A 256-bit integer vector of [16 x i16]. |
N | An immediate integer operand with bits [3:0] determining which vector element is extracted and returned. |
Definition at line 2008 of file avxintrin.h.
#define _mm256_extract_epi32 | ( | X, | |
N | |||
) | ((int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))) |
Takes a [8 x i32] vector and returns the vector element value indexed by the immediate constant operand.
This intrinsic corresponds to the VEXTRACTF128+COMPOSITE
instruction.
X | A 256-bit vector of [8 x i32]. |
N | An immediate integer operand with bits [2:0] determining which vector element is extracted and returned. |
Definition at line 1986 of file avxintrin.h.
#define _mm256_extract_epi8 | ( | X, | |
N | |||
) |
Takes a [32 x i8] vector and returns the vector element value indexed by the immediate constant operand.
This intrinsic corresponds to the VEXTRACTF128+COMPOSITE
instruction.
X | A 256-bit integer vector of [32 x i8]. |
N | An immediate integer operand with bits [4:0] determining which vector element is extracted and returned. |
Definition at line 2031 of file avxintrin.h.
#define _mm256_extractf128_pd | ( | V, | |
M | |||
) | ((__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))) |
Extracts either the upper or the lower 128 bits from a 256-bit vector of [4 x double], as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit vector of [2 x double].
This intrinsic corresponds to the VEXTRACTF128
instruction.
V | A 256-bit vector of [4 x double]. |
M | An immediate integer. The least significant bit determines which bits are extracted from the first parameter: If bit [0] of M is 0, bits [127:0] of V are copied to the result. If bit [0] of M is 1, bits [255:128] of V are copied to the result. |
Definition at line 4800 of file avxintrin.h.
#define _mm256_extractf128_ps | ( | V, | |
M | |||
) | ((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))) |
Extracts either the upper or the lower 128 bits from a 256-bit vector of [8 x float], as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit vector of [4 x float].
This intrinsic corresponds to the VEXTRACTF128
instruction.
V | A 256-bit vector of [8 x float]. |
M | An immediate integer. The least significant bit determines which bits are extracted from the first parameter: If bit [0] of M is 0, bits [127:0] of V are copied to the result. If bit [0] of M is 1, bits [255:128] of V are copied to the result. |
Definition at line 4776 of file avxintrin.h.
#define _mm256_extractf128_si256 | ( | V, | |
M | |||
) | ((__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))) |
Extracts either the upper or the lower 128 bits from a 256-bit integer vector, as determined by the immediate integer parameter, and returns the extracted bits as a 128-bit integer vector.
This intrinsic corresponds to the VEXTRACTF128
instruction.
V | A 256-bit integer vector. |
M | An immediate integer. The least significant bit determines which bits are extracted from the first parameter: If bit [0] of M is 0, bits [127:0] of V are copied to the result. If bit [0] of M is 1, bits [255:128] of V are copied to the result. |
Definition at line 4824 of file avxintrin.h.
#define _mm256_floor_pd | ( | V | ) | _mm256_round_pd((V), _MM_FROUND_FLOOR) |
Rounds down the values stored in a 256-bit vector of [4 x double].
The source values are rounded down to integer values and returned as 64-bit double-precision floating-point values.
This intrinsic corresponds to the VROUNDPD
instruction.
V | A 256-bit vector of [4 x double]. |
Definition at line 492 of file avxintrin.h.
#define _mm256_floor_ps | ( | V | ) | _mm256_round_ps((V), _MM_FROUND_FLOOR) |
Rounds down the values stored in a 256-bit vector of [8 x float].
The source values are rounded down to integer values and returned as floating-point values.
This intrinsic corresponds to the VROUNDPS
instruction.
V | A 256-bit vector of [8 x float]. |
Definition at line 526 of file avxintrin.h.
#define _mm256_insert_epi16 | ( | X, | |
I, | |||
N | |||
) |
Takes a [16 x i16] vector and replaces the vector element value indexed by the immediate constant operand with a new value.
Returns the modified vector.
This intrinsic corresponds to the VINSERTF128+COMPOSITE
instruction.
X | A vector of [16 x i16] to be used by the insert operation. |
I | An i16 integer value. The replacement value for the insert operation. |
N | An immediate integer specifying the index of the vector element to be replaced. |
Definition at line 2108 of file avxintrin.h.
#define _mm256_insert_epi32 | ( | X, | |
I, | |||
N | |||
) |
Takes a [8 x i32] vector and replaces the vector element value indexed by the immediate constant operand with a new value.
Returns the modified vector.
This intrinsic corresponds to the VINSERTF128+COMPOSITE
instruction.
X | A vector of [8 x i32] to be used by the insert operation. |
I | An integer value. The replacement value for the insert operation. |
N | An immediate integer specifying the index of the vector element to be replaced. |
Definition at line 2081 of file avxintrin.h.
#define _mm256_insert_epi8 | ( | X, | |
I, | |||
N | |||
) |
Takes a [32 x i8] vector and replaces the vector element value indexed by the immediate constant operand with a new value.
Returns the modified vector.
This intrinsic corresponds to the VINSERTF128+COMPOSITE
instruction.
X | A vector of [32 x i8] to be used by the insert operation. |
I | An i8 integer value. The replacement value for the insert operation. |
N | An immediate integer specifying the index of the vector element to be replaced. |
Definition at line 2134 of file avxintrin.h.
#define _mm256_insertf128_pd | ( | V1, | |
V2, | |||
M | |||
) |
Constructs a new 256-bit vector of [4 x double] by first duplicating a 256-bit vector of [4 x double] given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit vector of [2 x double] in the second parameter.
The immediate integer parameter determines between the upper or the lower 128 bits.
This intrinsic corresponds to the VINSERTF128
instruction.
V1 | A 256-bit vector of [4 x double]. This vector is copied to the result first, and then either the upper or the lower 128 bits of the result will be replaced by the contents of V2. |
V2 | A 128-bit vector of [2 x double]. The contents of this parameter are written to either the upper or the lower 128 bits of the result depending on the value of parameter M. |
M | An immediate integer. The least significant bit determines how the values from the two parameters are interleaved: If bit [0] of M is 0, the contents of V2 are copied to bits [127:0] of the result, and bits [255:128] of V1 are copied to bits [255:128] of the result. If bit [0] of M is 1, the contents of V2 are copied to bits [255:128] of the result, and bits [127:0] of V1 are copied to bits [127:0] of the result. |
Definition at line 4708 of file avxintrin.h.
#define _mm256_insertf128_ps | ( | V1, | |
V2, | |||
M | |||
) |
Constructs a new 256-bit vector of [8 x float] by first duplicating a 256-bit vector of [8 x float] given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit vector of [4 x float] in the second parameter.
The immediate integer parameter determines between the upper or the lower 128 bits.
This intrinsic corresponds to the VINSERTF128
instruction.
V1 | A 256-bit vector of [8 x float]. This vector is copied to the result first, and then either the upper or the lower 128 bits of the result will be replaced by the contents of V2. |
V2 | A 128-bit vector of [4 x float]. The contents of this parameter are written to either the upper or the lower 128 bits of the result depending on the value of parameter M. |
M | An immediate integer. The least significant bit determines how the values from the two parameters are interleaved: If bit [0] of M is 0, the contents of V2 are copied to bits [127:0] of the result, and bits [255:128] of V1 are copied to bits [255:128] of the result. If bit [0] of M is 1, the contents of V2 are copied to bits [255:128] of the result, and bits [127:0] of V1 are copied to bits [127:0] of the result. |
Definition at line 4670 of file avxintrin.h.
#define _mm256_insertf128_si256 | ( | V1, | |
V2, | |||
M | |||
) |
Constructs a new 256-bit integer vector by first duplicating a 256-bit integer vector given in the first parameter, and then replacing either the upper or the lower 128 bits with the contents of a 128-bit integer vector in the second parameter.
The immediate integer parameter determines between the upper or the lower 128 bits.
This intrinsic corresponds to the VINSERTF128
instruction.
V1 | A 256-bit integer vector. This vector is copied to the result first, and then either the upper or the lower 128 bits of the result will be replaced by the contents of V2. |
V2 | A 128-bit integer vector. The contents of this parameter are written to either the upper or the lower 128 bits of the result depending on the value of parameter M. |
M | An immediate integer. The least significant bit determines how the values from the two parameters are interleaved: If bit [0] of M is 0, the contents of V2 are copied to bits [127:0] of the result, and bits [255:128] of V1 are copied to bits [255:128] of the result. If bit [0] of M is 1, the contents of V2 are copied to bits [255:128] of the result, and bits [127:0] of V1 are copied to bits [127:0] of the result. |
Definition at line 4746 of file avxintrin.h.
#define _mm256_permute2f128_pd | ( | V1, | |
V2, | |||
M | |||
) |
Permutes 128-bit data values stored in two 256-bit vectors of [4 x double], as specified by the immediate integer operand.
This intrinsic corresponds to the VPERM2F128
instruction.
V1 | A 256-bit vector of [4 x double]. |
V2 | A 256-bit vector of [4 x double]. |
M | An immediate integer operand specifying how the values are to be permuted. Bits [1:0]: 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination. Bits [5:4]: 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination. |
Definition at line 1242 of file avxintrin.h.
#define _mm256_permute2f128_ps | ( | V1, | |
V2, | |||
M | |||
) |
Permutes 128-bit data values stored in two 256-bit vectors of [8 x float], as specified by the immediate integer operand.
This intrinsic corresponds to the VPERM2F128
instruction.
V1 | A 256-bit vector of [8 x float]. |
V2 | A 256-bit vector of [8 x float]. |
M | An immediate integer operand specifying how the values are to be permuted. Bits [1:0]: 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination. Bits [5:4]: 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination. |
Definition at line 1283 of file avxintrin.h.
#define _mm256_permute2f128_si256 | ( | V1, | |
V2, | |||
M | |||
) |
Permutes 128-bit data values stored in two 256-bit integer vectors, as specified by the immediate integer operand.
This intrinsic corresponds to the VPERM2F128
instruction.
V1 | A 256-bit integer vector. |
V2 | A 256-bit integer vector. |
M | An immediate integer operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination. Bits [5:4]: 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination. |
Definition at line 1323 of file avxintrin.h.
#define _mm256_permute_pd | ( | A, | |
C | |||
) | ((__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))) |
Copies the values in a 256-bit vector of [4 x double] as specified by the immediate integer operand.
This intrinsic corresponds to the VPERMILPD
instruction.
A | A 256-bit vector of [4 x double]. |
C | An immediate integer operand specifying how the values are to be copied. Bit [0]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [1]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector. Bit [2]: 0: Bits [191:128] of the source are copied to bits [191:128] of the returned vector. 1: Bits [255:192] of the source are copied to bits [191:128] of the returned vector. Bit [3]: 0: Bits [191:128] of the source are copied to bits [255:192] of the returned vector. 1: Bits [255:192] of the source are copied to bits [255:192] of the returned vector. |
Definition at line 1054 of file avxintrin.h.
#define _mm256_permute_ps | ( | A, | |
C | |||
) | ((__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))) |
Copies the values in a 256-bit vector of [8 x float] as specified by the immediate integer operand.
This intrinsic corresponds to the VPERMILPS
instruction.
A | A 256-bit vector of [8 x float]. |
C | An immediate integer operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [3:2]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [5:4]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [7:6]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector. Bits [1:0]: 00: Bits [159:128] of the source are copied to bits [159:128] of the returned vector. 01: Bits [191:160] of the source are copied to bits [159:128] of the returned vector. 10: Bits [223:192] of the source are copied to bits [159:128] of the returned vector. 11: Bits [255:224] of the source are copied to bits [159:128] of the returned vector. Bits [3:2]: 00: Bits [159:128] of the source are copied to bits [191:160] of the returned vector. 01: Bits [191:160] of the source are copied to bits [191:160] of the returned vector. 
10: Bits [223:192] of the source are copied to bits [191:160] of the returned vector. 11: Bits [255:224] of the source are copied to bits [191:160] of the returned vector. Bits [5:4]: 00: Bits [159:128] of the source are copied to bits [223:192] of the returned vector. 01: Bits [191:160] of the source are copied to bits [223:192] of the returned vector. 10: Bits [223:192] of the source are copied to bits [223:192] of the returned vector. 11: Bits [255:224] of the source are copied to bits [223:192] of the returned vector. Bits [7:6]: 00: Bits [159:128] of the source are copied to bits [255:224] of the returned vector. 01: Bits [191:160] of the source are copied to bits [255:224] of the returned vector. 10: Bits [223:192] of the source are copied to bits [255:224] of the returned vector. 11: Bits [255:224] of the source are copied to bits [255:224] of the returned vector. |
Definition at line 1202 of file avxintrin.h.
Rounds the values in a 256-bit vector of [4 x double] as specified by the byte operand.
The source values are rounded to integer values and returned as 64-bit double-precision floating-point values.
This intrinsic corresponds to the VROUNDPD
instruction.
V | A 256-bit vector of [4 x double]. |
M | An integer value that specifies the rounding operation. Bits [7:4] are reserved. Bit [3] is a precision exception value: 0: A normal PE exception is used. 1: The PE field is not updated. Bit [2] is the rounding control source: 0: Use bits [1:0] of M. 1: Use the current MXCSR setting. Bits [1:0] contain the rounding control definition: 00: Nearest. 01: Downward (toward negative infinity). 10: Upward (toward positive infinity). 11: Truncated. |
Definition at line 424 of file avxintrin.h.
Rounds the values stored in a 256-bit vector of [8 x float] as specified by the byte operand.
The source values are rounded to integer values and returned as floating-point values.
This intrinsic corresponds to the VROUNDPS
instruction.
V | A 256-bit vector of [8 x float]. |
M | An integer value that specifies the rounding operation. Bits [7:4] are reserved. Bit [3] is a precision exception value: 0: A normal PE exception is used. 1: The PE field is not updated. Bit [2] is the rounding control source: 0: Use bits [1:0] of M. 1: Use the current MXCSR setting. Bits [1:0] contain the rounding control definition: 00: Nearest. 01: Downward (toward negative infinity). 10: Upward (toward positive infinity). 11: Truncated. |
Definition at line 456 of file avxintrin.h.
#define _mm256_shuffle_pd | ( | a, | |
b, | |||
mask | |||
) |
Selects four double-precision values from the 256-bit operands of [4 x double], as specified by the immediate value operand.
The selected elements from the first 256-bit operand are copied to bits [63:0] and bits [191:128] in the destination, and the selected elements from the second 256-bit operand are copied to bits [127:64] and bits [255:192] in the destination. For example, if bits [3:0] of the immediate operand contain a value of 0xF, the 256-bit destination vector would contain the following values: b[3], a[3], b[1], a[1].
This intrinsic corresponds to the VSHUFPD
instruction.
a | A 256-bit vector of [4 x double]. |
b | A 256-bit vector of [4 x double]. |
mask | An immediate value containing 8-bit values specifying which elements to copy from a and b: Bit [0]=0: Bits [63:0] are copied from a to bits [63:0] of the destination. Bit [0]=1: Bits [127:64] are copied from a to bits [63:0] of the destination. Bit [1]=0: Bits [63:0] are copied from b to bits [127:64] of the destination. Bit [1]=1: Bits [127:64] are copied from b to bits [127:64] of the destination. Bit [2]=0: Bits [191:128] are copied from a to bits [191:128] of the destination. Bit [2]=1: Bits [255:192] are copied from a to bits [191:128] of the destination. Bit [3]=0: Bits [191:128] are copied from b to bits [255:192] of the destination. Bit [3]=1: Bits [255:192] are copied from b to bits [255:192] of the destination. |
Definition at line 1581 of file avxintrin.h.
#define _mm256_shuffle_ps | ( | a, | |
b, | |||
mask | |||
) |
Selects 8 float values from the 256-bit operands of [8 x float], as specified by the immediate value operand.
The four selected elements in each operand are copied to the destination according to the bits specified in the immediate operand. The selected elements from the first 256-bit operand are copied to bits [63:0] and bits [191:128] of the destination, and the selected elements from the second 256-bit operand are copied to bits [127:64] and bits [255:192] of the destination. For example, if bits [7:0] of the immediate operand contain a value of 0xFF, the 256-bit destination vector would contain the following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].
This intrinsic corresponds to the VSHUFPS
instruction.
a | A 256-bit vector of [8 x float]. The four selected elements in this operand are copied to bits [63:0] and bits [191:128] in the destination, according to the bits specified in the immediate operand. |
b | A 256-bit vector of [8 x float]. The four selected elements in this operand are copied to bits [127:64] and bits [255:192] in the destination, according to the bits specified in the immediate operand. |
mask | An immediate value containing an 8-bit value specifying which elements to copy from a and b. Bits [3:0] specify the values copied from operand a. Bits [7:4] specify the values copied from operand b. The destinations within the 256-bit destination are assigned values as follows, according to the bit value assignments described below: Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the destination. Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the destination. Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the destination. Bits [7:6] are used to assign values to bits [127:96] and [255:224] in the destination. Bit value assignments: 00: Bits [31:0] and [159:128] are copied from the selected operand. 01: Bits [63:32] and [191:160] are copied from the selected operand. 10: Bits [95:64] and [223:192] are copied from the selected operand. 11: Bits [127:96] and [255:224] are copied from the selected operand. Note: To generate a mask, you can use the _MM_SHUFFLE macro. _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form [b6, b4, b2, b0]. |
Definition at line 1535 of file avxintrin.h.
#define _mm_permute_pd | ( | A, | |
C | |||
) | ((__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))) |
Copies the values in a 128-bit vector of [2 x double] as specified by the immediate integer operand.
This intrinsic corresponds to the VPERMILPD
instruction.
A | A 128-bit vector of [2 x double]. |
C | An immediate integer operand specifying how the values are to be copied. Bit [0]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [1]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector. |
Definition at line 1014 of file avxintrin.h.
#define _mm_permute_ps | ( | A, | |
C | |||
) | ((__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))) |
Copies the values in a 128-bit vector of [4 x float] as specified by the immediate integer operand.
This intrinsic corresponds to the VPERMILPS
instruction.
A | A 128-bit vector of [4 x float]. |
C | An immediate integer operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [3:2]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [5:4]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [7:6]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector. |
Definition at line 1110 of file avxintrin.h.
typedef long long __m256i_u __attribute__((__vector_size__(32), __aligned__(1))) |
Definition at line 17 of file avxintrin.h.
|
static |
Zeroes the contents of all XMM or YMM registers.
This intrinsic corresponds to the VZEROALL
instruction.
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
This intrinsic corresponds to the VZEROUPPER
instruction.
Definition at line 3005 of file avxintrin.h.
|
static |
Adds two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VADDPD
instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. |
Definition at line 74 of file avxintrin.h.
Referenced by _mm256_mask_add_pd(), and _mm256_maskz_add_pd().
|
static |
Adds two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VADDPS
instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. |
Definition at line 92 of file avxintrin.h.
Referenced by _mm256_mask_add_ps(), and _mm256_maskz_add_ps().
|
static |
Adds the even-indexed values and subtracts the odd-indexed values of two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VADDSUBPD
instruction.
__a | A 256-bit vector of [4 x double] containing the left source operand. |
__b | A 256-bit vector of [4 x double] containing the right source operand. |
Definition at line 147 of file avxintrin.h.
|
static |
Adds the even-indexed values and subtracts the odd-indexed values of two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VADDSUBPS
instruction.
__a | A 256-bit vector of [8 x float] containing the left source operand. |
__b | A 256-bit vector of [8 x float] containing the right source operand. |
Definition at line 166 of file avxintrin.h.
|
static |
Performs a bitwise AND of two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VANDPD
instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. |
Definition at line 542 of file avxintrin.h.
Referenced by _mm256_mask_and_pd(), and _mm256_maskz_and_pd().
|
static |
Performs a bitwise AND of two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VANDPS
instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. |
Definition at line 560 of file avxintrin.h.
Referenced by _mm256_mask_and_ps(), and _mm256_maskz_and_ps().
|
static |
Performs a bitwise AND of two 256-bit vectors of [4 x double], using the one's complement of the values contained in the first source operand.
This intrinsic corresponds to the VANDNPD
instruction.
__a | A 256-bit vector of [4 x double] containing the left source operand. The one's complement of this value is used in the bitwise AND. |
__b | A 256-bit vector of [4 x double] containing the right source operand. |
Definition at line 581 of file avxintrin.h.
Referenced by _mm256_mask_andnot_pd(), and _mm256_maskz_andnot_pd().
|
static |
Performs a bitwise AND of two 256-bit vectors of [8 x float], using the one's complement of the values contained in the first source operand.
This intrinsic corresponds to the VANDNPS
instruction.
__a | A 256-bit vector of [8 x float] containing the left source operand. The one's complement of this value is used in the bitwise AND. |
__b | A 256-bit vector of [8 x float] containing the right source operand. |
Definition at line 602 of file avxintrin.h.
Referenced by _mm256_mask_andnot_ps(), and _mm256_maskz_andnot_ps().
|
static |
Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the 256-bit vector operand.
This intrinsic corresponds to the VBLENDVPD
instruction.
__a | A 256-bit vector of [4 x double]. |
__b | A 256-bit vector of [4 x double]. |
__c | A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying how the values are to be copied. The position of the mask bit corresponds to the most significant bit of a copied value. When a mask bit is 0, the corresponding 64-bit element in operand __a is copied to the same position in the destination. When a mask bit is 1, the corresponding 64-bit element in operand __b is copied to the same position in the destination. |
Definition at line 1406 of file avxintrin.h.
|
static |
Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the 256-bit vector operand.
This intrinsic corresponds to the VBLENDVPS
instruction.
__a | A 256-bit vector of [8 x float]. |
__b | A 256-bit vector of [8 x float]. |
__c | A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63, and 31 specifying how the values are to be copied. The position of the mask bit corresponds to the most significant bit of a copied value. When a mask bit is 0, the corresponding 32-bit element in operand __a is copied to the same position in the destination. When a mask bit is 1, the corresponding 32-bit element in operand __b is copied to the same position in the destination. |
Definition at line 1434 of file avxintrin.h.
|
static |
Loads the data from a 128-bit vector of [2 x double] from the specified address pointed to by __a and broadcasts it to 128-bit elements in a 256-bit vector of [4 x double].
This intrinsic corresponds to the VBROADCASTF128
instruction.
__a | A pointer to the 128-bit vector of [2 x double] to be broadcast. |
Definition at line 3102 of file avxintrin.h.
References __a, __b, and _mm_loadu_pd().
|
static |
Loads the data from a 128-bit vector of [4 x float] from the specified address pointed to by __a and broadcasts it to 128-bit elements in a 256-bit vector of [8 x float].
This intrinsic corresponds to the VBROADCASTF128
instruction.
__a | A pointer to the 128-bit vector of [4 x float] to be broadcast. |
Definition at line 3122 of file avxintrin.h.
References __a, __b, and _mm_loadu_ps().
|
static |
Loads a scalar double-precision floating point value from the specified address pointed to by __a and broadcasts it to the elements of a [4 x double] vector.
This intrinsic corresponds to the VBROADCASTSD
instruction.
__a | A pointer to the double-precision floating point value to be broadcast. |
Definition at line 3058 of file avxintrin.h.
References __a.
|
static |
Loads a scalar single-precision floating point value from the specified address pointed to by __a and broadcasts it to the elements of a [8 x float] vector.
This intrinsic corresponds to the VBROADCASTSS
instruction.
__a | A pointer to the single-precision floating point value to be broadcast. |
Definition at line 3080 of file avxintrin.h.
References __a.
|
static |
Constructs a 256-bit floating-point vector of [4 x double] from a 128-bit floating-point vector of [2 x double].
The lower 128 bits contain the value of the source vector. The contents of the upper 128 bits are undefined.
This intrinsic has no corresponding instruction.
__a | A 128-bit vector of [2 x double]. |
Definition at line 4527 of file avxintrin.h.
References __a.
|
static |
Returns the lower 128 bits of a 256-bit floating-point vector of [4 x double] as a 128-bit floating-point vector of [2 x double].
This intrinsic has no corresponding instruction.
__a | A 256-bit floating-point vector of [4 x double]. |
Definition at line 4473 of file avxintrin.h.
References __a.
Referenced by _mm256_storeu2_m128d().
|
static |
Casts a 256-bit floating-point vector of [4 x double] into a 256-bit floating-point vector of [8 x float].
This intrinsic has no corresponding instruction.
__a | A 256-bit floating-point vector of [4 x double]. |
Definition at line 4371 of file avxintrin.h.
References __a.
|
static |
Casts a 256-bit floating-point vector of [4 x double] into a 256-bit integer vector.
This intrinsic has no corresponding instruction.
__a | A 256-bit floating-point vector of [4 x double]. |
Definition at line 4388 of file avxintrin.h.
References __a.
|
static |
Constructs a 256-bit floating-point vector of [8 x float] from a 128-bit floating-point vector of [4 x float].
The lower 128 bits contain the value of the source vector. The contents of the upper 128 bits are undefined.
This intrinsic has no corresponding instruction.
__a | A 128-bit vector of [4 x float]. |
Definition at line 4549 of file avxintrin.h.
References __a.
|
static |
Returns the lower 128 bits of a 256-bit floating-point vector of [8 x float] as a 128-bit floating-point vector of [4 x float].
This intrinsic has no corresponding instruction.
__a | A 256-bit floating-point vector of [8 x float]. |
Definition at line 4490 of file avxintrin.h.
References __a.
Referenced by _mm256_storeu2_m128().
|
static |
Casts a 256-bit floating-point vector of [8 x float] into a 256-bit floating-point vector of [4 x double].
This intrinsic has no corresponding instruction.
__a | A 256-bit floating-point vector of [8 x float]. |
Definition at line 4405 of file avxintrin.h.
References __a.
|
static |
Casts a 256-bit floating-point vector of [8 x float] into a 256-bit integer vector.
This intrinsic has no corresponding instruction.
__a | A 256-bit floating-point vector of [8 x float]. |
Definition at line 4422 of file avxintrin.h.
References __a.
|
static |
Constructs a 256-bit integer vector from a 128-bit integer vector.
The lower 128 bits contain the value of the source vector. The contents of the upper 128 bits are undefined.
This intrinsic has no corresponding instruction.
__a | A 128-bit integer vector. |
Definition at line 4570 of file avxintrin.h.
References __a.
|
static |
Casts a 256-bit integer vector into a 256-bit floating-point vector of [4 x double].
This intrinsic has no corresponding instruction.
__a | A 256-bit integer vector. |
Definition at line 4456 of file avxintrin.h.
References __a.
|
static |
Casts a 256-bit integer vector into a 256-bit floating-point vector of [8 x float].
This intrinsic has no corresponding instruction.
__a | A 256-bit integer vector. |
Definition at line 4439 of file avxintrin.h.
References __a.
|
static |
Truncates a 256-bit integer vector into a 128-bit integer vector.
This intrinsic has no corresponding instruction.
__a | A 256-bit integer vector. |
Definition at line 4506 of file avxintrin.h.
References __a.
Referenced by _mm256_storeu2_m128i().
|
static |
Converts a vector of [4 x i32] into a vector of [4 x double].
This intrinsic corresponds to the VCVTDQ2PD
instruction.
__a | A 128-bit integer vector of [4 x i32]. |
Definition at line 2177 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_cvtepi32_pd(), and _mm256_maskz_cvtepi32_pd().
|
static |
Converts a vector of [8 x i32] into a vector of [8 x float].
This intrinsic corresponds to the VCVTDQ2PS
instruction.
__a | A 256-bit integer vector. |
Definition at line 2192 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_cvtepi32_ps(), and _mm256_maskz_cvtepi32_ps().
|
static |
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32].
If a converted value does not fit in a 32-bit integer, raises a floating-point invalid exception. If the exception is masked, returns the most negative integer.
This intrinsic corresponds to the VCVTPD2DQ
instruction.
__a | A 256-bit vector of [4 x double]. |
Definition at line 2284 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_cvtpd_epi32(), and _mm256_maskz_cvtpd_epi32().
|
static |
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].
This intrinsic corresponds to the VCVTPD2PS
instruction.
__a | A 256-bit vector of [4 x double]. |
Definition at line 2208 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_cvtpd_ps(), and _mm256_maskz_cvtpd_ps().
|
static |
Converts a vector of [8 x float] into a vector of [8 x i32].
If a converted value does not fit in a 32-bit integer, raises a floating-point invalid exception. If the exception is masked, returns the most negative integer.
This intrinsic corresponds to the VCVTPS2DQ
instruction.
__a | A 256-bit vector of [8 x float]. |
Definition at line 2227 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_cvtps_epi32(), and _mm256_maskz_cvtps_epi32().
|
static |
Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double].
This intrinsic corresponds to the VCVTPS2PD
instruction.
__a | A 128-bit vector of [4 x float]. |
Definition at line 2243 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_cvtps_pd(), and _mm256_maskz_cvtps_pd().
|
static |
Returns the first element of the input vector of [4 x double].
This intrinsic is a utility function and does not correspond to a specific instruction.
__a | A 256-bit vector of [4 x double]. |
Definition at line 2320 of file avxintrin.h.
References __a.
|
static |
Returns the first element of the input vector of [8 x i32].
This intrinsic is a utility function and does not correspond to a specific instruction.
__a | A 256-bit vector of [8 x i32]. |
Definition at line 2336 of file avxintrin.h.
|
static |
Returns the first element of the input vector of [8 x float].
This intrinsic is a utility function and does not correspond to a specific instruction.
__a | A 256-bit vector of [8 x float]. |
Definition at line 2353 of file avxintrin.h.
References __a.
|
static |
Converts a 256-bit vector of [4 x double] into four signed truncated (rounded toward zero) 32-bit integers returned in a 128-bit vector of [4 x i32].
If a converted value does not fit in a 32-bit integer, raises a floating-point invalid exception. If the exception is masked, returns the most negative integer.
This intrinsic corresponds to the VCVTTPD2DQ
instruction.
__a | A 256-bit vector of [4 x double]. |
Definition at line 2264 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_cvttpd_epi32(), and _mm256_maskz_cvttpd_epi32().
|
static |
Converts a vector of [8 x float] into eight signed truncated (rounded toward zero) 32-bit integers returned in a vector of [8 x i32].
If a converted value does not fit in a 32-bit integer, raises a floating-point invalid exception. If the exception is masked, returns the most negative integer.
This intrinsic corresponds to the VCVTTPS2DQ
instruction.
__a | A 256-bit vector of [8 x float]. |
Definition at line 2304 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_cvttps_epi32(), and _mm256_maskz_cvttps_epi32().
|
static |
Divides two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VDIVPD
instruction.
__a | A 256-bit vector of [4 x double] containing the dividend. |
__b | A 256-bit vector of [4 x double] containing the divisor. |
Definition at line 184 of file avxintrin.h.
Referenced by _mm256_mask_div_pd(), and _mm256_maskz_div_pd().
|
static |
Divides two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VDIVPS
instruction.
__a | A 256-bit vector of [8 x float] containing the dividend. |
__b | A 256-bit vector of [8 x float] containing the divisor. |
Definition at line 202 of file avxintrin.h.
Referenced by _mm256_mask_div_ps(), and _mm256_maskz_div_ps().
|
static |
Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VHADDPD
instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. The horizontal sums of the values are returned in the even-indexed elements of a vector of [4 x double]. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. The horizontal sums of the values are returned in the odd-indexed elements of a vector of [4 x double]. |
Definition at line 698 of file avxintrin.h.
|
static |
Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VHADDPS
instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. The horizontal sums of the values are returned in the elements with index 0, 1, 4, 5 of a vector of [8 x float]. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. The horizontal sums of the values are returned in the elements with index 2, 3, 6, 7 of a vector of [8 x float]. |
Definition at line 721 of file avxintrin.h.
|
static |
Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VHSUBPD
instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. The horizontal differences between the values are returned in the even-indexed elements of a vector of [4 x double]. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. The horizontal differences between the values are returned in the odd-indexed elements of a vector of [4 x double]. |
Definition at line 744 of file avxintrin.h.
|
static |
Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VHSUBPS
instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. The horizontal differences between the values are returned in the elements with index 0, 1, 4, 5 of a vector of [8 x float]. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. The horizontal differences between the values are returned in the elements with index 2, 3, 6, 7 of a vector of [8 x float]. |
Definition at line 767 of file avxintrin.h.
|
static |
Loads 256 bits of integer data from an unaligned memory location pointed to by __p into a 256-bit integer vector.
This intrinsic may perform better than _mm256_loadu_si256
when the data crosses a cache line boundary.
This intrinsic corresponds to the VLDDQU
instruction.
__p | A pointer to a 256-bit integer vector containing integer values. |
Definition at line 3252 of file avxintrin.h.
References __p.
|
static |
Loads 4 double-precision floating point values from a 32-byte aligned memory location pointed to by __p into a vector of [4 x double].
This intrinsic corresponds to the VMOVAPD
instruction.
__p | A 32-byte aligned pointer to a memory location containing double-precision floating point values. |
Definition at line 3142 of file avxintrin.h.
References __p.
|
static |
Loads 8 single-precision floating point values from a 32-byte aligned memory location pointed to by __p into a vector of [8 x float].
This intrinsic corresponds to the VMOVAPS
instruction.
__p | A 32-byte aligned pointer to a memory location containing float values. |
Definition at line 3158 of file avxintrin.h.
References __p.
|
static |
Loads 256 bits of integer data from a 32-byte aligned memory location pointed to by __p into elements of a 256-bit integer vector.
This intrinsic corresponds to the VMOVDQA
instruction.
__p | A 32-byte aligned pointer to a 256-bit integer vector containing integer values. |
Definition at line 3215 of file avxintrin.h.
References __p.
|
static |
Loads two 128-bit floating-point vectors of [4 x float] from unaligned memory locations and constructs a 256-bit floating-point vector of [8 x float] by concatenating the two 128-bit vectors.
This intrinsic corresponds to load instructions followed by the VINSERTF128
instruction.
__addr_hi | A pointer to a 128-bit memory location containing 4 consecutive single-precision floating-point values. These values are to be copied to bits[255:128] of the result. The address of the memory location does not have to be aligned. |
__addr_lo | A pointer to a 128-bit memory location containing 4 consecutive single-precision floating-point values. These values are to be copied to bits[127:0] of the result. The address of the memory location does not have to be aligned. |
Definition at line 4979 of file avxintrin.h.
References _mm256_set_m128(), and _mm_loadu_ps().
|
static |
Loads two 128-bit floating-point vectors of [2 x double] from unaligned memory locations and constructs a 256-bit floating-point vector of [4 x double] by concatenating the two 128-bit vectors.
This intrinsic corresponds to load instructions followed by the VINSERTF128
instruction.
__addr_hi | A pointer to a 128-bit memory location containing two consecutive double-precision floating-point values. These values are to be copied to bits[255:128] of the result. The address of the memory location does not have to be aligned. |
__addr_lo | A pointer to a 128-bit memory location containing two consecutive double-precision floating-point values. These values are to be copied to bits[127:0] of the result. The address of the memory location does not have to be aligned. |
Definition at line 5006 of file avxintrin.h.
References _mm256_set_m128d(), and _mm_loadu_pd().
|
static |
Loads two 128-bit integer vectors from unaligned memory locations and constructs a 256-bit integer vector by concatenating the two 128-bit vectors.
This intrinsic corresponds to load instructions followed by the VINSERTF128
instruction.
__addr_hi | A pointer to a 128-bit memory location containing a 128-bit integer vector. This vector is to be copied to bits[255:128] of the result. The address of the memory location does not have to be aligned. |
__addr_lo | A pointer to a 128-bit memory location containing a 128-bit integer vector. This vector is to be copied to bits[127:0] of the result. The address of the memory location does not have to be aligned. |
Definition at line 5030 of file avxintrin.h.
References _mm256_set_m128i(), and _mm_loadu_si128().
|
static |
Loads 4 double-precision floating point values from an unaligned memory location pointed to by __p into a vector of [4 x double].
This intrinsic corresponds to the VMOVUPD
instruction.
__p | A pointer to a memory location containing double-precision floating point values. |
Definition at line 3175 of file avxintrin.h.
|
static |
Loads 8 single-precision floating point values from an unaligned memory location pointed to by __p into a vector of [8 x float].
This intrinsic corresponds to the VMOVUPS
instruction.
__p | A pointer to a memory location containing single-precision floating point values. |
Definition at line 3195 of file avxintrin.h.
|
static |
Loads 256 bits of integer data from an unaligned memory location pointed to by __p into a 256-bit integer vector.
This intrinsic corresponds to the VMOVDQU
instruction.
__p | A pointer to a 256-bit integer vector containing integer values. |
Definition at line 3231 of file avxintrin.h.
|
static |
Conditionally loads double-precision floating point elements from a memory location pointed to by __p into a 256-bit vector of [4 x double], depending on the mask bits associated with each data element.
This intrinsic corresponds to the VMASKMOVPD
instruction.
__p | A pointer to a memory location that contains the double-precision floating point values. |
__m | A 256-bit integer vector of [4 x quadword] containing the mask. The most significant bit of each quadword element is the mask bit for the corresponding data element. If a mask bit is zero, the corresponding value in the memory location is not loaded and the corresponding field in the return value is set to zero. |
Definition at line 3418 of file avxintrin.h.
References __p.
|
static |
Conditionally loads single-precision floating point elements from a memory location pointed to by __p into a 256-bit vector of [8 x float], depending on the mask bits associated with each data element.
This intrinsic corresponds to the VMASKMOVPS
instruction.
__p | A pointer to a memory location that contains the single-precision floating point values. |
__m | A 256-bit integer vector of [8 x dword] containing the mask. The most significant bit of each dword element is the mask bit for the corresponding data element. If a mask bit is zero, the corresponding value in the memory location is not loaded and the corresponding field in the return value is set to zero. |
Definition at line 3467 of file avxintrin.h.
References __p.
|
static |
Moves double-precision values from a 256-bit vector of [4 x double] to a memory location pointed to by __p, according to the specified mask.
This intrinsic corresponds to the VMASKMOVPD
instruction.
__p | A pointer to a memory location that will receive the double-precision floating point values. |
__m | A 256-bit integer vector of [4 x quadword] containing the mask. The most significant bit of each quadword element in the mask vector is the mask bit for the corresponding data element. If a mask bit is zero, the corresponding value from vector __a is not stored and the corresponding field in the memory location pointed to by __p is not changed. |
__a | A 256-bit vector of [4 x double] containing the values to be stored. |
Definition at line 3540 of file avxintrin.h.
|
static |
Moves single-precision floating point values from a 256-bit vector of [8 x float] to a memory location pointed to by __p, according to the specified mask.
This intrinsic corresponds to the VMASKMOVPS
instruction.
__p | A pointer to a memory location that will receive the float values. |
__m | A 256-bit integer vector of [8 x dword] containing the mask. The most significant bit of each dword element in the mask vector is the mask bit for the corresponding data element. If a mask bit is zero, the corresponding value from vector __a is not stored and the corresponding field in the memory location pointed to by __p is not changed. |
__a | A 256-bit vector of [8 x float] containing the values to be stored. |
Definition at line 3492 of file avxintrin.h.
|
static |
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
If either value in a comparison is NaN, returns the value from __b.
This intrinsic corresponds to the VMAXPD
instruction.
__a | A 256-bit vector of [4 x double] containing one of the operands. |
__b | A 256-bit vector of [4 x double] containing one of the operands. |
Definition at line 223 of file avxintrin.h.
Referenced by _mm256_mask_max_pd(), and _mm256_maskz_max_pd().
|
static |
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.
If either value in a comparison is NaN, returns the value from __b.
This intrinsic corresponds to the VMAXPS
instruction.
__a | A 256-bit vector of [8 x float] containing one of the operands. |
__b | A 256-bit vector of [8 x float] containing one of the operands. |
Definition at line 244 of file avxintrin.h.
Referenced by _mm256_mask_max_ps(), and _mm256_maskz_max_ps().
|
static |
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
If either value in a comparison is NaN, returns the value from __b.
This intrinsic corresponds to the VMINPD
instruction.
__a | A 256-bit vector of [4 x double] containing one of the operands. |
__b | A 256-bit vector of [4 x double] containing one of the operands. |
Definition at line 265 of file avxintrin.h.
Referenced by _mm256_mask_min_pd(), and _mm256_maskz_min_pd().
|
static |
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.
If either value in a comparison is NaN, returns the value from __b.
This intrinsic corresponds to the VMINPS
instruction.
__a | A 256-bit vector of [8 x float] containing one of the operands. |
__b | A 256-bit vector of [8 x float] containing one of the operands. |
Definition at line 286 of file avxintrin.h.
Referenced by _mm256_mask_min_ps(), and _mm256_maskz_min_ps().
|
static |
Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to double-precision values in a 256-bit vector of [4 x double].
This intrinsic corresponds to the VMOVDDUP
instruction.
__a | A 256-bit vector of [4 x double]. Bits [63:0] of __a are written to bits [127:64] and [63:0] of the return value. Bits [191:128] of __a are written to bits [255:192] and [191:128] of the return value. |
Definition at line 2426 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_movedup_pd(), and _mm256_maskz_movedup_pd().
|
static |
Moves and duplicates odd-indexed values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float].
This intrinsic corresponds to the VMOVSHDUP
instruction.
__a | A 256-bit vector of [8 x float]. Bits [255:224] of __a are written to bits [255:224] and [223:192] of the return value. Bits [191:160] of __a are written to bits [191:160] and [159:128] of the return value. Bits [127:96] of __a are written to bits [127:96] and [95:64] of the return value. Bits [63:32] of __a are written to bits [63:32] and [31:0] of the return value. |
Definition at line 2379 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_movehdup_ps(), and _mm256_maskz_movehdup_ps().
|
static |
Moves and duplicates even-indexed values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float].
This intrinsic corresponds to the VMOVSLDUP
instruction.
__a | A 256-bit vector of [8 x float]. Bits [223:192] of __a are written to bits [255:224] and [223:192] of the return value. Bits [159:128] of __a are written to bits [191:160] and [159:128] of the return value. Bits [95:64] of __a are written to bits [127:96] and [95:64] of the return value. Bits [31:0] of __a are written to bits [63:32] and [31:0] of the return value. |
Definition at line 2404 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_moveldup_ps(), and _mm256_maskz_moveldup_ps().
|
static |
Extracts the sign bits of double-precision floating point elements in a 256-bit vector of [4 x double] and writes them to the lower order bits of the return value.
This intrinsic corresponds to the VMOVMSKPD
instruction.
__a | A 256-bit vector of [4 x double] containing the double-precision floating point values with sign bits to be extracted. |
Definition at line 2976 of file avxintrin.h.
References __a.
|
static |
Extracts the sign bits of single-precision floating point elements in a 256-bit vector of [8 x float] and writes them to the lower order bits of the return value.
This intrinsic corresponds to the VMOVMSKPS
instruction.
__a | A 256-bit vector of [8 x float] containing the single-precision floating point values with sign bits to be extracted. |
Definition at line 2994 of file avxintrin.h.
References __a.
|
static |
Multiplies two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VMULPD
instruction.
__a | A 256-bit vector of [4 x double] containing one of the operands. |
__b | A 256-bit vector of [4 x double] containing one of the operands. |
Definition at line 304 of file avxintrin.h.
Referenced by _mm256_mask_mul_pd(), and _mm256_maskz_mul_pd().
|
static |
Multiplies two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VMULPS
instruction.
__a | A 256-bit vector of [8 x float] containing one of the operands. |
__b | A 256-bit vector of [8 x float] containing one of the operands. |
Definition at line 322 of file avxintrin.h.
Referenced by _mm256_mask_mul_ps(), and _mm256_maskz_mul_ps().
|
static |
Performs a bitwise OR of two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VORPD
instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. |
Definition at line 620 of file avxintrin.h.
Referenced by _mm256_mask_or_pd(), and _mm256_maskz_or_pd().
|
static |
Performs a bitwise OR of two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VORPS
instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. |
Definition at line 638 of file avxintrin.h.
Referenced by _mm256_mask_or_ps(), and _mm256_maskz_or_ps().
|
static |
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector operand.
This intrinsic corresponds to the VPERMILPD
instruction.
__a | A 256-bit vector of [4 x double]. |
__c | A 256-bit integer vector operand specifying how the values are to be copied. Bit [1]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [65]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector. Bit [129]: 0: Bits [191:128] of the source are copied to bits [191:128] of the returned vector. 1: Bits [255:192] of the source are copied to bits [191:128] of the returned vector. Bit [193]: 0: Bits [191:128] of the source are copied to bits [255:192] of the returned vector. 1: Bits [255:192] of the source are copied to bits [255:192] of the returned vector. |
Definition at line 836 of file avxintrin.h.
Referenced by _mm256_mask_permutevar_pd(), and _mm256_maskz_permutevar_pd().
|
static |
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand.
This intrinsic corresponds to the VPERMILPS
instruction.
__a | A 256-bit vector of [8 x float]. |
__c | A 256-bit integer vector operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [33:32]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [65:64]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [97:96]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector. Bits [129:128]: 00: Bits [159:128] of the source are copied to bits [159:128] of the returned vector. 01: Bits [191:160] of the source are copied to bits [159:128] of the returned vector. 10: Bits [223:192] of the source are copied to bits [159:128] of the returned vector. 11: Bits [255:224] of the source are copied to bits [159:128] of the returned vector. Bits [161:160]: 00: Bits [159:128] of the source are copied to bits [191:160] of the returned vector. 01: Bits [191:160] of the source are copied to bits [191:160] of the returned vector. 
10: Bits [223:192] of the source are copied to bits [191:160] of the returned vector. 11: Bits [255:224] of the source are copied to bits [191:160] of the returned vector. Bits [193:192]: 00: Bits [159:128] of the source are copied to bits [223:192] of the returned vector. 01: Bits [191:160] of the source are copied to bits [223:192] of the returned vector. 10: Bits [223:192] of the source are copied to bits [223:192] of the returned vector. 11: Bits [255:224] of the source are copied to bits [223:192] of the returned vector. Bits [225:224]: 00: Bits [159:128] of the source are copied to bits [255:224] of the returned vector. 01: Bits [191:160] of the source are copied to bits [255:224] of the returned vector. 10: Bits [223:192] of the source are copied to bits [255:224] of the returned vector. 11: Bits [255:224] of the source are copied to bits [255:224] of the returned vector. |
Definition at line 982 of file avxintrin.h.
Referenced by _mm256_mask_permutevar_ps(), and _mm256_maskz_permutevar_ps().
|
static |
Calculates the reciprocals of the values in a 256-bit vector of [8 x float].
This intrinsic corresponds to the VRCPPS
instruction.
__a | A 256-bit vector of [8 x float]. |
Definition at line 390 of file avxintrin.h.
References __a.
|
static |
Calculates the reciprocal square roots of the values in a 256-bit vector of [8 x float].
This intrinsic corresponds to the VRSQRTPS
instruction.
__a | A 256-bit vector of [8 x float]. |
Definition at line 373 of file avxintrin.h.
References __a.
|
static |
Constructs a 256-bit integer vector of [16 x i16], with each of the 16-bit integral vector elements set to the specified 16-bit integral value.
This intrinsic corresponds to the VPSHUFB+VINSERTF128
instructions.
__w | A 16-bit integral value used to initialize each vector element of the result. |
Definition at line 4272 of file avxintrin.h.
References _mm256_set_epi16().
Referenced by _mm256_mask_reduce_and_epi16(), _mm256_mask_reduce_max_epi16(), _mm256_mask_reduce_min_epi16(), _mm256_mask_reduce_min_epu16(), _mm256_mask_reduce_mul_epi16(), _mm256_mask_set1_epi16(), and _mm256_maskz_set1_epi16().
|
static |
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements set to the specified 32-bit integral value.
This intrinsic corresponds to the VPERMILPS+VINSERTF128
instructions.
__i | A 32-bit integral value used to initialize each vector element of the result. |
Definition at line 4254 of file avxintrin.h.
References _mm256_set_epi32().
Referenced by _mm256_broadcastmw_epi32(), _mm256_mask_set1_epi32(), and _mm256_maskz_set1_epi32().
|
static |
Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements set to the specified 64-bit integral value.
This intrinsic corresponds to the VMOVDDUP+VINSERTF128
instructions.
__q | A 64-bit integral value used to initialize each vector element of the result. |
Definition at line 4311 of file avxintrin.h.
References _mm256_set_epi64x().
Referenced by _mm256_broadcastmb_epi64(), _mm256_mask_set1_epi64(), and _mm256_maskz_set1_epi64().
|
static |
Constructs a 256-bit integer vector of [32 x i8], with each of the 8-bit integral vector elements set to the specified 8-bit integral value.
This intrinsic corresponds to the VPSHUFB+VINSERTF128
instructions.
__b | An 8-bit integral value used to initialize each vector element of the result. |
Definition at line 4290 of file avxintrin.h.
References __b, and _mm256_set_epi8().
Referenced by _mm256_mask_reduce_and_epi8(), _mm256_mask_reduce_max_epi8(), _mm256_mask_reduce_min_epi8(), _mm256_mask_reduce_min_epu8(), _mm256_mask_reduce_mul_epi8(), _mm256_mask_set1_epi8(), and _mm256_maskz_set1_epi8().
|
static |
Constructs a 256-bit floating-point vector of [4 x double], with each of the four double-precision floating-point vector elements set to the specified double-precision floating-point value.
This intrinsic corresponds to the VMOVDDUP+VINSERTF128
instructions.
__w | A double-precision floating-point value used to initialize each vector element of the result. |
Definition at line 4216 of file avxintrin.h.
References _mm256_set_pd().
|
static |
Constructs a 256-bit floating-point vector of [8 x float], with each of the eight single-precision floating-point vector elements set to the specified single-precision floating-point value.
This intrinsic corresponds to the VPERMILPS+VINSERTF128
instructions.
__w | A single-precision floating-point value used to initialize each vector element of the result. |
Definition at line 4235 of file avxintrin.h.
References _mm256_set_ps().
|
static |
Constructs a 256-bit integer vector initialized with the specified 16-bit integral values.
This intrinsic is a utility function and does not correspond to a specific instruction.
__w15 | A 16-bit integral value used to initialize bits [255:240] of the result. |
__w14 | A 16-bit integral value used to initialize bits [239:224] of the result. |
__w13 | A 16-bit integral value used to initialize bits [223:208] of the result. |
__w12 | A 16-bit integral value used to initialize bits [207:192] of the result. |
__w11 | A 16-bit integral value used to initialize bits [191:176] of the result. |
__w10 | A 16-bit integral value used to initialize bits [175:160] of the result. |
__w09 | A 16-bit integral value used to initialize bits [159:144] of the result. |
__w08 | A 16-bit integral value used to initialize bits [143:128] of the result. |
__w07 | A 16-bit integral value used to initialize bits [127:112] of the result. |
__w06 | A 16-bit integral value used to initialize bits [111:96] of the result. |
__w05 | A 16-bit integral value used to initialize bits [95:80] of the result. |
__w04 | A 16-bit integral value used to initialize bits [79:64] of the result. |
__w03 | A 16-bit integral value used to initialize bits [63:48] of the result. |
__w02 | A 16-bit integral value used to initialize bits [47:32] of the result. |
__w01 | A 16-bit integral value used to initialize bits [31:16] of the result. |
__w00 | A 16-bit integral value used to initialize bits [15:0] of the result. |
Definition at line 3812 of file avxintrin.h.
Referenced by _mm256_set1_epi16(), and _mm256_setr_epi16().
|
static |
Constructs a 256-bit integer vector initialized with the specified 32-bit integral values.
This intrinsic is a utility function and does not correspond to a specific instruction.
__i0 | A 32-bit integral value used to initialize bits [255:224] of the result. |
__i1 | A 32-bit integral value used to initialize bits [223:192] of the result. |
__i2 | A 32-bit integral value used to initialize bits [191:160] of the result. |
__i3 | A 32-bit integral value used to initialize bits [159:128] of the result. |
__i4 | A 32-bit integral value used to initialize bits [127:96] of the result. |
__i5 | A 32-bit integral value used to initialize bits [95:64] of the result. |
__i6 | A 32-bit integral value used to initialize bits [63:32] of the result. |
__i7 | A 32-bit integral value used to initialize bits [31:0] of the result. |
Definition at line 3764 of file avxintrin.h.
Referenced by _mm256_set1_epi32(), and _mm256_setr_epi32().
|
static |
Constructs a 256-bit integer vector initialized with the specified 64-bit integral values.
This intrinsic corresponds to the VPUNPCKLQDQ+VINSERTF128
instructions.
__a | A 64-bit integral value used to initialize bits [255:192] of the result. |
__b | A 64-bit integral value used to initialize bits [191:128] of the result. |
__c | A 64-bit integral value used to initialize bits [127:64] of the result. |
__d | A 64-bit integral value used to initialize bits [63:0] of the result. |
Definition at line 3930 of file avxintrin.h.
Referenced by _mm256_set1_epi64x(), and _mm256_setr_epi64x().
|
static |
Constructs a 256-bit integer vector initialized with the specified 8-bit integral values.
This intrinsic is a utility function and does not correspond to a specific instruction.
__b31 | An 8-bit integral value used to initialize bits [255:248] of the result. |
__b30 | An 8-bit integral value used to initialize bits [247:240] of the result. |
__b29 | An 8-bit integral value used to initialize bits [239:232] of the result. |
__b28 | An 8-bit integral value used to initialize bits [231:224] of the result. |
__b27 | An 8-bit integral value used to initialize bits [223:216] of the result. |
__b26 | An 8-bit integral value used to initialize bits [215:208] of the result. |
__b25 | An 8-bit integral value used to initialize bits [207:200] of the result. |
__b24 | An 8-bit integral value used to initialize bits [199:192] of the result. |
__b23 | An 8-bit integral value used to initialize bits [191:184] of the result. |
__b22 | An 8-bit integral value used to initialize bits [183:176] of the result. |
__b21 | An 8-bit integral value used to initialize bits [175:168] of the result. |
__b20 | An 8-bit integral value used to initialize bits [167:160] of the result. |
__b19 | An 8-bit integral value used to initialize bits [159:152] of the result. |
__b18 | An 8-bit integral value used to initialize bits [151:144] of the result. |
__b17 | An 8-bit integral value used to initialize bits [143:136] of the result. |
__b16 | An 8-bit integral value used to initialize bits [135:128] of the result. |
__b15 | An 8-bit integral value used to initialize bits [127:120] of the result. |
__b14 | An 8-bit integral value used to initialize bits [119:112] of the result. |
__b13 | An 8-bit integral value used to initialize bits [111:104] of the result. |
__b12 | An 8-bit integral value used to initialize bits [103:96] of the result. |
__b11 | An 8-bit integral value used to initialize bits [95:88] of the result. |
__b10 | An 8-bit integral value used to initialize bits [87:80] of the result. |
__b09 | An 8-bit integral value used to initialize bits [79:72] of the result. |
__b08 | An 8-bit integral value used to initialize bits [71:64] of the result. |
__b07 | An 8-bit integral value used to initialize bits [63:56] of the result. |
__b06 | An 8-bit integral value used to initialize bits [55:48] of the result. |
__b05 | An 8-bit integral value used to initialize bits [47:40] of the result. |
__b04 | An 8-bit integral value used to initialize bits [39:32] of the result. |
__b03 | An 8-bit integral value used to initialize bits [31:24] of the result. |
__b02 | An 8-bit integral value used to initialize bits [23:16] of the result. |
__b01 | An 8-bit integral value used to initialize bits [15:8] of the result. |
__b00 | An 8-bit integral value used to initialize bits [7:0] of the result. |
Definition at line 3895 of file avxintrin.h.
Referenced by _mm256_set1_epi8(), and _mm256_setr_epi8().
|
static |
Constructs a 256-bit floating-point vector of [8 x float] by concatenating two 128-bit floating-point vectors of [4 x float].
This intrinsic corresponds to the VINSERTF128
instruction.
__hi | A 128-bit floating-point vector of [4 x float] to be copied to the upper 128 bits of the result. |
__lo | A 128-bit floating-point vector of [4 x float] to be copied to the lower 128 bits of the result. |
Definition at line 4843 of file avxintrin.h.
Referenced by _mm256_loadu2_m128(), and _mm256_setr_m128().
|
static |
Constructs a 256-bit floating-point vector of [4 x double] by concatenating two 128-bit floating-point vectors of [2 x double].
This intrinsic corresponds to the VINSERTF128
instruction.
__hi | A 128-bit floating-point vector of [2 x double] to be copied to the upper 128 bits of the result. |
__lo | A 128-bit floating-point vector of [2 x double] to be copied to the lower 128 bits of the result. |
Definition at line 4864 of file avxintrin.h.
Referenced by _mm256_loadu2_m128d(), and _mm256_setr_m128d().
|
static |
Constructs a 256-bit integer vector by concatenating two 128-bit integer vectors.
This intrinsic corresponds to the VINSERTF128
instruction.
__hi | A 128-bit integer vector to be copied to the upper 128 bits of the result. |
__lo | A 128-bit integer vector to be copied to the lower 128 bits of the result. |
Definition at line 4884 of file avxintrin.h.
Referenced by _mm256_loadu2_m128i(), and _mm256_setr_m128i().
|
static |
Constructs a 256-bit floating-point vector of [4 x double] initialized with the specified double-precision floating-point values.
This intrinsic corresponds to the VUNPCKLPD+VINSERTF128
instructions.
__a | A double-precision floating-point value used to initialize bits [255:192] of the result. |
__b | A double-precision floating-point value used to initialize bits [191:128] of the result. |
__c | A double-precision floating-point value used to initialize bits [127:64] of the result. |
__d | A double-precision floating-point value used to initialize bits [63:0] of the result. |
Definition at line 3693 of file avxintrin.h.
Referenced by _mm256_set1_pd(), and _mm256_setr_pd().
|
static |
Constructs a 256-bit floating-point vector of [8 x float] initialized with the specified single-precision floating-point values.
This intrinsic is a utility function and does not correspond to a specific instruction.
__a | A single-precision floating-point value used to initialize bits [255:224] of the result. |
__b | A single-precision floating-point value used to initialize bits [223:192] of the result. |
__c | A single-precision floating-point value used to initialize bits [191:160] of the result. |
__d | A single-precision floating-point value used to initialize bits [159:128] of the result. |
__e | A single-precision floating-point value used to initialize bits [127:96] of the result. |
__f | A single-precision floating-point value used to initialize bits [95:64] of the result. |
__g | A single-precision floating-point value used to initialize bits [63:32] of the result. |
__h | A single-precision floating-point value used to initialize bits [31:0] of the result. |
Definition at line 3732 of file avxintrin.h.
Referenced by _mm256_set1_ps(), and _mm256_setr_ps().
|
static |
Constructs a 256-bit integer vector, initialized in reverse order with the specified 16-bit integral values.
This intrinsic is a utility function and does not correspond to a specific instruction.
__w15 | A 16-bit integral value used to initialize bits [15:0] of the result. |
__w14 | A 16-bit integral value used to initialize bits [31:16] of the result. |
__w13 | A 16-bit integral value used to initialize bits [47:32] of the result. |
__w12 | A 16-bit integral value used to initialize bits [63:48] of the result. |
__w11 | A 16-bit integral value used to initialize bits [79:64] of the result. |
__w10 | A 16-bit integral value used to initialize bits [95:80] of the result. |
__w09 | A 16-bit integral value used to initialize bits [111:96] of the result. |
__w08 | A 16-bit integral value used to initialize bits [127:112] of the result. |
__w07 | A 16-bit integral value used to initialize bits [143:128] of the result. |
__w06 | A 16-bit integral value used to initialize bits [159:144] of the result. |
__w05 | A 16-bit integral value used to initialize bits [175:160] of the result. |
__w04 | A 16-bit integral value used to initialize bits [191:176] of the result. |
__w03 | A 16-bit integral value used to initialize bits [207:192] of the result. |
__w02 | A 16-bit integral value used to initialize bits [223:208] of the result. |
__w01 | A 16-bit integral value used to initialize bits [239:224] of the result. |
__w00 | A 16-bit integral value used to initialize bits [255:240] of the result. |
Definition at line 4079 of file avxintrin.h.
References _mm256_set_epi16().
|
static |
Constructs a 256-bit integer vector, initialized in reverse order with the specified 32-bit integral values.
This intrinsic is a utility function and does not correspond to a specific instruction.
__i0 | A 32-bit integral value used to initialize bits [31:0] of the result. |
__i1 | A 32-bit integral value used to initialize bits [63:32] of the result. |
__i2 | A 32-bit integral value used to initialize bits [95:64] of the result. |
__i3 | A 32-bit integral value used to initialize bits [127:96] of the result. |
__i4 | A 32-bit integral value used to initialize bits [159:128] of the result. |
__i5 | A 32-bit integral value used to initialize bits [191:160] of the result. |
__i6 | A 32-bit integral value used to initialize bits [223:192] of the result. |
__i7 | A 32-bit integral value used to initialize bits [255:224] of the result. |
Definition at line 4031 of file avxintrin.h.
References _mm256_set_epi32().
|
static |
Constructs a 256-bit integer vector, initialized in reverse order with the specified 64-bit integral values.
This intrinsic corresponds to the VPUNPCKLQDQ+VINSERTF128
instructions.
__a | A 64-bit integral value used to initialize bits [63:0] of the result. |
__b | A 64-bit integral value used to initialize bits [127:64] of the result. |
__c | A 64-bit integral value used to initialize bits [191:128] of the result. |
__d | A 64-bit integral value used to initialize bits [255:192] of the result. |
Definition at line 4197 of file avxintrin.h.
References __a, __b, __c, and _mm256_set_epi64x().
|
static |
Constructs a 256-bit integer vector, initialized in reverse order with the specified 8-bit integral values.
This intrinsic is a utility function and does not correspond to a specific instruction.
__b31 | An 8-bit integral value used to initialize bits [7:0] of the result. |
__b30 | An 8-bit integral value used to initialize bits [15:8] of the result. |
__b29 | An 8-bit integral value used to initialize bits [23:16] of the result. |
__b28 | An 8-bit integral value used to initialize bits [31:24] of the result. |
__b27 | An 8-bit integral value used to initialize bits [39:32] of the result. |
__b26 | An 8-bit integral value used to initialize bits [47:40] of the result. |
__b25 | An 8-bit integral value used to initialize bits [55:48] of the result. |
__b24 | An 8-bit integral value used to initialize bits [63:56] of the result. |
__b23 | An 8-bit integral value used to initialize bits [71:64] of the result. |
__b22 | An 8-bit integral value used to initialize bits [79:72] of the result. |
__b21 | An 8-bit integral value used to initialize bits [87:80] of the result. |
__b20 | An 8-bit integral value used to initialize bits [95:88] of the result. |
__b19 | An 8-bit integral value used to initialize bits [103:96] of the result. |
__b18 | An 8-bit integral value used to initialize bits [111:104] of the result. |
__b17 | An 8-bit integral value used to initialize bits [119:112] of the result. |
__b16 | An 8-bit integral value used to initialize bits [127:120] of the result. |
__b15 | An 8-bit integral value used to initialize bits [135:128] of the result. |
__b14 | An 8-bit integral value used to initialize bits [143:136] of the result. |
__b13 | An 8-bit integral value used to initialize bits [151:144] of the result. |
__b12 | An 8-bit integral value used to initialize bits [159:152] of the result. |
__b11 | An 8-bit integral value used to initialize bits [167:160] of the result. |
__b10 | An 8-bit integral value used to initialize bits [175:168] of the result. |
__b09 | An 8-bit integral value used to initialize bits [183:176] of the result. |
__b08 | An 8-bit integral value used to initialize bits [191:184] of the result. |
__b07 | An 8-bit integral value used to initialize bits [199:192] of the result. |
__b06 | An 8-bit integral value used to initialize bits [207:200] of the result. |
__b05 | An 8-bit integral value used to initialize bits [215:208] of the result. |
__b04 | An 8-bit integral value used to initialize bits [223:216] of the result. |
__b03 | An 8-bit integral value used to initialize bits [231:224] of the result. |
__b02 | An 8-bit integral value used to initialize bits [239:232] of the result. |
__b01 | An 8-bit integral value used to initialize bits [247:240] of the result. |
__b00 | An 8-bit integral value used to initialize bits [255:248] of the result. |
Definition at line 4164 of file avxintrin.h.
References _mm256_set_epi8().
|
static |
Constructs a 256-bit floating-point vector of [8 x float] by concatenating two 128-bit floating-point vectors of [4 x float].
This is similar to _mm256_set_m128, but the order of the input parameters is swapped.
This intrinsic corresponds to the VINSERTF128
instruction.
__lo | A 128-bit floating-point vector of [4 x float] to be copied to the lower 128 bits of the result. |
__hi | A 128-bit floating-point vector of [4 x float] to be copied to the upper 128 bits of the result. |
Definition at line 4907 of file avxintrin.h.
References _mm256_set_m128().
|
static |
Constructs a 256-bit floating-point vector of [4 x double] by concatenating two 128-bit floating-point vectors of [2 x double].
This is similar to _mm256_set_m128d, but the order of the input parameters is swapped.
This intrinsic corresponds to the VINSERTF128
instruction.
__lo | A 128-bit floating-point vector of [2 x double] to be copied to the lower 128 bits of the result. |
__hi | A 128-bit floating-point vector of [2 x double] to be copied to the upper 128 bits of the result. |
Definition at line 4930 of file avxintrin.h.
References _mm256_set_m128d().
|
static |
Constructs a 256-bit integer vector by concatenating two 128-bit integer vectors.
This is similar to _mm256_set_m128i, but the order of the input parameters is swapped.
This intrinsic corresponds to the VINSERTF128
instruction.
__lo | A 128-bit integer vector to be copied to the lower 128 bits of the result. |
__hi | A 128-bit integer vector to be copied to the upper 128 bits of the result. |
Definition at line 4951 of file avxintrin.h.
References _mm256_set_m128i().
|
static |
Constructs a 256-bit floating-point vector of [4 x double], initialized in reverse order with the specified double-precision floating-point values.
This intrinsic corresponds to the VUNPCKLPD+VINSERTF128
instructions.
__a | A double-precision floating-point value used to initialize bits [63:0] of the result. |
__b | A double-precision floating-point value used to initialize bits [127:64] of the result. |
__c | A double-precision floating-point value used to initialize bits [191:128] of the result. |
__d | A double-precision floating-point value used to initialize bits [255:192] of the result. |
Definition at line 3959 of file avxintrin.h.
References __a, __b, __c, and _mm256_set_pd().
|
static |
Constructs a 256-bit floating-point vector of [8 x float], initialized in reverse order with the specified single-precision floating-point values.
This intrinsic is a utility function and does not correspond to a specific instruction.
__a | A single-precision floating-point value used to initialize bits [31:0] of the result. |
__b | A single-precision floating-point value used to initialize bits [63:32] of the result. |
__c | A single-precision floating-point value used to initialize bits [95:64] of the result. |
__d | A single-precision floating-point value used to initialize bits [127:96] of the result. |
__e | A single-precision floating-point value used to initialize bits [159:128] of the result. |
__f | A single-precision floating-point value used to initialize bits [191:160] of the result. |
__g | A single-precision floating-point value used to initialize bits [223:192] of the result. |
__h | A single-precision floating-point value used to initialize bits [255:224] of the result. |
Definition at line 3999 of file avxintrin.h.
References __a, __b, __c, and _mm256_set_ps().
|
static |
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
This intrinsic corresponds to the VXORPS
instruction.
Definition at line 4326 of file avxintrin.h.
Referenced by _mm256_getexp_pd(), _mm256_maskz_add_pd(), _mm256_maskz_and_pd(), _mm256_maskz_andnot_pd(), _mm256_maskz_broadcast_f64x2(), _mm256_maskz_broadcastsd_pd(), _mm256_maskz_compress_pd(), _mm256_maskz_cvtepi32_pd(), _mm256_maskz_cvtepi64_pd(), _mm256_maskz_cvtepu32_pd(), _mm256_maskz_cvtepu64_pd(), _mm256_maskz_cvtps_pd(), _mm256_maskz_div_pd(), _mm256_maskz_expand_pd(), _mm256_maskz_expandloadu_pd(), _mm256_maskz_fmadd_pd(), _mm256_maskz_fmaddsub_pd(), _mm256_maskz_fmsub_pd(), _mm256_maskz_fmsubadd_pd(), _mm256_maskz_fnmadd_pd(), _mm256_maskz_fnmsub_pd(), _mm256_maskz_getexp_pd(), _mm256_maskz_load_pd(), _mm256_maskz_loadu_pd(), _mm256_maskz_max_pd(), _mm256_maskz_min_pd(), _mm256_maskz_mov_pd(), _mm256_maskz_movedup_pd(), _mm256_maskz_mul_pd(), _mm256_maskz_or_pd(), _mm256_maskz_permutevar_pd(), _mm256_maskz_permutex2var_pd(), _mm256_maskz_permutexvar_pd(), _mm256_maskz_rcp14_pd(), _mm256_maskz_rsqrt14_pd(), _mm256_maskz_scalef_pd(), _mm256_maskz_sqrt_pd(), _mm256_maskz_sub_pd(), _mm256_maskz_unpackhi_pd(), _mm256_maskz_unpacklo_pd(), _mm256_maskz_xor_pd(), _mm256_rcp14_pd(), _mm256_rsqrt14_pd(), _mm256_scalef_pd(), and _mm512_zextpd256_pd512().
|
static |
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
This intrinsic corresponds to the VXORPS
instruction.
Definition at line 4340 of file avxintrin.h.
Referenced by _mm256_getexp_ps(), _mm256_maskz_add_ps(), _mm256_maskz_and_ps(), _mm256_maskz_andnot_ps(), _mm256_maskz_broadcast_f32x2(), _mm256_maskz_broadcast_f32x4(), _mm256_maskz_broadcastss_ps(), _mm256_maskz_compress_ps(), _mm256_maskz_cvtepi32_ps(), _mm256_maskz_cvtepu32_ps(), _mm256_maskz_cvtph_ps(), _mm256_maskz_div_ps(), _mm256_maskz_expand_ps(), _mm256_maskz_expandloadu_ps(), _mm256_maskz_fmadd_ps(), _mm256_maskz_fmaddsub_ps(), _mm256_maskz_fmsub_ps(), _mm256_maskz_fmsubadd_ps(), _mm256_maskz_fnmadd_ps(), _mm256_maskz_fnmsub_ps(), _mm256_maskz_getexp_ps(), _mm256_maskz_load_ps(), _mm256_maskz_loadu_ps(), _mm256_maskz_max_ps(), _mm256_maskz_min_ps(), _mm256_maskz_mov_ps(), _mm256_maskz_movehdup_ps(), _mm256_maskz_moveldup_ps(), _mm256_maskz_mul_ps(), _mm256_maskz_or_ps(), _mm256_maskz_permutevar_ps(), _mm256_maskz_permutex2var_ps(), _mm256_maskz_permutexvar_ps(), _mm256_maskz_rcp14_ps(), _mm256_maskz_rsqrt14_ps(), _mm256_maskz_scalef_ps(), _mm256_maskz_sqrt_ps(), _mm256_maskz_sub_ps(), _mm256_maskz_unpackhi_ps(), _mm256_maskz_unpacklo_ps(), _mm256_maskz_xor_ps(), _mm256_rcp14_ps(), _mm256_rsqrt14_ps(), _mm256_scalef_ps(), _mm512_cvtepi64_ps(), _mm512_cvtepu64_ps(), _mm512_cvtpd_pslo(), _mm512_mask_cvtpd_pslo(), _mm512_maskz_cvtepi64_ps(), _mm512_maskz_cvtepu64_ps(), _mm512_maskz_cvtpd_ps(), and _mm512_zextps256_ps512().
|
static |
Constructs a 256-bit integer vector initialized to zero.
This intrinsic corresponds to the VXORPS
instruction.
Definition at line 4353 of file avxintrin.h.
Referenced by _mm256_cvtpd_epi64(), _mm256_cvtpd_epu64(), _mm256_cvtps_epi64(), _mm256_cvtps_epu32(), _mm256_cvtps_epu64(), _mm256_cvttpd_epi64(), _mm256_cvttpd_epu64(), _mm256_cvttps_epi64(), _mm256_cvttps_epu32(), _mm256_cvttps_epu64(), _mm256_mask_test_epi16_mask(), _mm256_mask_test_epi32_mask(), _mm256_mask_test_epi64_mask(), _mm256_mask_test_epi8_mask(), _mm256_mask_testn_epi16_mask(), _mm256_mask_testn_epi32_mask(), _mm256_mask_testn_epi64_mask(), _mm256_mask_testn_epi8_mask(), _mm256_maskz_abs_epi16(), _mm256_maskz_abs_epi32(), _mm256_maskz_abs_epi64(), _mm256_maskz_abs_epi8(), _mm256_maskz_add_epi16(), _mm256_maskz_add_epi32(), _mm256_maskz_add_epi64(), _mm256_maskz_add_epi8(), _mm256_maskz_adds_epi16(), _mm256_maskz_adds_epi8(), _mm256_maskz_adds_epu16(), _mm256_maskz_adds_epu8(), _mm256_maskz_and_epi32(), _mm256_maskz_and_epi64(), _mm256_maskz_andnot_epi32(), _mm256_maskz_andnot_epi64(), _mm256_maskz_avg_epu16(), _mm256_maskz_avg_epu8(), _mm256_maskz_broadcast_i32x2(), _mm256_maskz_broadcast_i32x4(), _mm256_maskz_broadcast_i64x2(), _mm256_maskz_broadcastb_epi8(), _mm256_maskz_broadcastd_epi32(), _mm256_maskz_broadcastq_epi64(), _mm256_maskz_broadcastw_epi16(), _mm256_maskz_compress_epi16(), _mm256_maskz_compress_epi32(), _mm256_maskz_compress_epi64(), _mm256_maskz_compress_epi8(), _mm256_maskz_conflict_epi32(), _mm256_maskz_conflict_epi64(), _mm256_maskz_cvtepi16_epi32(), _mm256_maskz_cvtepi16_epi64(), _mm256_maskz_cvtepi32_epi64(), _mm256_maskz_cvtepi8_epi16(), _mm256_maskz_cvtepi8_epi32(), _mm256_maskz_cvtepi8_epi64(), _mm256_maskz_cvtepu16_epi32(), _mm256_maskz_cvtepu16_epi64(), _mm256_maskz_cvtepu32_epi64(), _mm256_maskz_cvtepu8_epi16(), _mm256_maskz_cvtepu8_epi32(), _mm256_maskz_cvtepu8_epi64(), _mm256_maskz_cvtpd_epi64(), _mm256_maskz_cvtpd_epu64(), _mm256_maskz_cvtps_epi32(), _mm256_maskz_cvtps_epi64(), _mm256_maskz_cvtps_epu32(), _mm256_maskz_cvtps_epu64(), _mm256_maskz_cvttpd_epi64(), _mm256_maskz_cvttpd_epu64(), _mm256_maskz_cvttps_epi32(), 
_mm256_maskz_cvttps_epi64(), _mm256_maskz_cvttps_epu32(), _mm256_maskz_cvttps_epu64(), _mm256_maskz_dpbusd_epi32(), _mm256_maskz_dpbusds_epi32(), _mm256_maskz_dpwssd_epi32(), _mm256_maskz_dpwssds_epi32(), _mm256_maskz_expand_epi16(), _mm256_maskz_expand_epi32(), _mm256_maskz_expand_epi64(), _mm256_maskz_expand_epi8(), _mm256_maskz_expandloadu_epi16(), _mm256_maskz_expandloadu_epi32(), _mm256_maskz_expandloadu_epi64(), _mm256_maskz_expandloadu_epi8(), _mm256_maskz_load_epi32(), _mm256_maskz_load_epi64(), _mm256_maskz_loadu_epi16(), _mm256_maskz_loadu_epi32(), _mm256_maskz_loadu_epi64(), _mm256_maskz_loadu_epi8(), _mm256_maskz_lzcnt_epi32(), _mm256_maskz_lzcnt_epi64(), _mm256_maskz_madd52hi_epu64(), _mm256_maskz_madd52lo_epu64(), _mm256_maskz_madd_epi16(), _mm256_maskz_maddubs_epi16(), _mm256_maskz_max_epi16(), _mm256_maskz_max_epi32(), _mm256_maskz_max_epi64(), _mm256_maskz_max_epi8(), _mm256_maskz_max_epu16(), _mm256_maskz_max_epu32(), _mm256_maskz_max_epu64(), _mm256_maskz_max_epu8(), _mm256_maskz_min_epi16(), _mm256_maskz_min_epi32(), _mm256_maskz_min_epi64(), _mm256_maskz_min_epi8(), _mm256_maskz_min_epu16(), _mm256_maskz_min_epu32(), _mm256_maskz_min_epu64(), _mm256_maskz_min_epu8(), _mm256_maskz_mov_epi16(), _mm256_maskz_mov_epi32(), _mm256_maskz_mov_epi64(), _mm256_maskz_mov_epi8(), _mm256_maskz_mul_epi32(), _mm256_maskz_mul_epu32(), _mm256_maskz_mulhi_epi16(), _mm256_maskz_mulhi_epu16(), _mm256_maskz_mulhrs_epi16(), _mm256_maskz_mullo_epi16(), _mm256_maskz_mullo_epi32(), _mm256_maskz_mullo_epi64(), _mm256_maskz_multishift_epi64_epi8(), _mm256_maskz_or_epi32(), _mm256_maskz_or_epi64(), _mm256_maskz_packs_epi16(), _mm256_maskz_packs_epi32(), _mm256_maskz_packus_epi16(), _mm256_maskz_packus_epi32(), _mm256_maskz_permutex2var_epi16(), _mm256_maskz_permutex2var_epi32(), _mm256_maskz_permutex2var_epi64(), _mm256_maskz_permutex2var_epi8(), _mm256_maskz_permutexvar_epi16(), _mm256_maskz_permutexvar_epi32(), _mm256_maskz_permutexvar_epi64(), 
_mm256_maskz_permutexvar_epi8(), _mm256_maskz_popcnt_epi16(), _mm256_maskz_popcnt_epi32(), _mm256_maskz_popcnt_epi64(), _mm256_maskz_popcnt_epi8(), _mm256_maskz_rolv_epi32(), _mm256_maskz_rolv_epi64(), _mm256_maskz_rorv_epi32(), _mm256_maskz_rorv_epi64(), _mm256_maskz_set1_epi16(), _mm256_maskz_set1_epi32(), _mm256_maskz_set1_epi64(), _mm256_maskz_set1_epi8(), _mm256_maskz_shldv_epi16(), _mm256_maskz_shldv_epi32(), _mm256_maskz_shldv_epi64(), _mm256_maskz_shrdv_epi16(), _mm256_maskz_shrdv_epi32(), _mm256_maskz_shrdv_epi64(), _mm256_maskz_shuffle_epi8(), _mm256_maskz_sll_epi16(), _mm256_maskz_sll_epi32(), _mm256_maskz_sll_epi64(), _mm256_maskz_slli_epi16(), _mm256_maskz_slli_epi32(), _mm256_maskz_slli_epi64(), _mm256_maskz_sllv_epi16(), _mm256_maskz_sllv_epi32(), _mm256_maskz_sllv_epi64(), _mm256_maskz_sra_epi16(), _mm256_maskz_sra_epi32(), _mm256_maskz_sra_epi64(), _mm256_maskz_srai_epi16(), _mm256_maskz_srai_epi32(), _mm256_maskz_srai_epi64(), _mm256_maskz_srav_epi16(), _mm256_maskz_srav_epi32(), _mm256_maskz_srav_epi64(), _mm256_maskz_srl_epi16(), _mm256_maskz_srl_epi32(), _mm256_maskz_srl_epi64(), _mm256_maskz_srli_epi16(), _mm256_maskz_srli_epi32(), _mm256_maskz_srli_epi64(), _mm256_maskz_srlv_epi16(), _mm256_maskz_srlv_epi32(), _mm256_maskz_srlv_epi64(), _mm256_maskz_sub_epi16(), _mm256_maskz_sub_epi32(), _mm256_maskz_sub_epi64(), _mm256_maskz_sub_epi8(), _mm256_maskz_subs_epi16(), _mm256_maskz_subs_epi8(), _mm256_maskz_subs_epu16(), _mm256_maskz_subs_epu8(), _mm256_maskz_unpackhi_epi16(), _mm256_maskz_unpackhi_epi32(), _mm256_maskz_unpackhi_epi64(), _mm256_maskz_unpackhi_epi8(), _mm256_maskz_unpacklo_epi16(), _mm256_maskz_unpacklo_epi32(), _mm256_maskz_unpacklo_epi64(), _mm256_maskz_unpacklo_epi8(), _mm256_maskz_xor_epi32(), _mm256_maskz_xor_epi64(), _mm256_test_epi16_mask(), _mm256_test_epi32_mask(), _mm256_test_epi64_mask(), _mm256_test_epi8_mask(), _mm256_testn_epi16_mask(), _mm256_testn_epi32_mask(), _mm256_testn_epi64_mask(), _mm256_testn_epi8_mask(), 
_mm512_cvtsepi16_epi8(), _mm512_cvttpd_epi32(), _mm512_cvtusepi16_epi8(), _mm512_maskz_cvtepi16_epi8(), _mm512_maskz_cvtepi32_epi16(), _mm512_maskz_cvtepi64_epi32(), _mm512_maskz_cvtpd_epi32(), _mm512_maskz_cvtpd_epu32(), _mm512_maskz_cvtsepi16_epi8(), _mm512_maskz_cvtsepi32_epi16(), _mm512_maskz_cvtsepi64_epi32(), _mm512_maskz_cvttpd_epi32(), _mm512_maskz_cvttpd_epu32(), _mm512_maskz_cvtusepi16_epi8(), _mm512_maskz_cvtusepi32_epi16(), _mm512_maskz_cvtusepi64_epi32(), and _mm512_zextsi256_si512().
|
static |
Calculates the square roots of the values in a 256-bit vector of [4 x double].
This intrinsic corresponds to the VSQRTPD
instruction.
__a | A 256-bit vector of [4 x double]. |
Definition at line 339 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_sqrt_pd(), and _mm256_maskz_sqrt_pd().
|
static |
Calculates the square roots of the values in a 256-bit vector of [8 x float].
This intrinsic corresponds to the VSQRTPS
instruction.
__a | A 256-bit vector of [8 x float]. |
Definition at line 356 of file avxintrin.h.
References __a.
Referenced by _mm256_mask_sqrt_ps(), and _mm256_maskz_sqrt_ps().
|
static |
Stores double-precision floating point values from a 256-bit vector of [4 x double] to a 32-byte aligned memory location pointed to by __p.
This intrinsic corresponds to the VMOVAPD
instruction.
__p | A 32-byte aligned pointer to a memory location that will receive the double-precision floating point values. |
__a | A 256-bit vector of [4 x double] containing the values to be moved. |
Definition at line 3272 of file avxintrin.h.
|
static |
Stores single-precision floating point values from a 256-bit vector of [8 x float] to a 32-byte aligned memory location pointed to by __p.
This intrinsic corresponds to the VMOVAPS
instruction.
__p | A 32-byte aligned pointer to a memory location that will receive the float values. |
__a | A 256-bit vector of [8 x float] containing the values to be moved. |
Definition at line 3290 of file avxintrin.h.
|
static |
Stores integer values from a 256-bit integer vector to a 32-byte aligned memory location pointed to by __p.
This intrinsic corresponds to the VMOVDQA
instruction.
__p | A 32-byte aligned pointer to a memory location that will receive the integer values. |
__a | A 256-bit integer vector containing the values to be moved. |
Definition at line 3349 of file avxintrin.h.
|
static |
Stores the upper and lower 128 bits of a 256-bit floating-point vector of [8 x float] into two different unaligned memory locations.
This intrinsic corresponds to the VEXTRACTF128
instruction and the store instructions.
__addr_hi | A pointer to a 128-bit memory location. Bits[255:128] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned. |
__addr_lo | A pointer to a 128-bit memory location. Bits[127:0] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned. |
__a | A 256-bit floating-point vector of [8 x float]. |
Definition at line 5055 of file avxintrin.h.
References __a, _mm256_castps256_ps128(), _mm256_extractf128_ps, and _mm_storeu_ps().
|
static |
Stores the upper and lower 128 bits of a 256-bit floating-point vector of [4 x double] into two different unaligned memory locations.
This intrinsic corresponds to the VEXTRACTF128
instruction and the store instructions.
__addr_hi | A pointer to a 128-bit memory location. Bits[255:128] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned. |
__addr_lo | A pointer to a 128-bit memory location. Bits[127:0] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned. |
__a | A 256-bit floating-point vector of [4 x double]. |
Definition at line 5084 of file avxintrin.h.
References __a, _mm256_castpd256_pd128(), _mm256_extractf128_pd, and _mm_storeu_pd().
|
static |
Stores the upper and lower 128 bits of a 256-bit integer vector into two different unaligned memory locations.
This intrinsic corresponds to the VEXTRACTF128
instruction and the store instructions.
__addr_hi | A pointer to a 128-bit memory location. Bits[255:128] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned. |
__addr_lo | A pointer to a 128-bit memory location. Bits[127:0] of __a are to be copied to this memory location. The address of this memory location does not have to be aligned. |
__a | A 256-bit integer vector. |
Definition at line 5113 of file avxintrin.h.
References __a, _mm256_castsi256_si128(), _mm256_extractf128_si256, and _mm_storeu_si128().
|
static |
Stores double-precision floating point values from a 256-bit vector of [4 x double] to an unaligned memory location pointed to by __p.
This intrinsic corresponds to the VMOVUPD
instruction.
__p | A pointer to a memory location that will receive the double-precision floating point values. |
__a | A 256-bit vector of [4 x double] containing the values to be moved. |
Definition at line 3308 of file avxintrin.h.
|
static |
Stores single-precision floating point values from a 256-bit vector of [8 x float] to an unaligned memory location pointed to by __p.
This intrinsic corresponds to the VMOVUPS
instruction.
__p | A pointer to a memory location that will receive the float values. |
__a | A 256-bit vector of [8 x float] containing the values to be moved. |
Definition at line 3328 of file avxintrin.h.
|
static |
Stores integer values from a 256-bit integer vector to an unaligned memory location pointed to by __p.
This intrinsic corresponds to the VMOVDQU
instruction.
__p | A pointer to a memory location that will receive the integer values. |
__a | A 256-bit integer vector containing the values to be moved. |
Definition at line 3366 of file avxintrin.h.
|
static |
Moves double-precision values from a 256-bit vector of [4 x double] to a 32-byte aligned memory location.
To minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
This intrinsic corresponds to the VMOVNTPD
instruction.
__a | A pointer to a 32-byte aligned memory location that will receive the double-precision floating-point values. |
__b | A 256-bit vector of [4 x double] containing the values to be moved. |
Definition at line 3604 of file avxintrin.h.
|
static |
Moves single-precision floating point values from a 256-bit vector of [8 x float] to a 32-byte aligned memory location.
To minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
This intrinsic corresponds to the VMOVNTPS
instruction.
__p | A pointer to a 32-byte aligned memory location that will receive the single-precision floating point values. |
__a | A 256-bit vector of [8 x float] containing the values to be moved. |
Definition at line 3625 of file avxintrin.h.
|
static |
Moves integer data from a 256-bit integer vector to a 32-byte aligned memory location.
To minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
This intrinsic corresponds to the VMOVNTDQ
instruction.
__a | A pointer to a 32-byte aligned memory location that will receive the integer values. |
__b | A 256-bit integer vector containing the values to be moved. |
Definition at line 3584 of file avxintrin.h.
|
static |
Subtracts two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VSUBPD
instruction.
__a | A 256-bit vector of [4 x double] containing the minuend. |
__b | A 256-bit vector of [4 x double] containing the subtrahend. |
Definition at line 110 of file avxintrin.h.
Referenced by _mm256_mask_sub_pd(), and _mm256_maskz_sub_pd().
|
static |
Subtracts two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VSUBPS
instruction.
__a | A 256-bit vector of [8 x float] containing the minuend. |
__b | A 256-bit vector of [8 x float] containing the subtrahend. |
Definition at line 128 of file avxintrin.h.
Referenced by _mm256_mask_sub_ps(), and _mm256_maskz_sub_ps().
|
static |
Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the CF flag.
This intrinsic corresponds to the VTESTPD
instruction.
__a | A 256-bit vector of [4 x double]. |
__b | A 256-bit vector of [4 x double]. |
Definition at line 2760 of file avxintrin.h.
|
static |
Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision elements in the first source vector and the corresponding elements in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the CF flag.
This intrinsic corresponds to the VTESTPS
instruction.
__a | A 256-bit vector of [8 x float]. |
__b | A 256-bit vector of [8 x float]. |
Definition at line 2848 of file avxintrin.h.
|
static |
Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors.
The EFLAGS register is updated as follows:
If there is at least one pair of bits where both bits are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of bits where the bit from the first source vector is 0 and the bit from the second source vector is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the CF flag.
This intrinsic corresponds to the VPTEST
instruction.
__a | A 256-bit integer vector. |
__b | A 256-bit integer vector. |
Definition at line 2930 of file avxintrin.h.
|
static |
Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns 1 if both the ZF and CF flags are set to 0, otherwise it returns 0.
This intrinsic corresponds to the VTESTPD
instruction.
__a | A 256-bit vector of [4 x double]. |
__b | A 256-bit vector of [4 x double]. |
Definition at line 2790 of file avxintrin.h.
|
static |
Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision elements in the first source vector and the corresponding elements in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns 1 if both the ZF and CF flags are set to 0, otherwise it returns 0.
This intrinsic corresponds to the VTESTPS
instruction.
__a | A 256-bit vector of [8 x float]. |
__b | A 256-bit vector of [8 x float]. |
Definition at line 2878 of file avxintrin.h.
|
static |
Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors.
The EFLAGS register is updated as follows:
If there is at least one pair of bits where both bits are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of bits where the bit from the first source vector is 0 and the bit from the second source vector is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns 1 if both the ZF and CF flags are set to 0, otherwise it returns 0.
This intrinsic corresponds to the VPTEST
instruction.
__a | A 256-bit integer vector. |
__b | A 256-bit integer vector. |
Definition at line 2957 of file avxintrin.h.
|
static |
Given two 256-bit floating-point vectors of [4 x double], perform an element-by-element comparison of the double-precision elements in the first source vector and the corresponding elements in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the ZF flag.
This intrinsic corresponds to the VTESTPD
instruction.
__a | A 256-bit vector of [4 x double]. |
__b | A 256-bit vector of [4 x double]. |
Definition at line 2731 of file avxintrin.h.
|
static |
Given two 256-bit floating-point vectors of [8 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the ZF flag.
This intrinsic corresponds to the VTESTPS
instruction.
__a | A 256-bit vector of [8 x float]. |
__b | A 256-bit vector of [8 x float]. |
Definition at line 2819 of file avxintrin.h.
|
static |
Given two 256-bit integer vectors, perform a bit-by-bit comparison of the two source vectors.
The EFLAGS register is updated as follows:
If there is at least one pair of bits where both bits are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of bits where the bit from the first source vector is 0 and the bit from the second source vector is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the ZF flag.
This intrinsic corresponds to the VPTEST
instruction.
__a | A 256-bit integer vector. |
__b | A 256-bit integer vector. |
Definition at line 2904 of file avxintrin.h.
|
static |
Create a 256-bit vector of [4 x double] with undefined values.
This intrinsic has no corresponding instruction.
Definition at line 3640 of file avxintrin.h.
|
static |
Create a 256-bit vector of [8 x float] with undefined values.
This intrinsic has no corresponding instruction.
Definition at line 3653 of file avxintrin.h.
Referenced by _mm512_cvtpd_ps().
|
static |
Create a 256-bit integer vector with undefined values.
This intrinsic has no corresponding instruction.
Definition at line 3666 of file avxintrin.h.
Referenced by _mm512_cvtepi16_epi8(), _mm512_cvtepi32_epi16(), _mm512_cvtepi64_epi32(), _mm512_cvtpd_epi32(), _mm512_cvtpd_epu32(), _mm512_cvtsepi32_epi16(), _mm512_cvtsepi64_epi32(), _mm512_cvttpd_epu32(), _mm512_cvtusepi32_epi16(), and _mm512_cvtusepi64_epi32().
|
static |
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double].
This intrinsic corresponds to the VUNPCKHPD
instruction.
__a | A 256-bit floating-point vector of [4 x double]. Bits [127:64] are written to bits [63:0] of the return value. Bits [255:192] are written to bits [191:128] of the return value. |
__b | A 256-bit floating-point vector of [4 x double]. Bits [127:64] are written to bits [127:64] of the return value. Bits [255:192] are written to bits [255:192] of the return value. |
Definition at line 2449 of file avxintrin.h.
Referenced by _mm256_mask_unpackhi_pd(), and _mm256_maskz_unpackhi_pd().
|
static |
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] and interleaves them into a 256-bit vector of [8 x float].
This intrinsic corresponds to the VUNPCKHPS
instruction.
__a | A 256-bit vector of [8 x float]. Bits [95:64] are written to bits [31:0] of the return value. Bits [127:96] are written to bits [95:64] of the return value. Bits [223:192] are written to bits [159:128] of the return value. Bits [255:224] are written to bits [223:192] of the return value. |
__b | A 256-bit vector of [8 x float]. Bits [95:64] are written to bits [63:32] of the return value. Bits [127:96] are written to bits [127:96] of the return value. Bits [223:192] are written to bits [191:160] of the return value. Bits [255:224] are written to bits [255:224] of the return value. |
Definition at line 2498 of file avxintrin.h.
Referenced by _mm256_mask_unpackhi_ps(), and _mm256_maskz_unpackhi_ps().
|
static |
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double].
This intrinsic corresponds to the VUNPCKLPD
instruction.
__a | A 256-bit floating-point vector of [4 x double]. Bits [63:0] are written to bits [63:0] of the return value. Bits [191:128] are written to bits [191:128] of the return value. |
__b | A 256-bit floating-point vector of [4 x double]. Bits [63:0] are written to bits [127:64] of the return value. Bits [191:128] are written to bits [255:192] of the return value. |
Definition at line 2471 of file avxintrin.h.
Referenced by _mm256_mask_unpacklo_pd(), and _mm256_maskz_unpacklo_pd().
|
static |
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] and interleaves them into a 256-bit vector of [8 x float].
This intrinsic corresponds to the VUNPCKLPS
instruction.
__a | A 256-bit vector of [8 x float]. Bits [31:0] are written to bits [31:0] of the return value. Bits [63:32] are written to bits [95:64] of the return value. Bits [159:128] are written to bits [159:128] of the return value. Bits [191:160] are written to bits [223:192] of the return value. |
__b | A 256-bit vector of [8 x float]. Bits [31:0] are written to bits [63:32] of the return value. Bits [63:32] are written to bits [127:96] of the return value. Bits [159:128] are written to bits [191:160] of the return value. Bits [191:160] are written to bits [255:224] of the return value. |
Definition at line 2525 of file avxintrin.h.
Referenced by _mm256_mask_unpacklo_ps(), and _mm256_maskz_unpacklo_ps().
|
static |
Performs a bitwise XOR of two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VXORPD
instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. |
Definition at line 656 of file avxintrin.h.
Referenced by _mm256_mask_xor_pd(), and _mm256_maskz_xor_pd().
|
static |
Performs a bitwise XOR of two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VXORPS
instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. |
Definition at line 674 of file avxintrin.h.
Referenced by _mm256_mask_xor_ps(), and _mm256_maskz_xor_ps().
|
static |
Constructs a 256-bit floating-point vector of [4 x double] from a 128-bit floating-point vector of [2 x double].
The lower 128 bits contain the value of the source vector. The upper 128 bits are set to zero.
This intrinsic has no corresponding instruction.
__a | A 128-bit vector of [2 x double]. |
Definition at line 4590 of file avxintrin.h.
References __a, and _mm_setzero_pd().
|
static |
Constructs a 256-bit floating-point vector of [8 x float] from a 128-bit floating-point vector of [4 x float].
The lower 128 bits contain the value of the source vector. The upper 128 bits are set to zero.
This intrinsic has no corresponding instruction.
__a | A 128-bit vector of [4 x float]. |
Definition at line 4608 of file avxintrin.h.
References __a, and _mm_setzero_ps().
|
static |
Constructs a 256-bit integer vector from a 128-bit integer vector.
The lower 128 bits contain the value of the source vector. The upper 128 bits are set to zero.
This intrinsic has no corresponding instruction.
__a | A 128-bit integer vector. |
Definition at line 4626 of file avxintrin.h.
References __a, and _mm_setzero_si128().
|
static |
Loads a scalar single-precision floating point value from the specified address pointed to by __a and broadcasts it to the elements of a [4 x float] vector.
This intrinsic corresponds to the VBROADCASTSS
instruction.
__a | A pointer to the single-precision floating point value to be broadcast. |
Definition at line 3036 of file avxintrin.h.
References __a.
|
static |
Conditionally loads double-precision floating point elements from a memory location pointed to by __p into a 128-bit vector of [2 x double], depending on the mask bits associated with each data element.
This intrinsic corresponds to the VMASKMOVPD
instruction.
__p | A pointer to a memory location that contains the double-precision floating point values. |
__m | A 128-bit integer vector containing the mask. The most significant bit of each data element is the mask bit for that element. If a mask bit is zero, the corresponding value in the memory location is not loaded and the corresponding field in the return value is set to zero. |
Definition at line 3394 of file avxintrin.h.
References __p.
|
static |
Conditionally loads single-precision floating point elements from a memory location pointed to by __p into a 128-bit vector of [4 x float], depending on the mask bits associated with each data element.
This intrinsic corresponds to the VMASKMOVPS
instruction.
__p | A pointer to a memory location that contains the single-precision floating point values. |
__m | A 128-bit integer vector containing the mask. The most significant bit of each data element is the mask bit for that element. If a mask bit is zero, the corresponding value in the memory location is not loaded and the corresponding field in the return value is set to zero. |
Definition at line 3443 of file avxintrin.h.
References __p.
|
static |
Moves double-precision values from a 128-bit vector of [2 x double] to a memory location pointed to by __p, according to the specified mask.
This intrinsic corresponds to the VMASKMOVPD
instruction.
__p | A pointer to a memory location that will receive the double-precision values. |
__m | A 128-bit integer vector containing the mask. The most significant bit of each field in the mask vector is the mask bit for that field. If a mask bit is zero, the corresponding value from vector __a is not stored and the corresponding field in the memory location pointed to by __p is not changed. |
__a | A 128-bit vector of [2 x double] containing the values to be stored. |
Definition at line 3516 of file avxintrin.h.
|
static |
Moves single-precision floating point values from a 128-bit vector of [4 x float] to a memory location pointed to by __p, according to the specified mask.
This intrinsic corresponds to the VMASKMOVPS
instruction.
__p | A pointer to a memory location that will receive the float values. |
__m | A 128-bit integer vector containing the mask. The most significant bit of each field in the mask vector is the mask bit for that field. If a mask bit is zero, the corresponding value from vector __a is not stored and the corresponding field in the memory location pointed to by __p is not changed. |
__a | A 128-bit vector of [4 x float] containing the values to be stored. |
Definition at line 3564 of file avxintrin.h.
|
static |
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector operand.
This intrinsic corresponds to the VPERMILPD
instruction.
__a | A 128-bit vector of [2 x double]. |
__c | A 128-bit integer vector operand specifying how the values are to be copied. Bit [1]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [65]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector. |
Definition at line 797 of file avxintrin.h.
Referenced by _mm_mask_permutevar_pd(), and _mm_maskz_permutevar_pd().
|
static |
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand.
This intrinsic corresponds to the VPERMILPS
instruction.
__a | A 128-bit vector of [4 x float]. |
__c | A 128-bit integer vector operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [33:32]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [65:64]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [97:96]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector. |
Definition at line 891 of file avxintrin.h.
Referenced by _mm_mask_permutevar_ps(), and _mm_maskz_permutevar_ps().
|
static |
Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the CF flag.
This intrinsic corresponds to the VTESTPD
instruction.
__a | A 128-bit vector of [2 x double]. |
__b | A 128-bit vector of [2 x double]. |
Definition at line 2584 of file avxintrin.h.
|
static |
Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the CF flag.
This intrinsic corresponds to the VTESTPS
instruction.
__a | A 128-bit vector of [4 x float]. |
__b | A 128-bit vector of [4 x float]. |
Definition at line 2672 of file avxintrin.h.
|
static |
Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns 1 if both the ZF and CF flags are set to 0, otherwise it returns 0.
This intrinsic corresponds to the VTESTPD
instruction.
__a | A 128-bit vector of [2 x double]. |
__b | A 128-bit vector of [2 x double]. |
Definition at line 2614 of file avxintrin.h.
|
static |
Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns 1 if both the ZF and CF flags are set to 0, otherwise it returns 0.
This intrinsic corresponds to the VTESTPS
instruction.
__a | A 128-bit vector of [4 x float]. |
__b | A 128-bit vector of [4 x float]. |
Definition at line 2702 of file avxintrin.h.
|
static |
Given two 128-bit floating-point vectors of [2 x double], perform an element-by-element comparison of the double-precision element in the first source vector and the corresponding element in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of double-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of double-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the ZF flag.
This intrinsic corresponds to the VTESTPD
instruction.
__a | A 128-bit vector of [2 x double]. |
__b | A 128-bit vector of [2 x double]. |
Definition at line 2555 of file avxintrin.h.
|
static |
Given two 128-bit floating-point vectors of [4 x float], perform an element-by-element comparison of the single-precision element in the first source vector and the corresponding element in the second source vector.
The EFLAGS register is updated as follows:
If there is at least one pair of single-precision elements where the sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the ZF flag is set to 1.
If there is at least one pair of single-precision elements where the sign-bit of the first element is 0 and the sign-bit of the second element is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1.
This intrinsic returns the value of the ZF flag.
This intrinsic corresponds to the VTESTPS
instruction.
__a | A 128-bit vector of [4 x float]. |
__b | A 128-bit vector of [4 x float]. |
Definition at line 2643 of file avxintrin.h.