clang
3.9.0
|
Go to the source code of this file.
Macros | |
#define | __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"))) |
#define | _mm256_round_pd(V, M) |
Rounds the values in a 256-bit vector of [4 x double] as specified by the byte operand. More... | |
#define | _mm256_round_ps(V, M) |
Rounds the values stored in a 256-bit vector of [8 x float] as specified by the byte operand. More... | |
#define | _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL) |
Rounds up the values stored in a 256-bit vector of [4 x double]. More... | |
#define | _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR) |
Rounds down the values stored in a 256-bit vector of [4 x double]. More... | |
#define | _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL) |
Rounds up the values stored in a 256-bit vector of [8 x float]. More... | |
#define | _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) |
Rounds down the values stored in a 256-bit vector of [8 x float]. More... | |
#define | _mm_permute_pd(A, C) |
Copies the values in a 128-bit vector of [2 x double] as specified by the immediate integer operand. More... | |
#define | _mm256_permute_pd(A, C) |
Copies the values in a 256-bit vector of [4 x double] as specified by the immediate integer operand. More... | |
#define | _mm_permute_ps(A, C) |
Copies the values in a 128-bit vector of [4 x float] as specified by the immediate integer operand. More... | |
#define | _mm256_permute_ps(A, C) |
Copies the values in a 256-bit vector of [8 x float] as specified by the immediate integer operand. More... | |
#define | _mm256_permute2f128_pd(V1, V2, M) |
Permutes 128-bit data values stored in two 256-bit vectors of [4 x double], as specified by the immediate integer operand. More... | |
#define | _mm256_permute2f128_ps(V1, V2, M) |
Permutes 128-bit data values stored in two 256-bit vectors of [8 x float], as specified by the immediate integer operand. More... | |
#define | _mm256_permute2f128_si256(V1, V2, M) |
Permutes 128-bit data values stored in two 256-bit integer vectors, as specified by the immediate integer operand. More... | |
#define | _mm256_blend_pd(V1, V2, M) |
Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the immediate integer operand. More... | |
#define | _mm256_blend_ps(V1, V2, M) |
Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the immediate integer operand. More... | |
#define | _mm256_dp_ps(V1, V2, M) |
Computes two dot products in parallel, using the lower and upper halves of two [8 x float] vectors as input to the two computations, and returning the two dot products in the lower and upper halves of the [8 x float] result. More... | |
#define | _mm256_shuffle_ps(a, b, mask) |
Selects 8 float values from the 256-bit operands of [8 x float], as specified by the immediate value operand. More... | |
#define | _mm256_shuffle_pd(a, b, mask) |
Selects four double-precision values from the 256-bit operands of [4 x double], as specified by the immediate value operand. More... | |
#define | _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ |
#define | _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ |
#define | _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */ |
#define | _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */ |
#define | _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */ |
#define | _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */ |
#define | _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */ |
#define | _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */ |
#define | _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ |
#define | _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */ |
#define | _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ |
#define | _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */ |
#define | _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */ |
#define | _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */ |
#define | _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */ |
#define | _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */ |
#define | _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */ |
#define | _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ |
#define | _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */ |
#define | _CMP_UNORD_S 0x13 /* Unordered (signaling) */ |
#define | _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */ |
#define | _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */ |
#define | _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */ |
#define | _CMP_ORD_S 0x17 /* Ordered (signaling) */ |
#define | _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */ |
#define | _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */ |
#define | _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */ |
#define | _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */ |
#define | _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */ |
#define | _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */ |
#define | _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ |
#define | _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ |
#define | _mm_cmp_pd(a, b, c) |
Compares each of the corresponding double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand. More... | |
#define | _mm_cmp_ps(a, b, c) |
Compares each of the corresponding values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand. More... | |
#define | _mm256_cmp_pd(a, b, c) |
Compares each of the corresponding double-precision values of two 256-bit vectors of [4 x double], using the operation specified by the immediate integer operand. More... | |
#define | _mm256_cmp_ps(a, b, c) |
Compares each of the corresponding values of two 256-bit vectors of [8 x float], using the operation specified by the immediate integer operand. More... | |
#define | _mm_cmp_sd(a, b, c) |
Compares each of the corresponding scalar double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand. More... | |
#define | _mm_cmp_ss(a, b, c) |
Compares each of the corresponding scalar values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand. More... | |
#define | _mm256_insertf128_ps(V1, V2, M) |
#define | _mm256_insertf128_pd(V1, V2, M) |
#define | _mm256_insertf128_si256(V1, V2, M) |
#define | _mm256_extractf128_ps(V, M) |
#define | _mm256_extractf128_pd(V, M) |
#define | _mm256_extractf128_si256(V, M) |
Typedefs | |
typedef double __v4df | __attribute__ ((__vector_size__(32))) |
Functions | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_add_pd (__m256d __a, __m256d __b) |
Adds two 256-bit vectors of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_add_ps (__m256 __a, __m256 __b) |
Adds two 256-bit vectors of [8 x float]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_sub_pd (__m256d __a, __m256d __b) |
Subtracts two 256-bit vectors of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_sub_ps (__m256 __a, __m256 __b) |
Subtracts two 256-bit vectors of [8 x float]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_addsub_pd (__m256d __a, __m256d __b) |
Adds the even-indexed values and subtracts the odd-indexed values of two 256-bit vectors of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_addsub_ps (__m256 __a, __m256 __b) |
Adds the even-indexed values and subtracts the odd-indexed values of two 256-bit vectors of [8 x float]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_div_pd (__m256d __a, __m256d __b) |
Divides two 256-bit vectors of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_div_ps (__m256 __a, __m256 __b) |
Divides two 256-bit vectors of [8 x float]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_max_pd (__m256d __a, __m256d __b) |
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_max_ps (__m256 __a, __m256 __b) |
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_min_pd (__m256d __a, __m256d __b) |
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_min_ps (__m256 __a, __m256 __b) |
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_mul_pd (__m256d __a, __m256d __b) |
Multiplies two 256-bit vectors of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_mul_ps (__m256 __a, __m256 __b) |
Multiplies two 256-bit vectors of [8 x float]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_sqrt_pd (__m256d __a) |
Calculates the square roots of the values in a 256-bit vector of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_sqrt_ps (__m256 __a) |
Calculates the square roots of the values in a 256-bit vector of [8 x float]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_rsqrt_ps (__m256 __a) |
Calculates the reciprocal square roots of the values in a 256-bit vector of [8 x float]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_rcp_ps (__m256 __a) |
Calculates the reciprocals of the values in a 256-bit vector of [8 x float]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_and_pd (__m256d __a, __m256d __b) |
Performs a bitwise AND of two 256-bit vectors of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_and_ps (__m256 __a, __m256 __b) |
Performs a bitwise AND of two 256-bit vectors of [8 x float]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_andnot_pd (__m256d __a, __m256d __b) |
Performs a bitwise AND of two 256-bit vectors of [4 x double], using the one's complement of the values contained in the first source operand. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_andnot_ps (__m256 __a, __m256 __b) |
Performs a bitwise AND of two 256-bit vectors of [8 x float], using the one's complement of the values contained in the first source operand. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_or_pd (__m256d __a, __m256d __b) |
Performs a bitwise OR of two 256-bit vectors of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_or_ps (__m256 __a, __m256 __b) |
Performs a bitwise OR of two 256-bit vectors of [8 x float]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_xor_pd (__m256d __a, __m256d __b) |
Performs a bitwise XOR of two 256-bit vectors of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_xor_ps (__m256 __a, __m256 __b) |
Performs a bitwise XOR of two 256-bit vectors of [8 x float]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_hadd_pd (__m256d __a, __m256d __b) |
Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_hadd_ps (__m256 __a, __m256 __b) |
Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [8 x float]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_hsub_pd (__m256d __a, __m256d __b) |
Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_hsub_ps (__m256 __a, __m256 __b) |
Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [8 x float]. More... | |
static __inline __m128d __DEFAULT_FN_ATTRS | _mm_permutevar_pd (__m128d __a, __m128i __c) |
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector operand. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_permutevar_pd (__m256d __a, __m256i __c) |
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector operand. More... | |
static __inline __m128 __DEFAULT_FN_ATTRS | _mm_permutevar_ps (__m128 __a, __m128i __c) |
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_permutevar_ps (__m256 __a, __m256i __c) |
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_blendv_pd (__m256d __a, __m256d __b, __m256d __c) |
Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the 256-bit vector operand. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_blendv_ps (__m256 __a, __m256 __b, __m256 __c) |
Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the 256-bit vector operand. More... | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_extract_epi32 (__m256i __a, const int __imm) |
Takes a [8 x i32] vector and returns the vector element value indexed by the immediate constant operand. More... | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_extract_epi16 (__m256i __a, const int __imm) |
Takes a [16 x i16] vector and returns the vector element value indexed by the immediate constant operand. More... | |
static __inline int __DEFAULT_FN_ATTRS | _mm256_extract_epi8 (__m256i __a, const int __imm) |
Takes a [32 x i8] vector and returns the vector element value indexed by the immediate constant operand. More... | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_insert_epi32 (__m256i __a, int __b, int const __imm) |
Takes a [8 x i32] vector and replaces the vector element value indexed by the immediate constant operand by a new value. More... | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_insert_epi16 (__m256i __a, int __b, int const __imm) |
Takes a [16 x i16] vector and replaces the vector element value indexed by the immediate constant operand with a new value. More... | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_insert_epi8 (__m256i __a, int __b, int const __imm) |
Takes a [32 x i8] vector and replaces the vector element value indexed by the immediate constant operand with a new value. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_cvtepi32_pd (__m128i __a) |
Converts a vector of [4 x i32] into a vector of [4 x double]. More... | |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_cvtepi32_ps (__m256i __a) |
Converts a vector of [8 x i32] into a vector of [8 x float]. More... | |
static __inline __m128 __DEFAULT_FN_ATTRS | _mm256_cvtpd_ps (__m256d __a) |
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float]. More... | |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_cvtps_epi32 (__m256 __a) |
Converts a vector of [8 x float] into a vector of [8 x i32]. More... | |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_cvtps_pd (__m128 __a) |
static __inline __m128i __DEFAULT_FN_ATTRS | _mm256_cvttpd_epi32 (__m256d __a) |
static __inline __m128i __DEFAULT_FN_ATTRS | _mm256_cvtpd_epi32 (__m256d __a) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_cvttps_epi32 (__m256 __a) |
static __inline double __DEFAULT_FN_ATTRS | _mm256_cvtsd_f64 (__m256d __a) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_cvtsi256_si32 (__m256i __a) |
static __inline float __DEFAULT_FN_ATTRS | _mm256_cvtss_f32 (__m256 __a) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_movehdup_ps (__m256 __a) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_moveldup_ps (__m256 __a) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_movedup_pd (__m256d __a) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_unpackhi_pd (__m256d __a, __m256d __b) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_unpacklo_pd (__m256d __a, __m256d __b) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_unpackhi_ps (__m256 __a, __m256 __b) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_unpacklo_ps (__m256 __a, __m256 __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm_testz_pd (__m128d __a, __m128d __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm_testc_pd (__m128d __a, __m128d __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm_testnzc_pd (__m128d __a, __m128d __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm_testz_ps (__m128 __a, __m128 __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm_testc_ps (__m128 __a, __m128 __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm_testnzc_ps (__m128 __a, __m128 __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testz_pd (__m256d __a, __m256d __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testc_pd (__m256d __a, __m256d __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testnzc_pd (__m256d __a, __m256d __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testz_ps (__m256 __a, __m256 __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testc_ps (__m256 __a, __m256 __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testnzc_ps (__m256 __a, __m256 __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testz_si256 (__m256i __a, __m256i __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testc_si256 (__m256i __a, __m256i __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_testnzc_si256 (__m256i __a, __m256i __b) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_movemask_pd (__m256d __a) |
static __inline int __DEFAULT_FN_ATTRS | _mm256_movemask_ps (__m256 __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_zeroall (void) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_zeroupper (void) |
static __inline __m128 __DEFAULT_FN_ATTRS | _mm_broadcast_ss (float const *__a) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_broadcast_sd (double const *__a) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_broadcast_ss (float const *__a) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_broadcast_pd (__m128d const *__a) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_broadcast_ps (__m128 const *__a) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_load_pd (double const *__p) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_load_ps (float const *__p) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_loadu_pd (double const *__p) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_loadu_ps (float const *__p) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_load_si256 (__m256i const *__p) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_loadu_si256 (__m256i const *__p) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_lddqu_si256 (__m256i const *__p) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_store_pd (double *__p, __m256d __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_store_ps (float *__p, __m256 __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu_pd (double *__p, __m256d __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu_ps (float *__p, __m256 __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_store_si256 (__m256i *__p, __m256i __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu_si256 (__m256i *__p, __m256i __a) |
static __inline __m128d __DEFAULT_FN_ATTRS | _mm_maskload_pd (double const *__p, __m128i __m) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_maskload_pd (double const *__p, __m256i __m) |
static __inline __m128 __DEFAULT_FN_ATTRS | _mm_maskload_ps (float const *__p, __m128i __m) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_maskload_ps (float const *__p, __m256i __m) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_maskstore_ps (float *__p, __m256i __m, __m256 __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm_maskstore_pd (double *__p, __m128i __m, __m128d __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_maskstore_pd (double *__p, __m256i __m, __m256d __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm_maskstore_ps (float *__p, __m128i __m, __m128 __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_stream_si256 (__m256i *__a, __m256i __b) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_stream_pd (double *__a, __m256d __b) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_stream_ps (float *__p, __m256 __a) |
static __inline__ __m256d __DEFAULT_FN_ATTRS | _mm256_undefined_pd (void) |
static __inline__ __m256 __DEFAULT_FN_ATTRS | _mm256_undefined_ps (void) |
static __inline__ __m256i __DEFAULT_FN_ATTRS | _mm256_undefined_si256 (void) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_set_pd (double __a, double __b, double __c, double __d) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_set_ps (float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set_epi32 (int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set_epi16 (short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set_epi8 (char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set_epi64x (long long __a, long long __b, long long __c, long long __d) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_setr_pd (double __a, double __b, double __c, double __d) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_setr_ps (float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setr_epi32 (int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setr_epi16 (short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setr_epi8 (char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setr_epi64x (long long __a, long long __b, long long __c, long long __d) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_set1_pd (double __w) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_set1_ps (float __w) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set1_epi32 (int __i) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set1_epi16 (short __w) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set1_epi8 (char __b) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set1_epi64x (long long __q) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_setzero_pd (void) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_setzero_ps (void) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setzero_si256 (void) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_castpd_ps (__m256d __a) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_castpd_si256 (__m256d __a) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_castps_pd (__m256 __a) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_castps_si256 (__m256 __a) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_castsi256_ps (__m256i __a) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_castsi256_pd (__m256i __a) |
static __inline __m128d __DEFAULT_FN_ATTRS | _mm256_castpd256_pd128 (__m256d __a) |
static __inline __m128 __DEFAULT_FN_ATTRS | _mm256_castps256_ps128 (__m256 __a) |
static __inline __m128i __DEFAULT_FN_ATTRS | _mm256_castsi256_si128 (__m256i __a) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_castpd128_pd256 (__m128d __a) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_castps128_ps256 (__m128 __a) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_castsi128_si256 (__m128i __a) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_loadu2_m128 (float const *__addr_hi, float const *__addr_lo) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_loadu2_m128d (double const *__addr_hi, double const *__addr_lo) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_loadu2_m128i (__m128i const *__addr_hi, __m128i const *__addr_lo) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu2_m128 (float *__addr_hi, float *__addr_lo, __m256 __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu2_m128d (double *__addr_hi, double *__addr_lo, __m256d __a) |
static __inline void __DEFAULT_FN_ATTRS | _mm256_storeu2_m128i (__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_set_m128 (__m128 __hi, __m128 __lo) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_set_m128d (__m128d __hi, __m128d __lo) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_set_m128i (__m128i __hi, __m128i __lo) |
static __inline __m256 __DEFAULT_FN_ATTRS | _mm256_setr_m128 (__m128 __lo, __m128 __hi) |
static __inline __m256d __DEFAULT_FN_ATTRS | _mm256_setr_m128d (__m128d __lo, __m128d __hi) |
static __inline __m256i __DEFAULT_FN_ATTRS | _mm256_setr_m128i (__m128i __lo, __m128i __hi) |
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"))) |
Definition at line 53 of file avxintrin.h.
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ |
Definition at line 1605 of file avxintrin.h.
#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */ |
Definition at line 1621 of file avxintrin.h.
#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ |
Definition at line 1613 of file avxintrin.h.
#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */ |
Definition at line 1629 of file avxintrin.h.
#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */ |
Definition at line 1616 of file avxintrin.h.
#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */ |
Definition at line 1632 of file avxintrin.h.
#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */ |
Definition at line 1634 of file avxintrin.h.
#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */ |
Definition at line 1618 of file avxintrin.h.
#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ |
Definition at line 1635 of file avxintrin.h.
#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */ |
Definition at line 1619 of file avxintrin.h.
#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */ |
Definition at line 1623 of file avxintrin.h.
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */ |
Definition at line 1607 of file avxintrin.h.
#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ |
Definition at line 1622 of file avxintrin.h.
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ |
Definition at line 1606 of file avxintrin.h.
#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */ |
Definition at line 1617 of file avxintrin.h.
#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */ |
Definition at line 1633 of file avxintrin.h.
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */ |
Definition at line 1609 of file avxintrin.h.
#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */ |
Definition at line 1625 of file avxintrin.h.
#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */ |
Definition at line 1630 of file avxintrin.h.
#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */ |
Definition at line 1614 of file avxintrin.h.
#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */ |
Definition at line 1631 of file avxintrin.h.
#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ |
Definition at line 1615 of file avxintrin.h.
#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */ |
Definition at line 1627 of file avxintrin.h.
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */ |
Definition at line 1611 of file avxintrin.h.
#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */ |
Definition at line 1626 of file avxintrin.h.
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */ |
Definition at line 1610 of file avxintrin.h.
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */ |
Definition at line 1612 of file avxintrin.h.
#define _CMP_ORD_S 0x17 /* Ordered (signaling) */ |
Definition at line 1628 of file avxintrin.h.
#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */ |
Definition at line 1620 of file avxintrin.h.
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ |
Definition at line 1636 of file avxintrin.h.
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */ |
Definition at line 1608 of file avxintrin.h.
#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */ |
Definition at line 1624 of file avxintrin.h.
#define _mm256_blend_pd(V1, V2, M)
Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the immediate integer operand.
This intrinsic corresponds to the VBLENDPD / BLENDPD instruction.
V1 | A 256-bit vector of [4 x double]. |
V2 | A 256-bit vector of [4 x double]. |
M | An immediate integer operand, with mask bits [3:0] specifying how the values are to be copied. The position of the mask bit corresponds to the index of a copied value. When a mask bit is 0, the corresponding 64-bit element in operand V1 is copied to the same position in the destination. When a mask bit is 1, the corresponding 64-bit element in operand V2 is copied to the same position in the destination. |
Definition at line 1354 of file avxintrin.h.
#define _mm256_blend_ps(V1, V2, M)
Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the immediate integer operand.
This intrinsic corresponds to the VBLENDPS / BLENDPS instruction.
V1 | A 256-bit vector of [8 x float]. |
V2 | A 256-bit vector of [8 x float]. |
M | An immediate integer operand, with mask bits [7:0] specifying how the values are to be copied. The position of the mask bit corresponds to the index of a copied value. When a mask bit is 0, the corresponding 32-bit element in operand V1 is copied to the same position in the destination. When a mask bit is 1, the corresponding 32-bit element in operand V2 is copied to the same position in the destination. |
Definition at line 1386 of file avxintrin.h.
#define _mm256_ceil_pd | ( | V | ) | _mm256_round_pd((V), _MM_FROUND_CEIL) |
Rounds up the values stored in a 256-bit vector of [4 x double].
The source values are rounded up to integer values and returned as 64-bit double-precision floating-point values.
This intrinsic corresponds to the VROUNDPD
/ ROUNDPD instruction.
V | A 256-bit vector of [4 x double]. |
Definition at line 461 of file avxintrin.h.
#define _mm256_ceil_ps | ( | V | ) | _mm256_round_ps((V), _MM_FROUND_CEIL) |
Rounds up the values stored in a 256-bit vector of [8 x float].
The source values are rounded up to integer values and returned as floating-point values.
This intrinsic corresponds to the VROUNDPS
/ ROUNDPS instruction.
V | A 256-bit vector of [8 x float]. |
Definition at line 496 of file avxintrin.h.
#define _mm256_cmp_pd | ( | a, | |
b, | |||
c | |||
) |
Compares each of the corresponding double-precision values of two 256-bit vectors of [4 x double], using the operation specified by the immediate integer operand.
Returns a [4 x double] vector consisting of four doubles corresponding to the four comparison results: zero if the comparison is false, and all 1's if the comparison is true.
This intrinsic corresponds to the VCMPPD
/ CMPPD instruction.
a | A 256-bit vector of [4 x double]. |
b | A 256-bit vector of [4 x double]. |
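c | An immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h: Equal (ordered, non-signaling); 01h: Less than (ordered, signaling); 02h: Less than or equal (ordered, signaling); 03h: Unordered (non-signaling); 04h: Not equal (unordered, non-signaling); 05h: Not less than (unordered, signaling); 06h: Not less than or equal (unordered, signaling); 07h: Ordered (non-signaling); 08h: Equal (unordered, non-signaling); 09h: Not greater than or equal (unordered, signaling); 0Ah: Not greater than (unordered, signaling); 0Bh: False (ordered, non-signaling); 0Ch: Not equal (ordered, non-signaling); 0Dh: Greater than or equal (ordered, signaling); 0Eh: Greater than (ordered, signaling); 0Fh: True (unordered, non-signaling); 10h: Equal (ordered, signaling); 11h: Less than (ordered, non-signaling); 12h: Less than or equal (ordered, non-signaling); 13h: Unordered (signaling); 14h: Not equal (unordered, signaling); 15h: Not less than (unordered, non-signaling); 16h: Not less than or equal (unordered, non-signaling); 17h: Ordered (signaling); 18h: Equal (unordered, signaling); 19h: Not greater than or equal (unordered, non-signaling); 1Ah: Not greater than (unordered, non-signaling); 1Bh: False (ordered, signaling); 1Ch: Not equal (ordered, signaling); 1Dh: Greater than or equal (ordered, non-signaling); 1Eh: Greater than (ordered, non-signaling); 1Fh: True (unordered, signaling) |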
Definition at line 1742 of file avxintrin.h.
#define _mm256_cmp_ps | ( | a, | |
b, | |||
c | |||
) |
Compares each of the corresponding values of two 256-bit vectors of [8 x float], using the operation specified by the immediate integer operand.
Returns a [8 x float] vector consisting of eight floats corresponding to the eight comparison results: zero if the comparison is false, and all 1's if the comparison is true.
This intrinsic corresponds to the VCMPPS
/ CMPPS instruction.
a | A 256-bit vector of [8 x float]. |
b | A 256-bit vector of [8 x float]. |
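c | An immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h: Equal (ordered, non-signaling); 01h: Less than (ordered, signaling); 02h: Less than or equal (ordered, signaling); 03h: Unordered (non-signaling); 04h: Not equal (unordered, non-signaling); 05h: Not less than (unordered, signaling); 06h: Not less than or equal (unordered, signaling); 07h: Ordered (non-signaling); 08h: Equal (unordered, non-signaling); 09h: Not greater than or equal (unordered, signaling); 0Ah: Not greater than (unordered, signaling); 0Bh: False (ordered, non-signaling); 0Ch: Not equal (ordered, non-signaling); 0Dh: Greater than or equal (ordered, signaling); 0Eh: Greater than (ordered, signaling); 0Fh: True (unordered, non-signaling); 10h: Equal (ordered, signaling); 11h: Less than (ordered, non-signaling); 12h: Less than or equal (ordered, non-signaling); 13h: Unordered (signaling); 14h: Not equal (unordered, signaling); 15h: Not less than (unordered, non-signaling); 16h: Not less than or equal (unordered, non-signaling); 17h: Ordered (signaling); 18h: Equal (unordered, signaling); 19h: Not greater than or equal (unordered, non-signaling); 1Ah: Not greater than (unordered, non-signaling); 1Bh: False (ordered, signaling); 1Ch: Not equal (ordered, signaling); 1Dh: Greater than or equal (ordered, non-signaling); 1Eh: Greater than (ordered, non-signaling); 1Fh: True (unordered, signaling) |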
Definition at line 1778 of file avxintrin.h.
#define _mm256_dp_ps | ( | V1, | |
V2, | |||
M | |||
) |
Computes two dot products in parallel, using the lower and upper halves of two [8 x float] vectors as input to the two computations, and returning the two dot products in the lower and upper halves of the [8 x float] result.
The immediate integer operand controls which input elements will contribute to the dot product, and where the final results are returned. In general, for each dot product, the four corresponding elements of the input vectors are multiplied; the first two and second two products are summed, then the two sums are added to form the final result.
This intrinsic corresponds to the VDPPS
/ DPPS instruction.
V1 | A vector of [8 x float] values, treated as two [4 x float] vectors. |
V2 | A vector of [8 x float] values, treated as two [4 x float] vectors. |
M | An immediate integer argument. Bits [7:4] determine which elements of the input vectors are used, with bit [4] corresponding to the lowest element and bit [7] corresponding to the highest element of each [4 x float] subvector. If a bit is set, the corresponding elements from the two input vectors are used as an input for dot product; otherwise that input is treated as zero. Bits [3:0] determine which elements of the result will receive a copy of the final dot product, with bit [0] corresponding to the lowest element and bit [3] corresponding to the highest element of each [4 x float] subvector. If a bit is set, the dot product is returned in the corresponding element; otherwise that element is set to zero. The bitmask is applied in the same way to each of the two parallel dot product computations. |
Definition at line 1491 of file avxintrin.h.
#define _mm256_extractf128_pd | ( | V, | |
M | |||
) |
Definition at line 2826 of file avxintrin.h.
Referenced by _mm256_storeu2_m128d().
#define _mm256_extractf128_ps | ( | V, | |
M | |||
) |
Definition at line 2817 of file avxintrin.h.
Referenced by _mm256_storeu2_m128().
#define _mm256_extractf128_si256 | ( | V, | |
M | |||
) |
Definition at line 2833 of file avxintrin.h.
Referenced by _mm256_storeu2_m128i().
#define _mm256_floor_pd | ( | V | ) | _mm256_round_pd((V), _MM_FROUND_FLOOR) |
Rounds down the values stored in a 256-bit vector of [4 x double].
The source values are rounded down to integer values and returned as 64-bit double-precision floating-point values.
This intrinsic corresponds to the VROUNDPD
/ ROUNDPD instruction.
V | A 256-bit vector of [4 x double]. |
Definition at line 479 of file avxintrin.h.
#define _mm256_floor_ps | ( | V | ) | _mm256_round_ps((V), _MM_FROUND_FLOOR) |
Rounds down the values stored in a 256-bit vector of [8 x float].
The source values are rounded down to integer values and returned as floating-point values.
This intrinsic corresponds to the VROUNDPS
/ ROUNDPS instruction.
V | A 256-bit vector of [8 x float]. |
Definition at line 513 of file avxintrin.h.
#define _mm256_insertf128_pd | ( | V1, | |
V2, | |||
M | |||
) |
Definition at line 2794 of file avxintrin.h.
Referenced by _mm256_loadu2_m128d().
#define _mm256_insertf128_ps | ( | V1, | |
V2, | |||
M | |||
) |
Definition at line 2781 of file avxintrin.h.
Referenced by _mm256_loadu2_m128().
#define _mm256_insertf128_si256 | ( | V1, | |
V2, | |||
M | |||
) |
Definition at line 2803 of file avxintrin.h.
Referenced by _mm256_loadu2_m128i().
#define _mm256_permute2f128_pd | ( | V1, | |
V2, | |||
M | |||
) |
Permutes 128-bit data values stored in two 256-bit vectors of [4 x double], as specified by the immediate integer operand.
This intrinsic corresponds to the VPERM2F128 instruction.
V1 | A 256-bit vector of [4 x double]. |
V2 | A 256-bit vector of [4 x double]. |
M | An immediate integer operand specifying how the values are to be permuted. Bits [1:0]: 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination. Bits [5:4]: 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination. |
Definition at line 1244 of file avxintrin.h.
#define _mm256_permute2f128_ps | ( | V1, | |
V2, | |||
M | |||
) |
Permutes 128-bit data values stored in two 256-bit vectors of [8 x float], as specified by the immediate integer operand.
This intrinsic corresponds to the VPERM2F128 instruction.
V1 | A 256-bit vector of [8 x float]. |
V2 | A 256-bit vector of [8 x float]. |
M | An immediate integer operand specifying how the values are to be permuted. Bits [1:0]: 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination. Bits [5:4]: 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination. |
Definition at line 1285 of file avxintrin.h.
#define _mm256_permute2f128_si256 | ( | V1, | |
V2, | |||
M | |||
) |
Permutes 128-bit data values stored in two 256-bit integer vectors, as specified by the immediate integer operand.
This intrinsic corresponds to the VPERM2F128 instruction.
V1 | A 256-bit integer vector. |
V2 | A 256-bit integer vector. |
M | An immediate integer operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination. Bits [5:4]: 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination. |
Definition at line 1325 of file avxintrin.h.
#define _mm256_permute_pd | ( | A, | |
C | |||
) |
Copies the values in a 256-bit vector of [4 x double] as specified by the immediate integer operand.
This intrinsic corresponds to the VPERMILPD instruction.
A | A 256-bit vector of [4 x double]. |
C | An immediate integer operand specifying how the values are to be copied. Bit [0]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [1]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector. Bit [2]: 0: Bits [191:128] of the source are copied to bits [191:128] of the returned vector. 1: Bits [255:192] of the source are copied to bits [191:128] of the returned vector. Bit [3]: 0: Bits [191:128] of the source are copied to bits [255:192] of the returned vector. 1: Bits [255:192] of the source are copied to bits [255:192] of the returned vector. |
Definition at line 1041 of file avxintrin.h.
#define _mm256_permute_ps | ( | A, | |
C | |||
) |
Copies the values in a 256-bit vector of [8 x float] as specified by the immediate integer operand.
This intrinsic corresponds to the VPERMILPS instruction.
A | A 256-bit vector of [8 x float]. |
C | An immediate integer operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [3:2]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [5:4]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [7:6]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector. Bits [1:0]: 00: Bits [159:128] of the source are copied to bits [159:128] of the returned vector. 01: Bits [191:160] of the source are copied to bits [159:128] of the returned vector. 10: Bits [223:192] of the source are copied to bits [159:128] of the returned vector. 11: Bits [255:224] of the source are copied to bits [159:128] of the returned vector. Bits [3:2]: 00: Bits [159:128] of the source are copied to bits [191:160] of the returned vector. 01: Bits [191:160] of the source are copied to bits [191:160] of the returned vector. 
10: Bits [223:192] of the source are copied to bits [191:160] of the returned vector. 11: Bits [255:224] of the source are copied to bits [191:160] of the returned vector. Bits [5:4]: 00: Bits [159:128] of the source are copied to bits [223:192] of the returned vector. 01: Bits [191:160] of the source are copied to bits [223:192] of the returned vector. 10: Bits [223:192] of the source are copied to bits [223:192] of the returned vector. 11: Bits [255:224] of the source are copied to bits [223:192] of the returned vector. Bits [7:6]: 00: Bits [159:128] of the source are copied to bits [255:224] of the returned vector. 01: Bits [191:160] of the source are copied to bits [255:224] of the returned vector. 10: Bits [223:192] of the source are copied to bits [255:224] of the returned vector. 11: Bits [255:224] of the source are copied to bits [255:224] of the returned vector. |
Definition at line 1195 of file avxintrin.h.
#define _mm256_round_pd | ( | V, | |
M | |||
) |
Rounds the values in a 256-bit vector of [4 x double] as specified by the byte operand.
The source values are rounded to integer values and returned as 64-bit double-precision floating-point values.
This intrinsic corresponds to the VROUNDPD
/ ROUNDPD instruction.
V | A 256-bit vector of [4 x double]. |
M | An integer value that specifies the rounding operation. Bits [7:4] are reserved. Bit [3] is a precision exception value: 0: A normal PE exception is used. 1: The PE field is not updated. Bit [2] is the rounding control source: 0: Use bits [1:0] of M. 1: Use the current MXCSR setting. Bits [1:0] contain the rounding control definition: 00: Nearest. 01: Downward (toward negative infinity). 10: Upward (toward positive infinity). 11: Truncated. |
Definition at line 411 of file avxintrin.h.
#define _mm256_round_ps | ( | V, | |
M | |||
) |
Rounds the values stored in a 256-bit vector of [8 x float] as specified by the byte operand.
The source values are rounded to integer values and returned as floating-point values.
This intrinsic corresponds to the VROUNDPS
/ ROUNDPS instruction.
V | A 256-bit vector of [8 x float]. |
M | An integer value that specifies the rounding operation. Bits [7:4] are reserved. Bit [3] is a precision exception value: 0: A normal PE exception is used. 1: The PE field is not updated. Bit [2] is the rounding control source: 0: Use bits [1:0] of M. 1: Use the current MXCSR setting. Bits [1:0] contain the rounding control definition: 00: Nearest. 01: Downward (toward negative infinity). 10: Upward (toward positive infinity). 11: Truncated. |
Definition at line 443 of file avxintrin.h.
#define _mm256_shuffle_pd | ( | a, | |
b, | |||
mask | |||
) |
Selects four double-precision values from the 256-bit operands of [4 x double], as specified by the immediate value operand.
The selected elements from the first 256-bit operand are copied to bits [63:0] and bits [191:128] in the destination, and the selected elements from the second 256-bit operand are copied to bits [127:64] and bits [255:192] in the destination. For example, if bits [3:0] of the immediate operand contain a value of 0xF, the 256-bit destination vector would contain the following values: b[3], a[3], b[1], a[1].
This intrinsic corresponds to the VSHUFPD
/ SHUFPD instruction.
a | A 256-bit vector of [4 x double]. |
b | A 256-bit vector of [4 x double]. |
mask | An immediate value containing 8-bit values specifying which elements to copy from a and b: Bit [0]=0: Bits [63:0] are copied from a to bits [63:0] of the destination. Bit [0]=1: Bits [127:64] are copied from a to bits [63:0] of the destination. Bit [1]=0: Bits [63:0] are copied from b to bits [127:64] of the destination. Bit [1]=1: Bits [127:64] are copied from b to bits [127:64] of the destination. Bit [2]=0: Bits [191:128] are copied from a to bits [191:128] of the destination. Bit [2]=1: Bits [255:192] are copied from a to bits [191:128] of the destination. Bit [3]=0: Bits [191:128] are copied from b to bits [255:192] of the destination. Bit [3]=1: Bits [255:192] are copied from b to bits [255:192] of the destination. |
Definition at line 1596 of file avxintrin.h.
#define _mm256_shuffle_ps | ( | a, | |
b, | |||
mask | |||
) |
Selects 8 float values from the 256-bit operands of [8 x float], as specified by the immediate value operand.
The four selected elements in each operand are copied to the destination according to the bits specified in the immediate operand. The selected elements from the first 256-bit operand are copied to bits [63:0] and bits [191:128] of the destination, and the selected elements from the second 256-bit operand are copied to bits [127:64] and bits [255:192] of the destination. For example, if bits [7:0] of the immediate operand contain a value of 0xFF, the 256-bit destination vector would contain the following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].
This intrinsic corresponds to the VSHUFPS
/ SHUFPS instruction.
a | A 256-bit vector of [8 x float]. The four selected elements in this operand are copied to bits [63:0] and bits [191:128] in the destination, according to the bits specified in the immediate operand. |
b | A 256-bit vector of [8 x float]. The four selected elements in this operand are copied to bits [127:64] and bits [255:192] in the destination, according to the bits specified in the immediate operand. |
mask | An immediate value containing an 8-bit value specifying which elements to copy from a and b. Bits [3:0] specify the values copied from operand a. Bits [7:4] specify the values copied from operand b. The destinations within the 256-bit destination are assigned values as follows, according to the bit value assignments described below: Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the destination. Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the destination. Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the destination. Bits [7:6] are used to assign values to bits [127:96] and [255:224] in the destination. Bit value assignments: 00: Bits [31:0] and [159:128] are copied from the selected operand. 01: Bits [63:32] and [191:160] are copied from the selected operand. 10: Bits [95:64] and [223:192] are copied from the selected operand. 11: Bits [127:96] and [255:224] are copied from the selected operand. |
Definition at line 1543 of file avxintrin.h.
#define _mm_cmp_pd | ( | a, | |
b, | |||
c | |||
) |
Compares each of the corresponding double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand.
Returns a [2 x double] vector consisting of two doubles corresponding to the two comparison results: zero if the comparison is false, and all 1's if the comparison is true.
This intrinsic corresponds to the VCMPPD
/ CMPPD instruction.
a | A 128-bit vector of [2 x double]. |
b | A 128-bit vector of [2 x double]. |
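c | An immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h: Equal (ordered, non-signaling); 01h: Less than (ordered, signaling); 02h: Less than or equal (ordered, signaling); 03h: Unordered (non-signaling); 04h: Not equal (unordered, non-signaling); 05h: Not less than (unordered, signaling); 06h: Not less than or equal (unordered, signaling); 07h: Ordered (non-signaling); 08h: Equal (unordered, non-signaling); 09h: Not greater than or equal (unordered, signaling); 0Ah: Not greater than (unordered, signaling); 0Bh: False (ordered, non-signaling); 0Ch: Not equal (ordered, non-signaling); 0Dh: Greater than or equal (ordered, signaling); 0Eh: Greater than (ordered, signaling); 0Fh: True (unordered, non-signaling); 10h: Equal (ordered, signaling); 11h: Less than (ordered, non-signaling); 12h: Less than or equal (ordered, non-signaling); 13h: Unordered (signaling); 14h: Not equal (unordered, signaling); 15h: Not less than (unordered, non-signaling); 16h: Not less than or equal (unordered, non-signaling); 17h: Ordered (signaling); 18h: Equal (unordered, signaling); 19h: Not greater than or equal (unordered, non-signaling); 1Ah: Not greater than (unordered, non-signaling); 1Bh: False (ordered, signaling); 1Ch: Not equal (ordered, signaling); 1Dh: Greater than or equal (ordered, non-signaling); 1Eh: Greater than (ordered, non-signaling); 1Fh: True (unordered, signaling) |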
Definition at line 1670 of file avxintrin.h.
#define _mm_cmp_ps | ( | a, | |
b, | |||
c | |||
) |
Compares each of the corresponding values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand.
Returns a [4 x float] vector consisting of four floats corresponding to the four comparison results: zero if the comparison is false, and all 1's if the comparison is true.
This intrinsic corresponds to the VCMPPS
/ CMPPS instruction.
a | A 128-bit vector of [4 x float]. |
b | A 128-bit vector of [4 x float]. |
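c | An immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h: Equal (ordered, non-signaling); 01h: Less than (ordered, signaling); 02h: Less than or equal (ordered, signaling); 03h: Unordered (non-signaling); 04h: Not equal (unordered, non-signaling); 05h: Not less than (unordered, signaling); 06h: Not less than or equal (unordered, signaling); 07h: Ordered (non-signaling); 08h: Equal (unordered, non-signaling); 09h: Not greater than or equal (unordered, signaling); 0Ah: Not greater than (unordered, signaling); 0Bh: False (ordered, non-signaling); 0Ch: Not equal (ordered, non-signaling); 0Dh: Greater than or equal (ordered, signaling); 0Eh: Greater than (ordered, signaling); 0Fh: True (unordered, non-signaling); 10h: Equal (ordered, signaling); 11h: Less than (ordered, non-signaling); 12h: Less than or equal (ordered, non-signaling); 13h: Unordered (signaling); 14h: Not equal (unordered, signaling); 15h: Not less than (unordered, non-signaling); 16h: Not less than or equal (unordered, non-signaling); 17h: Ordered (signaling); 18h: Equal (unordered, signaling); 19h: Not greater than or equal (unordered, non-signaling); 1Ah: Not greater than (unordered, non-signaling); 1Bh: False (ordered, signaling); 1Ch: Not equal (ordered, signaling); 1Dh: Greater than or equal (ordered, non-signaling); 1Eh: Greater than (ordered, non-signaling); 1Fh: True (unordered, signaling) |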
Definition at line 1706 of file avxintrin.h.
#define _mm_cmp_sd | ( | a, | |
b, | |||
c | |||
) |
Compares each of the corresponding scalar double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand.
If the result is true, all 64 bits of the lowest element of the destination vector are set; otherwise they are cleared.
This intrinsic corresponds to the VCMPSD
/ CMPSD instruction.
a | A 128-bit vector of [2 x double]. |
b | A 128-bit vector of [2 x double]. |
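c | An immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h: Equal (ordered, non-signaling); 01h: Less than (ordered, signaling); 02h: Less than or equal (ordered, signaling); 03h: Unordered (non-signaling); 04h: Not equal (unordered, non-signaling); 05h: Not less than (unordered, signaling); 06h: Not less than or equal (unordered, signaling); 07h: Ordered (non-signaling); 08h: Equal (unordered, non-signaling); 09h: Not greater than or equal (unordered, signaling); 0Ah: Not greater than (unordered, signaling); 0Bh: False (ordered, non-signaling); 0Ch: Not equal (ordered, non-signaling); 0Dh: Greater than or equal (ordered, signaling); 0Eh: Greater than (ordered, signaling); 0Fh: True (unordered, non-signaling); 10h: Equal (ordered, signaling); 11h: Less than (ordered, non-signaling); 12h: Less than or equal (ordered, non-signaling); 13h: Unordered (signaling); 14h: Not equal (unordered, signaling); 15h: Not less than (unordered, non-signaling); 16h: Not less than or equal (unordered, non-signaling); 17h: Ordered (signaling); 18h: Equal (unordered, signaling); 19h: Not greater than or equal (unordered, non-signaling); 1Ah: Not greater than (unordered, non-signaling); 1Bh: False (ordered, signaling); 1Ch: Not equal (ordered, signaling); 1Dh: Greater than or equal (ordered, non-signaling); 1Eh: Greater than (ordered, non-signaling); 1Fh: True (unordered, signaling) |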
Definition at line 1813 of file avxintrin.h.
#define _mm_cmp_ss | ( | a, | |
b, | |||
c | |||
) |
Compares each of the corresponding scalar values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand.
If the result is true, all 32 bits of the lowest element of the destination vector are set; otherwise they are cleared.
This intrinsic corresponds to the VCMPSS
/ CMPSS instruction.
a | A 128-bit vector of [4 x float]. |
b | A 128-bit vector of [4 x float]. |
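c | An immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h: Equal (ordered, non-signaling); 01h: Less than (ordered, signaling); 02h: Less than or equal (ordered, signaling); 03h: Unordered (non-signaling); 04h: Not equal (unordered, non-signaling); 05h: Not less than (unordered, signaling); 06h: Not less than or equal (unordered, signaling); 07h: Ordered (non-signaling); 08h: Equal (unordered, non-signaling); 09h: Not greater than or equal (unordered, signaling); 0Ah: Not greater than (unordered, signaling); 0Bh: False (ordered, non-signaling); 0Ch: Not equal (ordered, non-signaling); 0Dh: Greater than or equal (ordered, signaling); 0Eh: Greater than (ordered, signaling); 0Fh: True (unordered, non-signaling); 10h: Equal (ordered, signaling); 11h: Less than (ordered, non-signaling); 12h: Less than or equal (ordered, non-signaling); 13h: Unordered (signaling); 14h: Not equal (unordered, signaling); 15h: Not less than (unordered, non-signaling); 16h: Not less than or equal (unordered, non-signaling); 17h: Ordered (signaling); 18h: Equal (unordered, signaling); 19h: Not greater than or equal (unordered, non-signaling); 1Ah: Not greater than (unordered, non-signaling); 1Bh: False (ordered, signaling); 1Ch: Not equal (ordered, signaling); 1Dh: Greater than or equal (ordered, non-signaling); 1Eh: Greater than (ordered, non-signaling); 1Fh: True (unordered, signaling) |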
Definition at line 1848 of file avxintrin.h.
#define _mm_permute_pd | ( | A, | |
C | |||
) |
Copies the values in a 128-bit vector of [2 x double] as specified by the immediate integer operand.
This intrinsic corresponds to the VPERMILPD instruction.
A | A 128-bit vector of [2 x double]. |
C | An immediate integer operand specifying how the values are to be copied. Bit [0]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [1]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector. |
Definition at line 1000 of file avxintrin.h.
#define _mm_permute_ps | ( | A, | |
C | |||
) |
Copies the values in a 128-bit vector of [4 x float] as specified by the immediate integer operand.
This intrinsic corresponds to the VPERMILPS instruction.
A | A 128-bit vector of [4 x float]. |
C | An immediate integer operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [3:2]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [5:4]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [7:6]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector. |
Definition at line 1101 of file avxintrin.h.
typedef long long __m256i __attribute__((__vector_size__(32))) |
Definition at line 31 of file avxintrin.h.
|
static |
Adds two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VADDPD
/ ADDPD instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. |
Definition at line 69 of file avxintrin.h.
References __b.
|
static |
Adds two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VADDPS
/ ADDPS instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. |
Definition at line 87 of file avxintrin.h.
References __b.
|
static |
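Alternately subtracts and adds the values of two 256-bit vectors of [4 x double]: even-indexed elements (counting from 0 at the least-significant end) receive the difference of the left and right operands, and odd-indexed elements receive their sum.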
This intrinsic corresponds to the VADDSUBPD
/ ADDSUBPD instruction.
__a | A 256-bit vector of [4 x double] containing the left source operand. |
__b | A 256-bit vector of [4 x double] containing the right source operand. |
Definition at line 142 of file avxintrin.h.
|
static |
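Alternately subtracts and adds the values of two 256-bit vectors of [8 x float]: even-indexed elements (counting from 0 at the least-significant end) receive the difference of the left and right operands, and odd-indexed elements receive their sum.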
This intrinsic corresponds to the VADDSUBPS
/ ADDSUBPS instruction.
__a | A 256-bit vector of [8 x float] containing the left source operand. |
__b | A 256-bit vector of [8 x float] containing the right source operand. |
Definition at line 161 of file avxintrin.h.
|
static |
Performs a bitwise AND of two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VANDPD
/ ANDPD instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. |
Definition at line 529 of file avxintrin.h.
References __b.
|
static |
Performs a bitwise AND of two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VANDPS
/ ANDPS instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. |
Definition at line 547 of file avxintrin.h.
References __b.
|
static |
Performs a bitwise AND of two 256-bit vectors of [4 x double], using the one's complement of the values contained in the first source operand.
This intrinsic corresponds to the VANDNPD
/ ANDNPD instruction.
__a | A 256-bit vector of [4 x double] containing the left source operand. The one's complement of this value is used in the bitwise AND. |
__b | A 256-bit vector of [4 x double] containing the right source operand. |
Definition at line 568 of file avxintrin.h.
References __b.
|
static |
Performs a bitwise AND of two 256-bit vectors of [8 x float], using the one's complement of the values contained in the first source operand.
This intrinsic corresponds to the VANDNPS
/ ANDNPS instruction.
__a | A 256-bit vector of [8 x float] containing the left source operand. The one's complement of this value is used in the bitwise AND. |
__b | A 256-bit vector of [8 x float] containing the right source operand. |
Definition at line 589 of file avxintrin.h.
References __b.
|
static |
Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the 256-bit vector operand.
This intrinsic corresponds to the VBLENDVPD
/ BLENDVPD instruction.
__a | A 256-bit vector of [4 x double]. |
__b | A 256-bit vector of [4 x double]. |
__c | A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying how the values are to be copied. The position of the mask bit corresponds to the most significant bit of a copied value. When a mask bit is 0, the corresponding 64-bit element in operand __a is copied to the same position in the destination. When a mask bit is 1, the corresponding 64-bit element in operand __b is copied to the same position in the destination. |
Definition at line 1420 of file avxintrin.h.
|
static |
Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the 256-bit vector operand.
This intrinsic corresponds to the VBLENDVPS
/ BLENDVPS instruction.
__a | A 256-bit vector of [8 x float]. |
__b | A 256-bit vector of [8 x float]. |
__c | A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63, and 31 specifying how the values are to be copied. The position of the mask bit corresponds to the most significant bit of a copied value. When a mask bit is 0, the corresponding 32-bit element in operand __a is copied to the same position in the destination. When a mask bit is 1, the corresponding 32-bit element in operand __b is copied to the same position in the destination. |
Definition at line 1448 of file avxintrin.h.
|
static |
Definition at line 2338 of file avxintrin.h.
|
static |
Definition at line 2344 of file avxintrin.h.
|
static |
Definition at line 2324 of file avxintrin.h.
|
static |
Definition at line 2331 of file avxintrin.h.
|
static |
Definition at line 2759 of file avxintrin.h.
Referenced by _mm256_loadu2_m128d().
|
static |
Definition at line 2741 of file avxintrin.h.
Referenced by _mm256_storeu2_m128d().
|
static |
Definition at line 2705 of file avxintrin.h.
|
static |
Definition at line 2711 of file avxintrin.h.
|
static |
Definition at line 2765 of file avxintrin.h.
Referenced by _mm256_loadu2_m128().
|
static |
Definition at line 2747 of file avxintrin.h.
Referenced by _mm256_storeu2_m128().
|
static |
Definition at line 2717 of file avxintrin.h.
|
static |
Definition at line 2723 of file avxintrin.h.
|
static |
Definition at line 2771 of file avxintrin.h.
Referenced by _mm256_loadu2_m128i().
|
static |
Definition at line 2735 of file avxintrin.h.
|
static |
Definition at line 2729 of file avxintrin.h.
|
static |
Definition at line 2753 of file avxintrin.h.
Referenced by _mm256_storeu2_m128i().
|
static |
Converts a vector of [4 x i32] into a vector of [4 x double].
This intrinsic corresponds to the VCVTDQ2PD
/ CVTDQ2PD instruction.
__a | A 128-bit integer vector of [4 x i32]. |
Definition at line 2060 of file avxintrin.h.
|
static |
Converts a vector of [8 x i32] into a vector of [8 x float].
This intrinsic corresponds to the VCVTDQ2PS
/ CVTDQ2PS instruction.
__a | A 256-bit integer vector of [8 x i32]. |
Definition at line 2075 of file avxintrin.h.
|
static |
Definition at line 2124 of file avxintrin.h.
|
static |
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].
This intrinsic corresponds to the VCVTPD2PS
/ CVTPD2PS instruction.
__a | A 256-bit vector of [4 x double]. |
Definition at line 2091 of file avxintrin.h.
|
static |
Converts a vector of [8 x float] into a vector of [8 x i32].
This intrinsic corresponds to the VCVTPS2DQ
/ CVTPS2DQ instruction.
__a | A 256-bit vector of [8 x float]. |
Definition at line 2106 of file avxintrin.h.
|
static |
Definition at line 2112 of file avxintrin.h.
|
static |
Definition at line 2136 of file avxintrin.h.
|
static |
Definition at line 2142 of file avxintrin.h.
|
static |
Definition at line 2149 of file avxintrin.h.
|
static |
Definition at line 2118 of file avxintrin.h.
|
static |
Definition at line 2130 of file avxintrin.h.
|
static |
Divides two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VDIVPD
/ DIVPD instruction.
__a | A 256-bit vector of [4 x double] containing the dividend. |
__b | A 256-bit vector of [4 x double] containing the divisor. |
Definition at line 179 of file avxintrin.h.
References __b.
|
static |
Divides two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VDIVPS
/ DIVPS instruction.
__a | A 256-bit vector of [8 x float] containing the dividend. |
__b | A 256-bit vector of [8 x float] containing the divisor. |
Definition at line 197 of file avxintrin.h.
References __b.
|
static |
Takes a [16 x i16] vector and returns the vector element value indexed by the immediate constant operand.
This intrinsic corresponds to the VEXTRACTF128+COMPOSITE
/ EXTRACTF128+COMPOSITE instruction.
__a | A 256-bit integer vector of [16 x i16]. |
__imm | An immediate integer operand with bits [3:0] determining which vector element is extracted and returned. |
Definition at line 1890 of file avxintrin.h.
References __b.
|
static |
Takes a [8 x i32] vector and returns the vector element value indexed by the immediate constant operand.
This intrinsic corresponds to the VEXTRACTF128+COMPOSITE
/ EXTRACTF128+COMPOSITE instruction.
__a | A 256-bit vector of [8 x i32]. |
__imm | An immediate integer operand with bits [2:0] determining which vector element is extracted and returned. |
Definition at line 1868 of file avxintrin.h.
References __b.
|
static |
Takes a [32 x i8] vector and returns the vector element value indexed by the immediate constant operand.
This intrinsic corresponds to the VEXTRACTF128+COMPOSITE
/ EXTRACTF128+COMPOSITE instruction.
__a | A 256-bit integer vector of [32 x i8]. |
__imm | An immediate integer operand with bits [4:0] determining which vector element is extracted and returned. |
Definition at line 1912 of file avxintrin.h.
References __b.
|
static |
Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VHADDPD
/ HADDPD instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. The horizontal sums of the values are returned in the even-indexed elements of a vector of [4 x double]. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. The horizontal sums of the values are returned in the odd-indexed elements of a vector of [4 x double]. |
Definition at line 685 of file avxintrin.h.
|
static |
Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VHADDPS
/ HADDPS instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. The horizontal sums of the values are returned in the elements with index 0, 1, 4, 5 of a vector of [8 x float]. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. The horizontal sums of the values are returned in the elements with index 2, 3, 6, 7 of a vector of [8 x float]. |
Definition at line 708 of file avxintrin.h.
|
static |
Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VHSUBPD
/ HSUBPD instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. The horizontal differences between the values are returned in the even-indexed elements of a vector of [4 x double]. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. The horizontal differences between the values are returned in the odd-indexed elements of a vector of [4 x double]. |
Definition at line 731 of file avxintrin.h.
|
static |
Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VHSUBPS
/ HSUBPS instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. The horizontal differences between the values are returned in the elements with index 0, 1, 4, 5 of a vector of [8 x float]. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. The horizontal differences between the values are returned in the elements with index 2, 3, 6, 7 of a vector of [8 x float]. |
Definition at line 754 of file avxintrin.h.
|
static |
Takes a [16 x i16] vector and replaces the vector element value indexed by the immediate constant operand with a new value.
Returns the modified vector.
This intrinsic corresponds to the VINSERTF128+COMPOSITE
/ INSERTF128+COMPOSITE instruction.
__a | A vector of [16 x i16] to be used by the insert operation. |
__b | An i16 integer value. The replacement value for the insert operation. |
__imm | An immediate integer specifying the index of the vector element to be replaced. |
Definition at line 1988 of file avxintrin.h.
|
static |
Takes a [8 x i32] vector and replaces the vector element value indexed by the immediate constant operand with a new value.
Returns the modified vector.
This intrinsic corresponds to the VINSERTF128+COMPOSITE
/ INSERTF128+COMPOSITE instruction.
__a | A vector of [8 x i32] to be used by the insert operation. |
__b | An integer value. The replacement value for the insert operation. |
__imm | An immediate integer specifying the index of the vector element to be replaced. |
Definition at line 1961 of file avxintrin.h.
|
static |
Takes a [32 x i8] vector and replaces the vector element value indexed by the immediate constant operand with a new value.
Returns the modified vector.
This intrinsic corresponds to the VINSERTF128+COMPOSITE
/ INSERTF128+COMPOSITE instruction.
__a | A vector of [32 x i8] to be used by the insert operation. |
__b | An i8 integer value. The replacement value for the insert operation. |
__imm | An immediate integer specifying the index of the vector element to be replaced. |
Definition at line 2014 of file avxintrin.h.
|
static |
Definition at line 2396 of file avxintrin.h.
|
static |
Definition at line 2351 of file avxintrin.h.
|
static |
Definition at line 2357 of file avxintrin.h.
|
static |
Definition at line 2381 of file avxintrin.h.
References __p.
|
static |
Definition at line 2842 of file avxintrin.h.
References _mm256_castps128_ps256(), _mm256_insertf128_ps, and _mm_loadu_ps().
|
static |
Definition at line 2849 of file avxintrin.h.
References _mm256_castpd128_pd256(), _mm256_insertf128_pd, and _mm_loadu_pd().
|
static |
Definition at line 2856 of file avxintrin.h.
References _mm256_castsi128_si256(), _mm256_insertf128_si256, and _mm_loadu_si128().
|
static |
Definition at line 2363 of file avxintrin.h.
|
static |
Definition at line 2372 of file avxintrin.h.
|
static |
Definition at line 2387 of file avxintrin.h.
|
static |
Definition at line 2455 of file avxintrin.h.
|
static |
Definition at line 2468 of file avxintrin.h.
|
static |
Definition at line 2487 of file avxintrin.h.
|
static |
Definition at line 2475 of file avxintrin.h.
|
static |
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
This intrinsic corresponds to the VMAXPD
/ MAXPD instruction.
__a | A 256-bit vector of [4 x double] containing one of the operands. |
__b | A 256-bit vector of [4 x double] containing one of the operands. |
Definition at line 216 of file avxintrin.h.
|
static |
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.
This intrinsic corresponds to the VMAXPS
/ MAXPS instruction.
__a | A 256-bit vector of [8 x float] containing one of the operands. |
__b | A 256-bit vector of [8 x float] containing one of the operands. |
Definition at line 235 of file avxintrin.h.
|
static |
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
This intrinsic corresponds to the VMINPD
/ MINPD instruction.
__a | A 256-bit vector of [4 x double] containing one of the operands. |
__b | A 256-bit vector of [4 x double] containing one of the operands. |
Definition at line 254 of file avxintrin.h.
|
static |
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.
This intrinsic corresponds to the VMINPS
/ MINPS instruction.
__a | A 256-bit vector of [8 x float] containing one of the operands. |
__b | A 256-bit vector of [8 x float] containing one of the operands. |
Definition at line 273 of file avxintrin.h.
|
static |
Definition at line 2168 of file avxintrin.h.
Referenced by _mm256_mask_movedup_pd(), and _mm256_maskz_movedup_pd().
|
static |
Definition at line 2156 of file avxintrin.h.
Referenced by _mm256_mask_movehdup_ps(), and _mm256_maskz_movehdup_ps().
|
static |
Definition at line 2162 of file avxintrin.h.
Referenced by _mm256_mask_moveldup_ps(), and _mm256_maskz_moveldup_ps().
|
static |
Definition at line 2291 of file avxintrin.h.
|
static |
Definition at line 2297 of file avxintrin.h.
|
static |
Multiplies two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VMULPD
/ MULPD instruction.
__a | A 256-bit vector of [4 x double] containing one of the operands. |
__b | A 256-bit vector of [4 x double] containing one of the operands. |
Definition at line 291 of file avxintrin.h.
References __b.
|
static |
Multiplies two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VMULPS
/ MULPS instruction.
__a | A 256-bit vector of [8 x float] containing one of the operands. |
__b | A 256-bit vector of [8 x float] containing one of the operands. |
Definition at line 309 of file avxintrin.h.
References __b.
|
static |
Performs a bitwise OR of two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VORPD
/ ORPD instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. |
Definition at line 607 of file avxintrin.h.
References __b.
|
static |
Performs a bitwise OR of two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VORPS
/ ORPS instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. |
Definition at line 625 of file avxintrin.h.
References __b.
|
static |
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector operand.
This intrinsic corresponds to the VPERMILPD
/ PERMILPD instruction.
__a | A 256-bit vector of [4 x double]. |
__c | A 256-bit integer vector operand specifying how the values are to be copied. Bit [1]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [65]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector. Bit [129]: 0: Bits [191:128] of the source are copied to bits [191:128] of the returned vector. 1: Bits [255:192] of the source are copied to bits [191:128] of the returned vector. Bit [193]: 0: Bits [191:128] of the source are copied to bits [255:192] of the returned vector. 1: Bits [255:192] of the source are copied to bits [255:192] of the returned vector. |
Definition at line 823 of file avxintrin.h.
|
static |
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand.
This intrinsic corresponds to the VPERMILPS
/ PERMILPS instruction.
__a | A 256-bit vector of [8 x float]. |
__c | A 256-bit integer vector operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [33:32]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [65:64]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [97:96]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector. Bits [129:128]: 00: Bits [159:128] of the source are copied to bits [159:128] of the returned vector. 01: Bits [191:160] of the source are copied to bits [159:128] of the returned vector. 10: Bits [223:192] of the source are copied to bits [159:128] of the returned vector. 11: Bits [255:224] of the source are copied to bits [159:128] of the returned vector. Bits [161:160]: 00: Bits [159:128] of the source are copied to bits [191:160] of the returned vector. 01: Bits [191:160] of the source are copied to bits [191:160] of the returned vector. 
10: Bits [223:192] of the source are copied to bits [191:160] of the returned vector. 11: Bits [255:224] of the source are copied to bits [191:160] of the returned vector. Bits [193:192]: 00: Bits [159:128] of the source are copied to bits [223:192] of the returned vector. 01: Bits [191:160] of the source are copied to bits [223:192] of the returned vector. 10: Bits [223:192] of the source are copied to bits [223:192] of the returned vector. 11: Bits [255:224] of the source are copied to bits [223:192] of the returned vector. Bits [225:224]: 00: Bits [159:128] of the source are copied to bits [255:224] of the returned vector. 01: Bits [191:160] of the source are copied to bits [255:224] of the returned vector. 10: Bits [223:192] of the source are copied to bits [255:224] of the returned vector. 11: Bits [255:224] of the source are copied to bits [255:224] of the returned vector. |
Definition at line 969 of file avxintrin.h.
|
static |
Calculates the reciprocals of the values in a 256-bit vector of [8 x float].
This intrinsic corresponds to the VRCPPS
/ RCPPS instruction.
__a | A 256-bit vector of [8 x float]. |
Definition at line 377 of file avxintrin.h.
|
static |
Calculates the reciprocal square roots of the values in a 256-bit vector of [8 x float].
This intrinsic corresponds to the VRSQRTPS
/ RSQRTPS instruction.
__a | A 256-bit vector of [8 x float]. |
Definition at line 360 of file avxintrin.h.
|
static |
Definition at line 2664 of file avxintrin.h.
|
static |
Definition at line 2658 of file avxintrin.h.
|
static |
Definition at line 2679 of file avxintrin.h.
|
static |
Definition at line 2671 of file avxintrin.h.
References __b.
|
static |
Definition at line 2646 of file avxintrin.h.
|
static |
Definition at line 2652 of file avxintrin.h.
|
static |
Definition at line 2557 of file avxintrin.h.
|
static |
Definition at line 2550 of file avxintrin.h.
|
static |
Definition at line 2585 of file avxintrin.h.
|
static |
Definition at line 2567 of file avxintrin.h.
|
static |
Definition at line 2897 of file avxintrin.h.
Referenced by _mm256_set_m128d(), _mm256_set_m128i(), _mm256_setr_m128(), _mm256_setr_m128d(), and _mm256_setr_m128i().
|
static |
Definition at line 2902 of file avxintrin.h.
References _mm256_set_m128().
|
static |
Definition at line 2907 of file avxintrin.h.
References _mm256_set_m128().
|
static |
Definition at line 2537 of file avxintrin.h.
|
static |
Definition at line 2543 of file avxintrin.h.
|
static |
Definition at line 2612 of file avxintrin.h.
|
static |
Definition at line 2605 of file avxintrin.h.
|
static |
Definition at line 2639 of file avxintrin.h.
|
static |
Definition at line 2622 of file avxintrin.h.
|
static |
Definition at line 2912 of file avxintrin.h.
References _mm256_set_m128().
|
static |
Definition at line 2917 of file avxintrin.h.
References _mm256_set_m128().
|
static |
Definition at line 2922 of file avxintrin.h.
References _mm256_set_m128().
|
static |
Definition at line 2592 of file avxintrin.h.
|
static |
Definition at line 2598 of file avxintrin.h.
|
static |
Definition at line 2686 of file avxintrin.h.
Referenced by _mm256_cvtepi64_pd(), _mm256_cvtepu32_pd(), _mm256_cvtepu64_pd(), _mm256_getexp_pd(), _mm256_maskz_add_pd(), _mm256_maskz_and_pd(), _mm256_maskz_andnot_pd(), _mm256_maskz_broadcastsd_pd(), _mm256_maskz_compress_pd(), _mm256_maskz_cvtepi32_pd(), _mm256_maskz_cvtepi64_pd(), _mm256_maskz_cvtepu32_pd(), _mm256_maskz_cvtepu64_pd(), _mm256_maskz_cvtps_pd(), _mm256_maskz_div_pd(), _mm256_maskz_expand_pd(), _mm256_maskz_expandloadu_pd(), _mm256_maskz_getexp_pd(), _mm256_maskz_load_pd(), _mm256_maskz_loadu_pd(), _mm256_maskz_max_pd(), _mm256_maskz_min_pd(), _mm256_maskz_mov_pd(), _mm256_maskz_movedup_pd(), _mm256_maskz_mul_pd(), _mm256_maskz_or_pd(), _mm256_maskz_permutevar_pd(), _mm256_maskz_permutexvar_pd(), _mm256_maskz_rcp14_pd(), _mm256_maskz_rsqrt14_pd(), _mm256_maskz_scalef_pd(), _mm256_maskz_sqrt_pd(), _mm256_maskz_sub_pd(), _mm256_maskz_unpackhi_pd(), _mm256_maskz_unpacklo_pd(), _mm256_maskz_xor_pd(), _mm256_rcp14_pd(), _mm256_rsqrt14_pd(), and _mm256_scalef_pd().
|
static |
Definition at line 2692 of file avxintrin.h.
Referenced by _mm256_cvtepu32_ps(), _mm256_getexp_ps(), _mm256_maskz_add_ps(), _mm256_maskz_and_ps(), _mm256_maskz_andnot_ps(), _mm256_maskz_broadcast_f32x2(), _mm256_maskz_broadcast_f32x4(), _mm256_maskz_broadcast_f64x2(), _mm256_maskz_broadcastss_ps(), _mm256_maskz_compress_ps(), _mm256_maskz_cvtepi32_ps(), _mm256_maskz_cvtepu32_ps(), _mm256_maskz_cvtph_ps(), _mm256_maskz_div_ps(), _mm256_maskz_expand_ps(), _mm256_maskz_expandloadu_ps(), _mm256_maskz_getexp_ps(), _mm256_maskz_load_ps(), _mm256_maskz_loadu_ps(), _mm256_maskz_max_ps(), _mm256_maskz_min_ps(), _mm256_maskz_mov_ps(), _mm256_maskz_movehdup_ps(), _mm256_maskz_moveldup_ps(), _mm256_maskz_mul_ps(), _mm256_maskz_or_ps(), _mm256_maskz_permutevar_ps(), _mm256_maskz_permutexvar_ps(), _mm256_maskz_rcp14_ps(), _mm256_maskz_rsqrt14_ps(), _mm256_maskz_scalef_ps(), _mm256_maskz_sqrt_ps(), _mm256_maskz_sub_ps(), _mm256_maskz_unpackhi_ps(), _mm256_maskz_unpacklo_ps(), _mm256_maskz_xor_ps(), _mm256_rcp14_ps(), _mm256_rsqrt14_ps(), _mm256_scalef_ps(), _mm512_cvtepi64_ps(), _mm512_cvtepu64_ps(), _mm512_maskz_cvtepi64_ps(), _mm512_maskz_cvtepu64_ps(), and _mm512_maskz_cvtpd_ps().
|
static |
Definition at line 2698 of file avxintrin.h.
Referenced by _mm256_abs_epi64(), _mm256_cvtpd_epi64(), _mm256_cvtpd_epu64(), _mm256_cvtps_epi64(), _mm256_cvtps_epu32(), _mm256_cvtps_epu64(), _mm256_cvttpd_epi64(), _mm256_cvttpd_epu64(), _mm256_cvttps_epi64(), _mm256_cvttps_epu32(), _mm256_cvttps_epu64(), _mm256_lzcnt_epi32(), _mm256_lzcnt_epi64(), _mm256_maskz_abs_epi16(), _mm256_maskz_abs_epi32(), _mm256_maskz_abs_epi64(), _mm256_maskz_abs_epi8(), _mm256_maskz_add_epi16(), _mm256_maskz_add_epi32(), _mm256_maskz_add_epi64(), _mm256_maskz_add_epi8(), _mm256_maskz_adds_epi16(), _mm256_maskz_adds_epi8(), _mm256_maskz_adds_epu16(), _mm256_maskz_adds_epu8(), _mm256_maskz_and_epi32(), _mm256_maskz_and_epi64(), _mm256_maskz_andnot_epi32(), _mm256_maskz_andnot_epi64(), _mm256_maskz_avg_epu16(), _mm256_maskz_avg_epu8(), _mm256_maskz_broadcast_i32x2(), _mm256_maskz_broadcast_i32x4(), _mm256_maskz_broadcast_i64x2(), _mm256_maskz_broadcastb_epi8(), _mm256_maskz_broadcastd_epi32(), _mm256_maskz_broadcastq_epi64(), _mm256_maskz_broadcastw_epi16(), _mm256_maskz_compress_epi32(), _mm256_maskz_compress_epi64(), _mm256_maskz_conflict_epi32(), _mm256_maskz_conflict_epi64(), _mm256_maskz_cvtepi16_epi32(), _mm256_maskz_cvtepi16_epi64(), _mm256_maskz_cvtepi32_epi64(), _mm256_maskz_cvtepi8_epi16(), _mm256_maskz_cvtepi8_epi32(), _mm256_maskz_cvtepi8_epi64(), _mm256_maskz_cvtepu16_epi32(), _mm256_maskz_cvtepu16_epi64(), _mm256_maskz_cvtepu32_epi64(), _mm256_maskz_cvtepu8_epi16(), _mm256_maskz_cvtepu8_epi32(), _mm256_maskz_cvtepu8_epi64(), _mm256_maskz_cvtpd_epi64(), _mm256_maskz_cvtpd_epu64(), _mm256_maskz_cvtps_epi32(), _mm256_maskz_cvtps_epi64(), _mm256_maskz_cvtps_epu32(), _mm256_maskz_cvtps_epu64(), _mm256_maskz_cvttpd_epi64(), _mm256_maskz_cvttpd_epu64(), _mm256_maskz_cvttps_epi32(), _mm256_maskz_cvttps_epi64(), _mm256_maskz_cvttps_epu32(), _mm256_maskz_cvttps_epu64(), _mm256_maskz_expand_epi32(), _mm256_maskz_expand_epi64(), _mm256_maskz_expandloadu_epi32(), _mm256_maskz_expandloadu_epi64(), _mm256_maskz_load_epi32(), 
_mm256_maskz_load_epi64(), _mm256_maskz_loadu_epi16(), _mm256_maskz_loadu_epi32(), _mm256_maskz_loadu_epi64(), _mm256_maskz_loadu_epi8(), _mm256_maskz_lzcnt_epi32(), _mm256_maskz_lzcnt_epi64(), _mm256_maskz_madd_epi16(), _mm256_maskz_maddubs_epi16(), _mm256_maskz_max_epi16(), _mm256_maskz_max_epi32(), _mm256_maskz_max_epi64(), _mm256_maskz_max_epi8(), _mm256_maskz_max_epu16(), _mm256_maskz_max_epu32(), _mm256_maskz_max_epu64(), _mm256_maskz_max_epu8(), _mm256_maskz_min_epi16(), _mm256_maskz_min_epi32(), _mm256_maskz_min_epi64(), _mm256_maskz_min_epi8(), _mm256_maskz_min_epu16(), _mm256_maskz_min_epu32(), _mm256_maskz_min_epu64(), _mm256_maskz_min_epu8(), _mm256_maskz_mov_epi16(), _mm256_maskz_mov_epi32(), _mm256_maskz_mov_epi64(), _mm256_maskz_mov_epi8(), _mm256_maskz_mul_epi32(), _mm256_maskz_mul_epu32(), _mm256_maskz_mulhi_epi16(), _mm256_maskz_mulhi_epu16(), _mm256_maskz_mulhrs_epi16(), _mm256_maskz_mullo_epi16(), _mm256_maskz_mullo_epi32(), _mm256_maskz_mullo_epi64(), _mm256_maskz_multishift_epi64_epi8(), _mm256_maskz_or_epi32(), _mm256_maskz_or_epi64(), _mm256_maskz_packs_epi16(), _mm256_maskz_packs_epi32(), _mm256_maskz_packus_epi16(), _mm256_maskz_packus_epi32(), _mm256_maskz_permutexvar_epi16(), _mm256_maskz_permutexvar_epi32(), _mm256_maskz_permutexvar_epi64(), _mm256_maskz_permutexvar_epi8(), _mm256_maskz_rolv_epi32(), _mm256_maskz_rolv_epi64(), _mm256_maskz_rorv_epi32(), _mm256_maskz_rorv_epi64(), _mm256_maskz_set1_epi16(), _mm256_maskz_set1_epi64(), _mm256_maskz_set1_epi8(), _mm256_maskz_shuffle_epi8(), _mm256_maskz_sll_epi16(), _mm256_maskz_sll_epi32(), _mm256_maskz_sll_epi64(), _mm256_maskz_sllv_epi16(), _mm256_maskz_sllv_epi32(), _mm256_maskz_sllv_epi64(), _mm256_maskz_sra_epi16(), _mm256_maskz_sra_epi32(), _mm256_maskz_sra_epi64(), _mm256_maskz_srav_epi16(), _mm256_maskz_srav_epi32(), _mm256_maskz_srav_epi64(), _mm256_maskz_srl_epi16(), _mm256_maskz_srl_epi32(), _mm256_maskz_srl_epi64(), _mm256_maskz_srlv_epi16(), _mm256_maskz_srlv_epi32(), 
_mm256_maskz_srlv_epi64(), _mm256_maskz_sub_epi16(), _mm256_maskz_sub_epi32(), _mm256_maskz_sub_epi64(), _mm256_maskz_sub_epi8(), _mm256_maskz_subs_epi16(), _mm256_maskz_subs_epi8(), _mm256_maskz_subs_epu16(), _mm256_maskz_subs_epu8(), _mm256_maskz_unpackhi_epi16(), _mm256_maskz_unpackhi_epi32(), _mm256_maskz_unpackhi_epi64(), _mm256_maskz_unpackhi_epi8(), _mm256_maskz_unpacklo_epi16(), _mm256_maskz_unpacklo_epi32(), _mm256_maskz_unpacklo_epi64(), _mm256_maskz_unpacklo_epi8(), _mm256_maskz_xor_epi32(), _mm256_maskz_xor_epi64(), _mm256_max_epi64(), _mm256_max_epu64(), _mm256_min_epi64(), _mm256_min_epu64(), _mm256_rolv_epi32(), _mm256_rolv_epi64(), _mm256_rorv_epi32(), _mm256_rorv_epi64(), _mm256_sllv_epi16(), _mm256_sra_epi64(), _mm256_srav_epi16(), _mm256_srav_epi64(), _mm256_srlv_epi16(), _mm512_cvtepi16_epi8(), _mm512_cvtsepi16_epi8(), _mm512_cvttpd_epi32(), _mm512_cvtusepi16_epi8(), _mm512_maskz_cvtepi16_epi8(), _mm512_maskz_cvtepi32_epi16(), _mm512_maskz_cvtepi64_epi32(), _mm512_maskz_cvtpd_epi32(), _mm512_maskz_cvtpd_epu32(), _mm512_maskz_cvtsepi16_epi8(), _mm512_maskz_cvtsepi32_epi16(), _mm512_maskz_cvtsepi64_epi32(), _mm512_maskz_cvttpd_epi32(), _mm512_maskz_cvttpd_epu32(), _mm512_maskz_cvtusepi16_epi8(), _mm512_maskz_cvtusepi32_epi16(), and _mm512_maskz_cvtusepi64_epi32().
|
static |
Calculates the square roots of the values in a 256-bit vector of [4 x double].
This intrinsic corresponds to the VSQRTPD
/ SQRTPD instruction.
__a | A 256-bit vector of [4 x double]. |
Definition at line 326 of file avxintrin.h.
|
static |
Calculates the square roots of the values in a 256-bit vector of [8 x float].
This intrinsic corresponds to the VSQRTPS
/ SQRTPS instruction.
__a | A 256-bit vector of [8 x float]. |
Definition at line 343 of file avxintrin.h.
|
static |
Definition at line 2403 of file avxintrin.h.
|
static |
Definition at line 2409 of file avxintrin.h.
|
static |
Definition at line 2433 of file avxintrin.h.
|
static |
Definition at line 2864 of file avxintrin.h.
References _mm256_castps256_ps128(), _mm256_extractf128_ps, and _mm_storeu_ps().
|
static |
Definition at line 2875 of file avxintrin.h.
References _mm256_castpd256_pd128(), _mm256_extractf128_pd, and _mm_storeu_pd().
|
static |
Definition at line 2886 of file avxintrin.h.
References _mm256_castsi256_si128(), _mm256_extractf128_si256, and _mm_storeu_si128().
|
static |
Definition at line 2415 of file avxintrin.h.
|
static |
Definition at line 2424 of file avxintrin.h.
|
static |
Definition at line 2439 of file avxintrin.h.
|
static |
Definition at line 2506 of file avxintrin.h.
|
static |
Definition at line 2512 of file avxintrin.h.
|
static |
Definition at line 2500 of file avxintrin.h.
|
static |
Subtracts two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VSUBPD
/ SUBPD instruction.
__a | A 256-bit vector of [4 x double] containing the minuend. |
__b | A 256-bit vector of [4 x double] containing the subtrahend. |
Definition at line 105 of file avxintrin.h.
References __b.
|
static |
Subtracts two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VSUBPS
/ SUBPS instruction.
__a | A 256-bit vector of [8 x float] containing the minuend. |
__b | A 256-bit vector of [8 x float] containing the subtrahend. |
Definition at line 123 of file avxintrin.h.
References __b.
|
static |
Definition at line 2242 of file avxintrin.h.
|
static |
Definition at line 2260 of file avxintrin.h.
|
static |
Definition at line 2278 of file avxintrin.h.
|
static |
Definition at line 2248 of file avxintrin.h.
|
static |
Definition at line 2266 of file avxintrin.h.
|
static |
Definition at line 2284 of file avxintrin.h.
|
static |
Definition at line 2236 of file avxintrin.h.
|
static |
Definition at line 2254 of file avxintrin.h.
|
static |
Definition at line 2272 of file avxintrin.h.
|
static |
Definition at line 2519 of file avxintrin.h.
Referenced by _mm256_broadcast_f32x4(), and _mm256_broadcast_f64x2().
|
static |
Definition at line 2525 of file avxintrin.h.
Referenced by _mm256_broadcast_f32x2(), and _mm512_cvtpd_ps().
|
static |
Definition at line 2531 of file avxintrin.h.
Referenced by _mm256_broadcast_i32x2(), _mm256_broadcast_i32x4(), _mm256_broadcast_i64x2(), _mm256_conflict_epi32(), _mm256_conflict_epi64(), _mm256_multishift_epi64_epi8(), _mm256_permutexvar_epi16(), _mm256_permutexvar_epi32(), _mm256_permutexvar_epi64(), _mm256_permutexvar_epi8(), _mm256_permutexvar_pd(), _mm256_permutexvar_ps(), _mm512_cvtepi32_epi16(), _mm512_cvtepi64_epi32(), _mm512_cvtpd_epi32(), _mm512_cvtpd_epu32(), _mm512_cvtsepi32_epi16(), _mm512_cvtsepi64_epi32(), _mm512_cvttpd_epu32(), _mm512_cvtusepi32_epi16(), and _mm512_cvtusepi64_epi32().
|
static |
Definition at line 2175 of file avxintrin.h.
Referenced by _mm256_mask_unpackhi_pd(), and _mm256_maskz_unpackhi_pd().
|
static |
Definition at line 2187 of file avxintrin.h.
Referenced by _mm256_mask_unpackhi_ps(), and _mm256_maskz_unpackhi_ps().
|
static |
Definition at line 2181 of file avxintrin.h.
Referenced by _mm256_mask_unpacklo_pd(), and _mm256_maskz_unpacklo_pd().
|
static |
Definition at line 2193 of file avxintrin.h.
Referenced by _mm256_mask_unpacklo_ps(), and _mm256_maskz_unpacklo_ps().
|
static |
Performs a bitwise XOR of two 256-bit vectors of [4 x double].
This intrinsic corresponds to the VXORPD
/ XORPD instruction.
__a | A 256-bit vector of [4 x double] containing one of the source operands. |
__b | A 256-bit vector of [4 x double] containing one of the source operands. |
Definition at line 643 of file avxintrin.h.
References __b.
|
static |
Performs a bitwise XOR of two 256-bit vectors of [8 x float].
This intrinsic corresponds to the VXORPS
/ XORPS instruction.
__a | A 256-bit vector of [8 x float] containing one of the source operands. |
__b | A 256-bit vector of [8 x float] containing one of the source operands. |
Definition at line 661 of file avxintrin.h.
References __b.
|
static |
Definition at line 2304 of file avxintrin.h.
|
static |
Definition at line 2310 of file avxintrin.h.
|
static |
Definition at line 2317 of file avxintrin.h.
|
static |
Definition at line 2449 of file avxintrin.h.
|
static |
Definition at line 2462 of file avxintrin.h.
|
static |
Definition at line 2481 of file avxintrin.h.
|
static |
Definition at line 2493 of file avxintrin.h.
|
static |
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector operand.
This intrinsic corresponds to the VPERMILPD
/ PERMILPD instruction.
__a | A 128-bit vector of [2 x double]. |
__c | A 128-bit integer vector operand specifying how the values are to be copied. Bit [1]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [65]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector. |
Definition at line 784 of file avxintrin.h.
|
static |
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand.
This intrinsic corresponds to the VPERMILPS
/ PERMILPS instruction.
__a | A 128-bit vector of [4 x float]. |
__c | A 128-bit integer vector operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [33:32]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [65:64]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [97:96]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector. |
Definition at line 878 of file avxintrin.h.
|
static |
Definition at line 2206 of file avxintrin.h.
|
static |
Definition at line 2224 of file avxintrin.h.
|
static |
Definition at line 2212 of file avxintrin.h.
|
static |
Definition at line 2230 of file avxintrin.h.
|
static |
Definition at line 2200 of file avxintrin.h.
|
static |
Definition at line 2218 of file avxintrin.h.