clang  3.9.0
Macros | Typedefs | Functions
avxintrin.h File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define __DEFAULT_FN_ATTRS   __attribute__((__always_inline__, __nodebug__, __target__("avx")))
 
#define _mm256_round_pd(V, M)
 Rounds the values in a 256-bit vector of [4 x double] as specified by the byte operand. More...
 
#define _mm256_round_ps(V, M)
 Rounds the values stored in a 256-bit vector of [8 x float] as specified by the byte operand. More...
 
#define _mm256_ceil_pd(V)   _mm256_round_pd((V), _MM_FROUND_CEIL)
 Rounds up the values stored in a 256-bit vector of [4 x double]. More...
 
#define _mm256_floor_pd(V)   _mm256_round_pd((V), _MM_FROUND_FLOOR)
 Rounds down the values stored in a 256-bit vector of [4 x double]. More...
 
#define _mm256_ceil_ps(V)   _mm256_round_ps((V), _MM_FROUND_CEIL)
 Rounds up the values stored in a 256-bit vector of [8 x float]. More...
 
#define _mm256_floor_ps(V)   _mm256_round_ps((V), _MM_FROUND_FLOOR)
 Rounds down the values stored in a 256-bit vector of [8 x float]. More...
 
#define _mm_permute_pd(A, C)
 Copies the values in a 128-bit vector of [2 x double] as specified by the immediate integer operand. More...
 
#define _mm256_permute_pd(A, C)
 Copies the values in a 256-bit vector of [4 x double] as specified by the immediate integer operand. More...
 
#define _mm_permute_ps(A, C)
 Copies the values in a 128-bit vector of [4 x float] as specified by the immediate integer operand. More...
 
#define _mm256_permute_ps(A, C)
 Copies the values in a 256-bit vector of [8 x float] as specified by the immediate integer operand. More...
 
#define _mm256_permute2f128_pd(V1, V2, M)
 Permutes 128-bit data values stored in two 256-bit vectors of [4 x double], as specified by the immediate integer operand. More...
 
#define _mm256_permute2f128_ps(V1, V2, M)
 Permutes 128-bit data values stored in two 256-bit vectors of [8 x float], as specified by the immediate integer operand. More...
 
#define _mm256_permute2f128_si256(V1, V2, M)
 Permutes 128-bit data values stored in two 256-bit integer vectors, as specified by the immediate integer operand. More...
 
#define _mm256_blend_pd(V1, V2, M)
 Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the immediate integer operand. More...
 
#define _mm256_blend_ps(V1, V2, M)
 Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the immediate integer operand. More...
 
#define _mm256_dp_ps(V1, V2, M)
 Computes two dot products in parallel, using the lower and upper halves of two [8 x float] vectors as input to the two computations, and returning the two dot products in the lower and upper halves of the [8 x float] result. More...
 
#define _mm256_shuffle_ps(a, b, mask)
 Selects 8 float values from the 256-bit operands of [8 x float], as specified by the immediate value operand. More...
 
#define _mm256_shuffle_pd(a, b, mask)
 Selects four double-precision values from the 256-bit operands of [4 x double], as specified by the immediate value operand. More...
 
#define _CMP_EQ_OQ   0x00 /* Equal (ordered, non-signaling) */
 
#define _CMP_LT_OS   0x01 /* Less-than (ordered, signaling) */
 
#define _CMP_LE_OS   0x02 /* Less-than-or-equal (ordered, signaling) */
 
#define _CMP_UNORD_Q   0x03 /* Unordered (non-signaling) */
 
#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling) */
 
#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling) */
 
#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling) */
 
#define _CMP_ORD_Q   0x07 /* Ordered (nonsignaling) */
 
#define _CMP_EQ_UQ   0x08 /* Equal (unordered, non-signaling) */
 
#define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unord, signaling) */
 
#define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling) */
 
#define _CMP_FALSE_OQ   0x0b /* False (ordered, non-signaling) */
 
#define _CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling) */
 
#define _CMP_GE_OS   0x0d /* Greater-than-or-equal (ordered, signaling) */
 
#define _CMP_GT_OS   0x0e /* Greater-than (ordered, signaling) */
 
#define _CMP_TRUE_UQ   0x0f /* True (unordered, non-signaling) */
 
#define _CMP_EQ_OS   0x10 /* Equal (ordered, signaling) */
 
#define _CMP_LT_OQ   0x11 /* Less-than (ordered, non-signaling) */
 
#define _CMP_LE_OQ   0x12 /* Less-than-or-equal (ordered, non-signaling) */
 
#define _CMP_UNORD_S   0x13 /* Unordered (signaling) */
 
#define _CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling) */
 
#define _CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling) */
 
#define _CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unord, non-signaling) */
 
#define _CMP_ORD_S   0x17 /* Ordered (signaling) */
 
#define _CMP_EQ_US   0x18 /* Equal (unordered, signaling) */
 
#define _CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unord, non-sign) */
 
#define _CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling) */
 
#define _CMP_FALSE_OS   0x1b /* False (ordered, signaling) */
 
#define _CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling) */
 
#define _CMP_GE_OQ   0x1d /* Greater-than-or-equal (ordered, non-signaling) */
 
#define _CMP_GT_OQ   0x1e /* Greater-than (ordered, non-signaling) */
 
#define _CMP_TRUE_US   0x1f /* True (unordered, signaling) */
 
#define _mm_cmp_pd(a, b, c)
 Compares each of the corresponding double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand. More...
 
#define _mm_cmp_ps(a, b, c)
 Compares each of the corresponding values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand. More...
 
#define _mm256_cmp_pd(a, b, c)
 Compares each of the corresponding double-precision values of two 256-bit vectors of [4 x double], using the operation specified by the immediate integer operand. More...
 
#define _mm256_cmp_ps(a, b, c)
 Compares each of the corresponding values of two 256-bit vectors of [8 x float], using the operation specified by the immediate integer operand. More...
 
#define _mm_cmp_sd(a, b, c)
 Compares each of the corresponding scalar double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand. More...
 
#define _mm_cmp_ss(a, b, c)
 Compares each of the corresponding scalar values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand. More...
 
#define _mm256_insertf128_ps(V1, V2, M)
 
#define _mm256_insertf128_pd(V1, V2, M)
 
#define _mm256_insertf128_si256(V1, V2, M)
 
#define _mm256_extractf128_ps(V, M)
 
#define _mm256_extractf128_pd(V, M)
 
#define _mm256_extractf128_si256(V, M)
 

Typedefs

typedef double __v4df __attribute__ ((__vector_size__(32)))
 

Functions

static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_add_pd (__m256d __a, __m256d __b)
 Adds two 256-bit vectors of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_add_ps (__m256 __a, __m256 __b)
 Adds two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_sub_pd (__m256d __a, __m256d __b)
 Subtracts two 256-bit vectors of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_sub_ps (__m256 __a, __m256 __b)
 Subtracts two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_addsub_pd (__m256d __a, __m256d __b)
 Adds the even-indexed values and subtracts the odd-indexed values of two 256-bit vectors of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_addsub_ps (__m256 __a, __m256 __b)
 Adds the even-indexed values and subtracts the odd-indexed values of two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_div_pd (__m256d __a, __m256d __b)
 Divides two 256-bit vectors of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_div_ps (__m256 __a, __m256 __b)
 Divides two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_max_pd (__m256d __a, __m256d __b)
 Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_max_ps (__m256 __a, __m256 __b)
 Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_min_pd (__m256d __a, __m256d __b)
 Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_min_ps (__m256 __a, __m256 __b)
 Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_mul_pd (__m256d __a, __m256d __b)
 Multiplies two 256-bit vectors of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_mul_ps (__m256 __a, __m256 __b)
 Multiplies two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_sqrt_pd (__m256d __a)
 Calculates the square roots of the values in a 256-bit vector of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_sqrt_ps (__m256 __a)
 Calculates the square roots of the values in a 256-bit vector of [8 x float]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_rsqrt_ps (__m256 __a)
 Calculates the reciprocal square roots of the values in a 256-bit vector of [8 x float]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_rcp_ps (__m256 __a)
 Calculates the reciprocals of the values in a 256-bit vector of [8 x float]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_and_pd (__m256d __a, __m256d __b)
 Performs a bitwise AND of two 256-bit vectors of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_and_ps (__m256 __a, __m256 __b)
 Performs a bitwise AND of two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_andnot_pd (__m256d __a, __m256d __b)
 Performs a bitwise AND of two 256-bit vectors of [4 x double], using the one's complement of the values contained in the first source operand. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_andnot_ps (__m256 __a, __m256 __b)
 Performs a bitwise AND of two 256-bit vectors of [8 x float], using the one's complement of the values contained in the first source operand. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_or_pd (__m256d __a, __m256d __b)
 Performs a bitwise OR of two 256-bit vectors of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_or_ps (__m256 __a, __m256 __b)
 Performs a bitwise OR of two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_xor_pd (__m256d __a, __m256d __b)
 Performs a bitwise XOR of two 256-bit vectors of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_xor_ps (__m256 __a, __m256 __b)
 Performs a bitwise XOR of two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_hadd_pd (__m256d __a, __m256d __b)
 Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_hadd_ps (__m256 __a, __m256 __b)
 Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [8 x float]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_hsub_pd (__m256d __a, __m256d __b)
 Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_hsub_ps (__m256 __a, __m256 __b)
 Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [8 x float]. More...
 
static __inline __m128d
__DEFAULT_FN_ATTRS 
_mm_permutevar_pd (__m128d __a, __m128i __c)
 Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector operand. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_permutevar_pd (__m256d __a, __m256i __c)
 Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector operand. More...
 
static __inline __m128
__DEFAULT_FN_ATTRS 
_mm_permutevar_ps (__m128 __a, __m128i __c)
 Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_permutevar_ps (__m256 __a, __m256i __c)
 Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_blendv_pd (__m256d __a, __m256d __b, __m256d __c)
 Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the 256-bit vector operand. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_blendv_ps (__m256 __a, __m256 __b, __m256 __c)
 Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the 256-bit vector operand. More...
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_extract_epi32 (__m256i __a, const int __imm)
 Takes a [8 x i32] vector and returns the vector element value indexed by the immediate constant operand. More...
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_extract_epi16 (__m256i __a, const int __imm)
 Takes a [16 x i16] vector and returns the vector element value indexed by the immediate constant operand. More...
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_extract_epi8 (__m256i __a, const int __imm)
 Takes a [32 x i8] vector and returns the vector element value indexed by the immediate constant operand. More...
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_insert_epi32 (__m256i __a, int __b, int const __imm)
 Takes a [8 x i32] vector and replaces the vector element value indexed by the immediate constant operand with a new value. More...
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_insert_epi16 (__m256i __a, int __b, int const __imm)
 Takes a [16 x i16] vector and replaces the vector element value indexed by the immediate constant operand with a new value. More...
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_insert_epi8 (__m256i __a, int __b, int const __imm)
 Takes a [32 x i8] vector and replaces the vector element value indexed by the immediate constant operand with a new value. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_cvtepi32_pd (__m128i __a)
 Converts a vector of [4 x i32] into a vector of [4 x double]. More...
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_cvtepi32_ps (__m256i __a)
 Converts a vector of [8 x i32] into a vector of [8 x float]. More...
 
static __inline __m128
__DEFAULT_FN_ATTRS 
_mm256_cvtpd_ps (__m256d __a)
 Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float]. More...
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_cvtps_epi32 (__m256 __a)
 Converts a vector of [8 x float] into a vector of [8 x i32]. More...
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_cvtps_pd (__m128 __a)
 
static __inline __m128i
__DEFAULT_FN_ATTRS 
_mm256_cvttpd_epi32 (__m256d __a)
 
static __inline __m128i
__DEFAULT_FN_ATTRS 
_mm256_cvtpd_epi32 (__m256d __a)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_cvttps_epi32 (__m256 __a)
 
static __inline double
__DEFAULT_FN_ATTRS 
_mm256_cvtsd_f64 (__m256d __a)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_cvtsi256_si32 (__m256i __a)
 
static __inline float
__DEFAULT_FN_ATTRS 
_mm256_cvtss_f32 (__m256 __a)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_movehdup_ps (__m256 __a)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_moveldup_ps (__m256 __a)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_movedup_pd (__m256d __a)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_unpackhi_pd (__m256d __a, __m256d __b)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_unpacklo_pd (__m256d __a, __m256d __b)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_unpackhi_ps (__m256 __a, __m256 __b)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_unpacklo_ps (__m256 __a, __m256 __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm_testz_pd (__m128d __a, __m128d __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm_testc_pd (__m128d __a, __m128d __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm_testnzc_pd (__m128d __a, __m128d __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm_testz_ps (__m128 __a, __m128 __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm_testc_ps (__m128 __a, __m128 __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm_testnzc_ps (__m128 __a, __m128 __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_testz_pd (__m256d __a, __m256d __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_testc_pd (__m256d __a, __m256d __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_testnzc_pd (__m256d __a, __m256d __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_testz_ps (__m256 __a, __m256 __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_testc_ps (__m256 __a, __m256 __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_testnzc_ps (__m256 __a, __m256 __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_testz_si256 (__m256i __a, __m256i __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_testc_si256 (__m256i __a, __m256i __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_testnzc_si256 (__m256i __a, __m256i __b)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_movemask_pd (__m256d __a)
 
static __inline int
__DEFAULT_FN_ATTRS 
_mm256_movemask_ps (__m256 __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_zeroall (void)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_zeroupper (void)
 
static __inline __m128
__DEFAULT_FN_ATTRS 
_mm_broadcast_ss (float const *__a)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_broadcast_sd (double const *__a)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_broadcast_ss (float const *__a)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_broadcast_pd (__m128d const *__a)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_broadcast_ps (__m128 const *__a)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_load_pd (double const *__p)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_load_ps (float const *__p)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_loadu_pd (double const *__p)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_loadu_ps (float const *__p)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_load_si256 (__m256i const *__p)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_loadu_si256 (__m256i const *__p)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_lddqu_si256 (__m256i const *__p)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_store_pd (double *__p, __m256d __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_store_ps (float *__p, __m256 __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_storeu_pd (double *__p, __m256d __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_storeu_ps (float *__p, __m256 __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_store_si256 (__m256i *__p, __m256i __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_storeu_si256 (__m256i *__p, __m256i __a)
 
static __inline __m128d
__DEFAULT_FN_ATTRS 
_mm_maskload_pd (double const *__p, __m128i __m)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_maskload_pd (double const *__p, __m256i __m)
 
static __inline __m128
__DEFAULT_FN_ATTRS 
_mm_maskload_ps (float const *__p, __m128i __m)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_maskload_ps (float const *__p, __m256i __m)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_maskstore_ps (float *__p, __m256i __m, __m256 __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm_maskstore_pd (double *__p, __m128i __m, __m128d __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_maskstore_pd (double *__p, __m256i __m, __m256d __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm_maskstore_ps (float *__p, __m128i __m, __m128 __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_stream_si256 (__m256i *__a, __m256i __b)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_stream_pd (double *__a, __m256d __b)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_stream_ps (float *__p, __m256 __a)
 
static __inline__ __m256d
__DEFAULT_FN_ATTRS 
_mm256_undefined_pd (void)
 
static __inline__ __m256
__DEFAULT_FN_ATTRS 
_mm256_undefined_ps (void)
 
static __inline__ __m256i
__DEFAULT_FN_ATTRS 
_mm256_undefined_si256 (void)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_set_pd (double __a, double __b, double __c, double __d)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_set_ps (float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_set_epi32 (int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_set_epi16 (short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_set_epi8 (char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_set_epi64x (long long __a, long long __b, long long __c, long long __d)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_setr_pd (double __a, double __b, double __c, double __d)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_setr_ps (float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_setr_epi32 (int __i0, int __i1, int __i2, int __i3, int __i4, int __i5, int __i6, int __i7)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_setr_epi16 (short __w15, short __w14, short __w13, short __w12, short __w11, short __w10, short __w09, short __w08, short __w07, short __w06, short __w05, short __w04, short __w03, short __w02, short __w01, short __w00)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_setr_epi8 (char __b31, char __b30, char __b29, char __b28, char __b27, char __b26, char __b25, char __b24, char __b23, char __b22, char __b21, char __b20, char __b19, char __b18, char __b17, char __b16, char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b09, char __b08, char __b07, char __b06, char __b05, char __b04, char __b03, char __b02, char __b01, char __b00)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_setr_epi64x (long long __a, long long __b, long long __c, long long __d)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_set1_pd (double __w)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_set1_ps (float __w)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_set1_epi32 (int __i)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_set1_epi16 (short __w)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_set1_epi8 (char __b)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_set1_epi64x (long long __q)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_setzero_pd (void)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_setzero_ps (void)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_setzero_si256 (void)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_castpd_ps (__m256d __a)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_castpd_si256 (__m256d __a)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_castps_pd (__m256 __a)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_castps_si256 (__m256 __a)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_castsi256_ps (__m256i __a)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_castsi256_pd (__m256i __a)
 
static __inline __m128d
__DEFAULT_FN_ATTRS 
_mm256_castpd256_pd128 (__m256d __a)
 
static __inline __m128
__DEFAULT_FN_ATTRS 
_mm256_castps256_ps128 (__m256 __a)
 
static __inline __m128i
__DEFAULT_FN_ATTRS 
_mm256_castsi256_si128 (__m256i __a)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_castpd128_pd256 (__m128d __a)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_castps128_ps256 (__m128 __a)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_castsi128_si256 (__m128i __a)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_loadu2_m128 (float const *__addr_hi, float const *__addr_lo)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_loadu2_m128d (double const *__addr_hi, double const *__addr_lo)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_loadu2_m128i (__m128i const *__addr_hi, __m128i const *__addr_lo)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_storeu2_m128 (float *__addr_hi, float *__addr_lo, __m256 __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_storeu2_m128d (double *__addr_hi, double *__addr_lo, __m256d __a)
 
static __inline void
__DEFAULT_FN_ATTRS 
_mm256_storeu2_m128i (__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_set_m128 (__m128 __hi, __m128 __lo)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_set_m128d (__m128d __hi, __m128d __lo)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_set_m128i (__m128i __hi, __m128i __lo)
 
static __inline __m256
__DEFAULT_FN_ATTRS 
_mm256_setr_m128 (__m128 __lo, __m128 __hi)
 
static __inline __m256d
__DEFAULT_FN_ATTRS 
_mm256_setr_m128d (__m128d __lo, __m128d __hi)
 
static __inline __m256i
__DEFAULT_FN_ATTRS 
_mm256_setr_m128i (__m128i __lo, __m128i __hi)
 

Macro Definition Documentation

#define __DEFAULT_FN_ATTRS   __attribute__((__always_inline__, __nodebug__, __target__("avx")))

Definition at line 53 of file avxintrin.h.

#define _CMP_EQ_OQ   0x00 /* Equal (ordered, non-signaling) */

Definition at line 1605 of file avxintrin.h.

#define _CMP_EQ_OS   0x10 /* Equal (ordered, signaling) */

Definition at line 1621 of file avxintrin.h.

#define _CMP_EQ_UQ   0x08 /* Equal (unordered, non-signaling) */

Definition at line 1613 of file avxintrin.h.

#define _CMP_EQ_US   0x18 /* Equal (unordered, signaling) */

Definition at line 1629 of file avxintrin.h.

#define _CMP_FALSE_OQ   0x0b /* False (ordered, non-signaling) */

Definition at line 1616 of file avxintrin.h.

#define _CMP_FALSE_OS   0x1b /* False (ordered, signaling) */

Definition at line 1632 of file avxintrin.h.

#define _CMP_GE_OQ   0x1d /* Greater-than-or-equal (ordered, non-signaling) */

Definition at line 1634 of file avxintrin.h.

#define _CMP_GE_OS   0x0d /* Greater-than-or-equal (ordered, signaling) */

Definition at line 1618 of file avxintrin.h.

#define _CMP_GT_OQ   0x1e /* Greater-than (ordered, non-signaling) */

Definition at line 1635 of file avxintrin.h.

#define _CMP_GT_OS   0x0e /* Greater-than (ordered, signaling) */

Definition at line 1619 of file avxintrin.h.

#define _CMP_LE_OQ   0x12 /* Less-than-or-equal (ordered, non-signaling) */

Definition at line 1623 of file avxintrin.h.

#define _CMP_LE_OS   0x02 /* Less-than-or-equal (ordered, signaling) */

Definition at line 1607 of file avxintrin.h.

#define _CMP_LT_OQ   0x11 /* Less-than (ordered, non-signaling) */

Definition at line 1622 of file avxintrin.h.

#define _CMP_LT_OS   0x01 /* Less-than (ordered, signaling) */

Definition at line 1606 of file avxintrin.h.

#define _CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling) */

Definition at line 1617 of file avxintrin.h.

#define _CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling) */

Definition at line 1633 of file avxintrin.h.

#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling) */

Definition at line 1609 of file avxintrin.h.

#define _CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling) */

Definition at line 1625 of file avxintrin.h.

#define _CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unord, non-sign) */

Definition at line 1630 of file avxintrin.h.

#define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unord, signaling) */

Definition at line 1614 of file avxintrin.h.

#define _CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling) */

Definition at line 1631 of file avxintrin.h.

#define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling) */

Definition at line 1615 of file avxintrin.h.

#define _CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unord, non-signaling) */

Definition at line 1627 of file avxintrin.h.

#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling) */

Definition at line 1611 of file avxintrin.h.

#define _CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling) */

Definition at line 1626 of file avxintrin.h.

#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling) */

Definition at line 1610 of file avxintrin.h.

#define _CMP_ORD_Q   0x07 /* Ordered (nonsignaling) */

Definition at line 1612 of file avxintrin.h.

#define _CMP_ORD_S   0x17 /* Ordered (signaling) */

Definition at line 1628 of file avxintrin.h.

#define _CMP_TRUE_UQ   0x0f /* True (unordered, non-signaling) */

Definition at line 1620 of file avxintrin.h.

#define _CMP_TRUE_US   0x1f /* True (unordered, signaling) */

Definition at line 1636 of file avxintrin.h.

#define _CMP_UNORD_Q   0x03 /* Unordered (non-signaling) */

Definition at line 1608 of file avxintrin.h.

#define _CMP_UNORD_S   0x13 /* Unordered (signaling) */

Definition at line 1624 of file avxintrin.h.

#define _mm256_blend_pd(V1, V2, M)
Value:
__extension__ ({ \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
(__v4df)(__m256d)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })

Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the immediate integer operand.

__m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);

This intrinsic corresponds to the VBLENDPD / BLENDPD instruction.

Parameters
V1: A 256-bit vector of [4 x double].
V2: A 256-bit vector of [4 x double].
M: An immediate integer operand, with mask bits [3:0] specifying how the values are to be copied. The position of the mask bit corresponds to the index of a copied value. When a mask bit is 0, the corresponding 64-bit element in operand V1 is copied to the same position in the destination. When a mask bit is 1, the corresponding 64-bit element in operand V2 is copied to the same position in the destination.
Returns
A 256-bit vector of [4 x double] containing the copied values.

Definition at line 1354 of file avxintrin.h.

#define _mm256_blend_ps(V1, V2, M)
Value:
__extension__ ({ \
(__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
(((M) & 0x08) ? 11 : 3), \
(((M) & 0x10) ? 12 : 4), \
(((M) & 0x20) ? 13 : 5), \
(((M) & 0x40) ? 14 : 6), \
(((M) & 0x80) ? 15 : 7)); })

Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the immediate integer operand.

__m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);

This intrinsic corresponds to the VBLENDPS / BLENDPS instruction.

Parameters
V1: A 256-bit vector of [8 x float].
V2: A 256-bit vector of [8 x float].
M: An immediate integer operand, with mask bits [7:0] specifying how the values are to be copied. The position of the mask bit corresponds to the index of a copied value. When a mask bit is 0, the corresponding 32-bit element in operand V1 is copied to the same position in the destination. When a mask bit is 1, the corresponding 32-bit element in operand V2 is copied to the same position in the destination.
Returns
A 256-bit vector of [8 x float] containing the copied values.

Definition at line 1386 of file avxintrin.h.

#define _mm256_ceil_pd (   V)    _mm256_round_pd((V), _MM_FROUND_CEIL)

Rounds up the values stored in a 256-bit vector of [4 x double].

The source values are rounded up to integer values and returned as 64-bit double-precision floating-point values.

__m256d _mm256_ceil_pd(__m256d V);

This intrinsic corresponds to the VROUNDPD / ROUNDPD instruction.

Parameters
VA 256-bit vector of [4 x double].
Returns
A 256-bit vector of [4 x double] containing the rounded up values.

Definition at line 461 of file avxintrin.h.

#define _mm256_ceil_ps (   V)    _mm256_round_ps((V), _MM_FROUND_CEIL)

Rounds up the values stored in a 256-bit vector of [8 x float].

The source values are rounded up to integer values and returned as floating-point values.

__m256 _mm256_ceil_ps(__m256 V);

This intrinsic corresponds to the VROUNDPS / ROUNDPS instruction.

Parameters
VA 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the rounded up values.

Definition at line 496 of file avxintrin.h.

#define _mm256_cmp_pd (   a,
  b,
  c 
)
Value:
__extension__ ({ \
(__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
(__v4df)(__m256d)(b), (c)); })

Compares each of the corresponding double-precision values of two 256-bit vectors of [4 x double], using the operation specified by the immediate integer operand.

Returns a [4 x double] vector consisting of four doubles corresponding to the four comparison results: zero if the comparison is false, and all 1's if the comparison is true.

__m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);

This intrinsic corresponds to the VCMPPD / CMPPD instruction.

Parameters
aA 256-bit vector of [4 x double].
bA 256-bit vector of [4 x double].
cAn immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h, 08h, 10h, 18h: Equal 01h, 09h, 11h, 19h: Less than 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped operands) 03h, 0Bh, 13h, 1Bh: Unordered 04h, 0Ch, 14h, 1Ch: Not equal 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands) 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal (swapped operands) 07h, 0Fh, 17h, 1Fh: Ordered
Returns
A 256-bit vector of [4 x double] containing the comparison results.

Definition at line 1742 of file avxintrin.h.

#define _mm256_cmp_ps (   a,
  b,
  c 
)
Value:
__extension__ ({ \
(__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), (c)); })

Compares each of the corresponding values of two 256-bit vectors of [8 x float], using the operation specified by the immediate integer operand.

Returns an [8 x float] vector consisting of eight floats corresponding to the eight comparison results: zero if the comparison is false, and all 1's if the comparison is true.

__m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);

This intrinsic corresponds to the VCMPPS / CMPPS instruction.

Parameters
aA 256-bit vector of [8 x float].
bA 256-bit vector of [8 x float].
cAn immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h, 08h, 10h, 18h: Equal 01h, 09h, 11h, 19h: Less than 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped operands) 03h, 0Bh, 13h, 1Bh: Unordered 04h, 0Ch, 14h, 1Ch: Not equal 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands) 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal (swapped operands) 07h, 0Fh, 17h, 1Fh: Ordered
Returns
A 256-bit vector of [8 x float] containing the comparison results.

Definition at line 1778 of file avxintrin.h.

#define _mm256_dp_ps (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), (M)); })

Computes two dot products in parallel, using the lower and upper halves of two [8 x float] vectors as input to the two computations, and returning the two dot products in the lower and upper halves of the [8 x float] result.

The immediate integer operand controls which input elements will contribute to the dot product, and where the final results are returned. In general, for each dot product, the four corresponding elements of the input vectors are multiplied; the first two and second two products are summed, then the two sums are added to form the final result.

__m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);

This intrinsic corresponds to the VDPPS / DPPS instruction.

Parameters
V1A vector of [8 x float] values, treated as two [4 x float] vectors.
V2A vector of [8 x float] values, treated as two [4 x float] vectors.
MAn immediate integer argument. Bits [7:4] determine which elements of the input vectors are used, with bit [4] corresponding to the lowest element and bit [7] corresponding to the highest element of each [4 x float] subvector. If a bit is set, the corresponding elements from the two input vectors are used as an input for dot product; otherwise that input is treated as zero. Bits [3:0] determine which elements of the result will receive a copy of the final dot product, with bit [0] corresponding to the lowest element and bit [3] corresponding to the highest element of each [4 x float] subvector. If a bit is set, the dot product is returned in the corresponding element; otherwise that element is set to zero. The bitmask is applied in the same way to each of the two parallel dot product computations.
Returns
A 256-bit vector of [8 x float] containing the two dot products.

Definition at line 1491 of file avxintrin.h.

#define _mm256_extractf128_pd (   V,
  M 
)
Value:
__extension__ ({ \
(__m128d)__builtin_shufflevector( \
(__v4df)(__m256d)(V), \
(__v4df)(_mm256_undefined_pd()), \
(((M) & 1) ? 2 : 0), \
(((M) & 1) ? 3 : 1) );})
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void)
Definition: avxintrin.h:2519

Definition at line 2826 of file avxintrin.h.

Referenced by _mm256_storeu2_m128d().

#define _mm256_extractf128_ps (   V,
  M 
)
Value:
__extension__ ({ \
(__m128)__builtin_shufflevector( \
(__v8sf)(__m256)(V), \
(__v8sf)(_mm256_undefined_ps()), \
(((M) & 1) ? 4 : 0), \
(((M) & 1) ? 5 : 1), \
(((M) & 1) ? 6 : 2), \
(((M) & 1) ? 7 : 3) );})
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Definition: avxintrin.h:2525

Definition at line 2817 of file avxintrin.h.

Referenced by _mm256_storeu2_m128().

#define _mm256_extractf128_si256 (   V,
  M 
)
Value:
__extension__ ({ \
(__m128i)__builtin_shufflevector( \
(__v4di)(__m256i)(V), \
(__v4di)(_mm256_undefined_si256()), \
(((M) & 1) ? 2 : 0), \
(((M) & 1) ? 3 : 1) );})
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Definition: avxintrin.h:2531

Definition at line 2833 of file avxintrin.h.

Referenced by _mm256_storeu2_m128i().

#define _mm256_floor_pd (   V)    _mm256_round_pd((V), _MM_FROUND_FLOOR)

Rounds down the values stored in a 256-bit vector of [4 x double].

The source values are rounded down to integer values and returned as 64-bit double-precision floating-point values.

__m256d _mm256_floor_pd(__m256d V);

This intrinsic corresponds to the VROUNDPD / ROUNDPD instruction.

Parameters
VA 256-bit vector of [4 x double].
Returns
A 256-bit vector of [4 x double] containing the rounded down values.

Definition at line 479 of file avxintrin.h.

#define _mm256_floor_ps (   V)    _mm256_round_ps((V), _MM_FROUND_FLOOR)

Rounds down the values stored in a 256-bit vector of [8 x float].

The source values are rounded down to integer values and returned as floating-point values.

__m256 _mm256_floor_ps(__m256 V);

This intrinsic corresponds to the VROUNDPS / ROUNDPS instruction.

Parameters
VA 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the rounded down values.

Definition at line 513 of file avxintrin.h.

#define _mm256_insertf128_pd (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256d)__builtin_shufflevector( \
(__v4df)(__m256d)(V1), \
(__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
(((M) & 1) ? 0 : 4), \
(((M) & 1) ? 1 : 5), \
(((M) & 1) ? 4 : 2), \
(((M) & 1) ? 5 : 3) );})
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castpd128_pd256(__m128d __a)
Definition: avxintrin.h:2759

Definition at line 2794 of file avxintrin.h.

Referenced by _mm256_loadu2_m128d().

#define _mm256_insertf128_ps (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256)__builtin_shufflevector( \
(__v8sf)(__m256)(V1), \
(__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
(((M) & 1) ? 0 : 8), \
(((M) & 1) ? 1 : 9), \
(((M) & 1) ? 2 : 10), \
(((M) & 1) ? 3 : 11), \
(((M) & 1) ? 8 : 4), \
(((M) & 1) ? 9 : 5), \
(((M) & 1) ? 10 : 6), \
(((M) & 1) ? 11 : 7) );})
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castps128_ps256(__m128 __a)
Definition: avxintrin.h:2765

Definition at line 2781 of file avxintrin.h.

Referenced by _mm256_loadu2_m128().

#define _mm256_insertf128_si256 (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256i)__builtin_shufflevector( \
(__v4di)(__m256i)(V1), \
(__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
(((M) & 1) ? 0 : 4), \
(((M) & 1) ? 1 : 5), \
(((M) & 1) ? 4 : 2), \
(((M) & 1) ? 5 : 3) );})
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castsi128_si256(__m128i __a)
Definition: avxintrin.h:2771

Definition at line 2803 of file avxintrin.h.

Referenced by _mm256_loadu2_m128i().

#define _mm256_permute2f128_pd (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
(__v4df)(__m256d)(V2), (M)); })

Permutes 128-bit data values stored in two 256-bit vectors of [4 x double], as specified by the immediate integer operand.

__m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);

This intrinsic corresponds to the VPERM2F128 / PERM2F128 instruction.

Parameters
V1A 256-bit vector of [4 x double].
V2A 256-bit vector of [4 x double].
MAn immediate integer operand specifying how the values are to be permuted. Bits [1:0]: 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination. Bits [5:4]: 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination.
Returns
A 256-bit vector of [4 x double] containing the copied values.

Definition at line 1244 of file avxintrin.h.

#define _mm256_permute2f128_ps (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), (M)); })

Permutes 128-bit data values stored in two 256-bit vectors of [8 x float], as specified by the immediate integer operand.

__m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);

This intrinsic corresponds to the VPERM2F128 / PERM2F128 instruction.

Parameters
V1A 256-bit vector of [8 x float].
V2A 256-bit vector of [8 x float].
MAn immediate integer operand specifying how the values are to be permuted. Bits [1:0]: 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination. Bits [5:4]: 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination.
Returns
A 256-bit vector of [8 x float] containing the copied values.

Definition at line 1285 of file avxintrin.h.

#define _mm256_permute2f128_si256 (   V1,
  V2,
  M 
)
Value:
__extension__ ({ \
(__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
(__v8si)(__m256i)(V2), (M)); })

Permutes 128-bit data values stored in two 256-bit integer vectors, as specified by the immediate integer operand.

__m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);

This intrinsic corresponds to the VPERM2F128 / PERM2F128 instruction.

Parameters
V1A 256-bit integer vector.
V2A 256-bit integer vector.
MAn immediate integer operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the destination. Bits [5:4]: 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the destination. 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the destination. 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the destination. 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the destination.
Returns
A 256-bit integer vector containing the copied values.

Definition at line 1325 of file avxintrin.h.

#define _mm256_permute_pd (   A,
  C 
)
Value:
__extension__ ({ \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
(__v4df)_mm256_undefined_pd(), \
0 + (((C) >> 0) & 0x1), \
0 + (((C) >> 1) & 0x1), \
2 + (((C) >> 2) & 0x1), \
2 + (((C) >> 3) & 0x1)); })
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void)
Definition: avxintrin.h:2519

Copies the values in a 256-bit vector of [4 x double] as specified by the immediate integer operand.

__m256d _mm256_permute_pd(__m256d A, const int C);

This intrinsic corresponds to the VPERMILPD / PERMILPD instruction.

Parameters
AA 256-bit vector of [4 x double].
CAn immediate integer operand specifying how the values are to be copied. Bit [0]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [1]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector. Bit [2]: 0: Bits [191:128] of the source are copied to bits [191:128] of the returned vector. 1: Bits [255:192] of the source are copied to bits [191:128] of the returned vector. Bit [3]: 0: Bits [191:128] of the source are copied to bits [255:192] of the returned vector. 1: Bits [255:192] of the source are copied to bits [255:192] of the returned vector.
Returns
A 256-bit vector of [4 x double] containing the copied values.

Definition at line 1041 of file avxintrin.h.

#define _mm256_permute_ps (   A,
  C 
)
Value:
__extension__ ({ \
(__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
(__v8sf)_mm256_undefined_ps(), \
0 + (((C) >> 0) & 0x3), \
0 + (((C) >> 2) & 0x3), \
0 + (((C) >> 4) & 0x3), \
0 + (((C) >> 6) & 0x3), \
4 + (((C) >> 0) & 0x3), \
4 + (((C) >> 2) & 0x3), \
4 + (((C) >> 4) & 0x3), \
4 + (((C) >> 6) & 0x3)); })
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Definition: avxintrin.h:2525

Copies the values in a 256-bit vector of [8 x float] as specified by the immediate integer operand.

__m256 _mm256_permute_ps(__m256 A, const int C);

This intrinsic corresponds to the VPERMILPS / PERMILPS instruction.

Parameters
AA 256-bit vector of [8 x float].
CAn immediate integer operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [3:2]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [5:4]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [7:6]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector. Bits [1:0]: 00: Bits [159:128] of the source are copied to bits [159:128] of the returned vector. 01: Bits [191:160] of the source are copied to bits [159:128] of the returned vector. 10: Bits [223:192] of the source are copied to bits [159:128] of the returned vector. 11: Bits [255:224] of the source are copied to bits [159:128] of the returned vector. Bits [3:2]: 00: Bits [159:128] of the source are copied to bits [191:160] of the returned vector. 01: Bits [191:160] of the source are copied to bits [191:160] of the returned vector. 
10: Bits [223:192] of the source are copied to bits [191:160] of the returned vector. 11: Bits [255:224] of the source are copied to bits [191:160] of the returned vector. Bits [5:4]: 00: Bits [159:128] of the source are copied to bits [223:192] of the returned vector. 01: Bits [191:160] of the source are copied to bits [223:192] of the returned vector. 10: Bits [223:192] of the source are copied to bits [223:192] of the returned vector. 11: Bits [255:224] of the source are copied to bits [223:192] of the returned vector. Bits [7:6]: 00: Bits [159:128] of the source are copied to bits [255:224] of the returned vector. 01: Bits [191:160] of the source are copied to bits [255:224] of the returned vector. 10: Bits [223:192] of the source are copied to bits [255:224] of the returned vector. 11: Bits [255:224] of the source are copied to bits [255:224] of the returned vector.
Returns
A 256-bit vector of [8 x float] containing the copied values.

Definition at line 1195 of file avxintrin.h.

#define _mm256_round_pd (   V,
  M 
)
Value:
__extension__ ({ \
(__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)); })

Rounds the values in a 256-bit vector of [4 x double] as specified by the byte operand.

The source values are rounded to integer values and returned as 64-bit double-precision floating-point values.

__m256d _mm256_round_pd(__m256d V, const int M);

This intrinsic corresponds to the VROUNDPD / ROUNDPD instruction.

Parameters
VA 256-bit vector of [4 x double].
MAn integer value that specifies the rounding operation. Bits [7:4] are reserved. Bit [3] is a precision exception value: 0: A normal PE exception is used. 1: The PE field is not updated. Bit [2] is the rounding control source: 0: Use bits [1:0] of M. 1: Use the current MXCSR setting. Bits [1:0] contain the rounding control definition: 00: Nearest. 01: Downward (toward negative infinity). 10: Upward (toward positive infinity). 11: Truncated.
Returns
A 256-bit vector of [4 x double] containing the rounded values.

Definition at line 411 of file avxintrin.h.

#define _mm256_round_ps (   V,
  M 
)
Value:
__extension__ ({ \
(__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)); })

Rounds the values stored in a 256-bit vector of [8 x float] as specified by the byte operand.

The source values are rounded to integer values and returned as floating-point values.

__m256 _mm256_round_ps(__m256 V, const int M);

This intrinsic corresponds to the VROUNDPS / ROUNDPS instruction.

Parameters
VA 256-bit vector of [8 x float].
MAn integer value that specifies the rounding operation. Bits [7:4] are reserved. Bit [3] is a precision exception value: 0: A normal PE exception is used. 1: The PE field is not updated. Bit [2] is the rounding control source: 0: Use bits [1:0] of M. 1: Use the current MXCSR setting. Bits [1:0] contain the rounding control definition: 00: Nearest. 01: Downward (toward negative infinity). 10: Upward (toward positive infinity). 11: Truncated.
Returns
A 256-bit vector of [8 x float] containing the rounded values.

Definition at line 443 of file avxintrin.h.

#define _mm256_shuffle_pd (   a,
  b,
  mask 
)
Value:
__extension__ ({ \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
(__v4df)(__m256d)(b), \
0 + (((mask) >> 0) & 0x1), \
4 + (((mask) >> 1) & 0x1), \
2 + (((mask) >> 2) & 0x1), \
6 + (((mask) >> 3) & 0x1)); })

Selects four double-precision values from the 256-bit operands of [4 x double], as specified by the immediate value operand.

The selected elements from the first 256-bit operand are copied to bits [63:0] and bits [191:128] in the destination, and the selected elements from the second 256-bit operand are copied to bits [127:64] and bits [255:192] in the destination. For example, if bits [3:0] of the immediate operand contain a value of 0xF, the 256-bit destination vector would contain the following values: b[3], a[3], b[1], a[1].

__m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);

This intrinsic corresponds to the VSHUFPD / SHUFPD instruction.

Parameters
aA 256-bit vector of [4 x double].
bA 256-bit vector of [4 x double].
maskAn 8-bit immediate value whose bits [3:0] specify which elements to copy from a and b: Bit [0]=0: Bits [63:0] are copied from a to bits [63:0] of the destination. Bit [0]=1: Bits [127:64] are copied from a to bits [63:0] of the destination. Bit [1]=0: Bits [63:0] are copied from b to bits [127:64] of the destination. Bit [1]=1: Bits [127:64] are copied from b to bits [127:64] of the destination. Bit [2]=0: Bits [191:128] are copied from a to bits [191:128] of the destination. Bit [2]=1: Bits [255:192] are copied from a to bits [191:128] of the destination. Bit [3]=0: Bits [191:128] are copied from b to bits [255:192] of the destination. Bit [3]=1: Bits [255:192] are copied from b to bits [255:192] of the destination.
Returns
A 256-bit vector of [4 x double] containing the shuffled values.

Definition at line 1596 of file avxintrin.h.

#define _mm256_shuffle_ps (   a,
  b,
  mask 
)
Value:
__extension__ ({ \
(__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), \
0 + (((mask) >> 0) & 0x3), \
0 + (((mask) >> 2) & 0x3), \
8 + (((mask) >> 4) & 0x3), \
8 + (((mask) >> 6) & 0x3), \
4 + (((mask) >> 0) & 0x3), \
4 + (((mask) >> 2) & 0x3), \
12 + (((mask) >> 4) & 0x3), \
12 + (((mask) >> 6) & 0x3)); })

Selects 8 float values from the 256-bit operands of [8 x float], as specified by the immediate value operand.

The four selected elements in each operand are copied to the destination according to the bits specified in the immediate operand. The selected elements from the first 256-bit operand are copied to bits [63:0] and bits [191:128] of the destination, and the selected elements from the second 256-bit operand are copied to bits [127:64] and bits [255:192] of the destination. For example, if bits [7:0] of the immediate operand contain a value of 0xFF, the 256-bit destination vector would contain the following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].

__m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);

This intrinsic corresponds to the VSHUFPS / SHUFPS instruction.

Parameters
aA 256-bit vector of [8 x float]. The four selected elements in this operand are copied to bits [63:0] and bits [191:128] in the destination, according to the bits specified in the immediate operand.
bA 256-bit vector of [8 x float]. The four selected elements in this operand are copied to bits [127:64] and bits [255:192] in the destination, according to the bits specified in the immediate operand.
maskAn immediate value containing an 8-bit value specifying which elements to copy from a and b. Bits [3:0] specify the values copied from operand a. Bits [7:4] specify the values copied from operand b. The destinations within the 256-bit destination are assigned values as follows, according to the bit value assignments described below: Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the destination. Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the destination. Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the destination. Bits [7:6] are used to assign values to bits [127:96] and [255:224] in the destination. Bit value assignments: 00: Bits [31:0] and [159:128] are copied from the selected operand. 01: Bits [63:32] and [191:160] are copied from the selected operand. 10: Bits [95:64] and [223:192] are copied from the selected operand. 11: Bits [127:96] and [255:224] are copied from the selected operand.
Returns
A 256-bit vector of [8 x float] containing the shuffled values.

Definition at line 1543 of file avxintrin.h.

#define _mm_cmp_pd (   a,
  b,
  c 
)
Value:
__extension__ ({ \
(__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)); })

Compares each of the corresponding double-precision values of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand.

Returns a [2 x double] vector consisting of two doubles corresponding to the two comparison results: zero if the comparison is false, and all 1's if the comparison is true.

__m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);

This intrinsic corresponds to the VCMPPD / CMPPD instruction.

Parameters
aA 128-bit vector of [2 x double].
bA 128-bit vector of [2 x double].
cAn immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h, 08h, 10h, 18h: Equal 01h, 09h, 11h, 19h: Less than 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped operands) 03h, 0Bh, 13h, 1Bh: Unordered 04h, 0Ch, 14h, 1Ch: Not equal 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands) 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal (swapped operands) 07h, 0Fh, 17h, 1Fh: Ordered
Returns
A 128-bit vector of [2 x double] containing the comparison results.

Definition at line 1670 of file avxintrin.h.

#define _mm_cmp_ps (   a,
  b,
  c 
)
Value:
__extension__ ({ \
(__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)); })

Compares each of the corresponding values of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand.

Returns a [4 x float] vector consisting of four floats corresponding to the four comparison results: zero if the comparison is false, and all 1's if the comparison is true.

__m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);

This intrinsic corresponds to the VCMPPS / CMPPS instruction.

Parameters
aA 128-bit vector of [4 x float].
bA 128-bit vector of [4 x float].
cAn immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h, 08h, 10h, 18h: Equal 01h, 09h, 11h, 19h: Less than 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped operands) 03h, 0Bh, 13h, 1Bh: Unordered 04h, 0Ch, 14h, 1Ch: Not equal 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands) 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal (swapped operands) 07h, 0Fh, 17h, 1Fh: Ordered
Returns
A 128-bit vector of [4 x float] containing the comparison results.

Definition at line 1706 of file avxintrin.h.

#define _mm_cmp_sd (   a,
  b,
  c 
)
Value:
__extension__ ({ \
(__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)); })

Compares the scalar double-precision values in the low-order 64 bits of two 128-bit vectors of [2 x double], using the operation specified by the immediate integer operand.

If the result is true, all 64 bits of the low-order element of the destination vector are set; otherwise they are cleared.

__m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);

This intrinsic corresponds to the VCMPSD / CMPSD instruction.

Parameters
aA 128-bit vector of [2 x double].
bA 128-bit vector of [2 x double].
cAn immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h, 08h, 10h, 18h: Equal 01h, 09h, 11h, 19h: Less than 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped operands) 03h, 0Bh, 13h, 1Bh: Unordered 04h, 0Ch, 14h, 1Ch: Not equal 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands) 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal (swapped operands) 07h, 0Fh, 17h, 1Fh: Ordered
Returns
A 128-bit vector of [2 x double] containing the comparison results.

Definition at line 1813 of file avxintrin.h.

#define _mm_cmp_ss (   a,
  b,
  c 
)
Value:
__extension__ ({ \
(__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)); })

Compares the scalar single-precision values in the low-order 32 bits of two 128-bit vectors of [4 x float], using the operation specified by the immediate integer operand.

If the result is true, all 32 bits of the low-order element of the destination vector are set; otherwise they are cleared.

__m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);

This intrinsic corresponds to the VCMPSS / CMPSS instruction.

Parameters
aA 128-bit vector of [4 x float].
bA 128-bit vector of [4 x float].
cAn immediate integer operand, with bits [4:0] specifying which comparison operation to use: 00h, 08h, 10h, 18h: Equal 01h, 09h, 11h, 19h: Less than 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped operands) 03h, 0Bh, 13h, 1Bh: Unordered 04h, 0Ch, 14h, 1Ch: Not equal 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands) 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal (swapped operands) 07h, 0Fh, 17h, 1Fh: Ordered
Returns
A 128-bit vector of [4 x float] containing the comparison results.

Definition at line 1848 of file avxintrin.h.

#define _mm_permute_pd (   A,
  C 
)
Value:
__extension__ ({ \
(__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
(__v2df)_mm_undefined_pd(), \
((C) >> 0) & 0x1, ((C) >> 1) & 0x1); })
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Definition: emmintrin.h:548

Copies the values in a 128-bit vector of [2 x double] as specified by the immediate integer operand.

__m128d _mm_permute_pd(__m128d A, const int C);

This intrinsic corresponds to the VPERMILPD / PERMILPD instruction.

Parameters
AA 128-bit vector of [2 x double].
CAn immediate integer operand specifying how the values are to be copied. Bit [0]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [1]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector.
Returns
A 128-bit vector of [2 x double] containing the copied values.

Definition at line 1000 of file avxintrin.h.

#define _mm_permute_ps (   A,
  C 
)
Value:
__extension__ ({ \
(__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
(__v4sf)_mm_undefined_ps(), \
((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
Definition: xmmintrin.h:1730

Copies the values in a 128-bit vector of [4 x float] as specified by the immediate integer operand.

__m128 _mm_permute_ps(__m128 A, const int C);

This intrinsic corresponds to the VPERMILPS / PERMILPS instruction.

Parameters
A: A 128-bit vector of [4 x float].
C: An immediate integer operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector; 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector; 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector; 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [3:2]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector; 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector; 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector; 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [5:4]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector; 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector; 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector; 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [7:6]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector; 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector; 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector; 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector.
Returns
A 128-bit vector of [4 x float] containing the copied values.

Definition at line 1101 of file avxintrin.h.

Typedef Documentation

typedef long long __m256i __attribute__((__vector_size__(32)))

Definition at line 31 of file avxintrin.h.

Function Documentation

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd ( __m256d  __a,
__m256d  __b 
)
static

Adds two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VADDPD / ADDPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the source operands.
__bA 256-bit vector of [4 x double] containing one of the source operands.
Returns
A 256-bit vector of [4 x double] containing the sums of both operands.

Definition at line 69 of file avxintrin.h.

References __b.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps ( __m256  __a,
__m256  __b 
)
static

Adds two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VADDPS / ADDPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the source operands.
__bA 256-bit vector of [8 x float] containing one of the source operands.
Returns
A 256-bit vector of [8 x float] containing the sums of both operands.

Definition at line 87 of file avxintrin.h.

References __b.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_addsub_pd ( __m256d  __a,
__m256d  __b 
)
static

Subtracts the even-indexed values and adds the odd-indexed values of two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VADDSUBPD / ADDSUBPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing the left source operand.
__bA 256-bit vector of [4 x double] containing the right source operand.
Returns
A 256-bit vector of [4 x double] containing the alternating sums and differences between both operands.

Definition at line 142 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_addsub_ps ( __m256  __a,
__m256  __b 
)
static

Subtracts the even-indexed values and adds the odd-indexed values of two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VADDSUBPS / ADDSUBPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing the left source operand.
__bA 256-bit vector of [8 x float] containing the right source operand.
Returns
A 256-bit vector of [8 x float] containing the alternating sums and differences between both operands.

Definition at line 161 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_and_pd ( __m256d  __a,
__m256d  __b 
)
static

Performs a bitwise AND of two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VANDPD / ANDPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the source operands.
__bA 256-bit vector of [4 x double] containing one of the source operands.
Returns
A 256-bit vector of [4 x double] containing the bitwise AND of the values between both operands.

Definition at line 529 of file avxintrin.h.

References __b.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_and_ps ( __m256  __a,
__m256  __b 
)
static

Performs a bitwise AND of two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VANDPS / ANDPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the source operands.
__bA 256-bit vector of [8 x float] containing one of the source operands.
Returns
A 256-bit vector of [8 x float] containing the bitwise AND of the values between both operands.

Definition at line 547 of file avxintrin.h.

References __b.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_andnot_pd ( __m256d  __a,
__m256d  __b 
)
static

Performs a bitwise AND of two 256-bit vectors of [4 x double], using the one's complement of the values contained in the first source operand.

This intrinsic corresponds to the VANDNPD / ANDNPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing the left source operand. The one's complement of this value is used in the bitwise AND.
__bA 256-bit vector of [4 x double] containing the right source operand.
Returns
A 256-bit vector of [4 x double] containing the bitwise AND of the values of the second operand and the one's complement of the first operand.

Definition at line 568 of file avxintrin.h.

References __b.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_andnot_ps ( __m256  __a,
__m256  __b 
)
static

Performs a bitwise AND of two 256-bit vectors of [8 x float], using the one's complement of the values contained in the first source operand.

This intrinsic corresponds to the VANDNPS / ANDNPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing the left source operand. The one's complement of this value is used in the bitwise AND.
__bA 256-bit vector of [8 x float] containing the right source operand.
Returns
A 256-bit vector of [8 x float] containing the bitwise AND of the values of the second operand and the one's complement of the first operand.

Definition at line 589 of file avxintrin.h.

References __b.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_blendv_pd ( __m256d  __a,
__m256d  __b,
__m256d  __c 
)
static

Merges 64-bit double-precision data values stored in either of the two 256-bit vectors of [4 x double], as specified by the 256-bit vector operand.

This intrinsic corresponds to the VBLENDVPD / BLENDVPD instruction.

Parameters
__aA 256-bit vector of [4 x double].
__bA 256-bit vector of [4 x double].
__cA 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying how the values are to be copied. The position of the mask bit corresponds to the most significant bit of a copied value. When a mask bit is 0, the corresponding 64-bit element in operand __a is copied to the same position in the destination. When a mask bit is 1, the corresponding 64-bit element in operand __b is copied to the same position in the destination.
Returns
A 256-bit vector of [4 x double] containing the copied values.

Definition at line 1420 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_blendv_ps ( __m256  __a,
__m256  __b,
__m256  __c 
)
static

Merges 32-bit single-precision data values stored in either of the two 256-bit vectors of [8 x float], as specified by the 256-bit vector operand.

This intrinsic corresponds to the VBLENDVPS / BLENDVPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
__bA 256-bit vector of [8 x float].
__cA 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63, and 31 specifying how the values are to be copied. The position of the mask bit corresponds to the most significant bit of a copied value. When a mask bit is 0, the corresponding 32-bit element in operand __a is copied to the same position in the destination. When a mask bit is 1, the corresponding 32-bit element in operand __b is copied to the same position in the destination.
Returns
A 256-bit vector of [8 x float] containing the copied values.

Definition at line 1448 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_pd ( __m128d const *  __a)
static

Definition at line 2338 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ps ( __m128 const *  __a)
static

Definition at line 2344 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_sd ( double const *  __a)
static

Definition at line 2324 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ss ( float const *  __a)
static

Definition at line 2331 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castpd128_pd256 ( __m128d  __a)
static

Definition at line 2759 of file avxintrin.h.

Referenced by _mm256_loadu2_m128d().

static __inline __m128d __DEFAULT_FN_ATTRS _mm256_castpd256_pd128 ( __m256d  __a)
static

Definition at line 2741 of file avxintrin.h.

Referenced by _mm256_storeu2_m128d().

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castpd_ps ( __m256d  __a)
static

Definition at line 2705 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castpd_si256 ( __m256d  __a)
static

Definition at line 2711 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castps128_ps256 ( __m128  __a)
static

Definition at line 2765 of file avxintrin.h.

Referenced by _mm256_loadu2_m128().

static __inline __m128 __DEFAULT_FN_ATTRS _mm256_castps256_ps128 ( __m256  __a)
static

Definition at line 2747 of file avxintrin.h.

Referenced by _mm256_storeu2_m128().

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castps_pd ( __m256  __a)
static

Definition at line 2717 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castps_si256 ( __m256  __a)
static

Definition at line 2723 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_castsi128_si256 ( __m128i  __a)
static

Definition at line 2771 of file avxintrin.h.

Referenced by _mm256_loadu2_m128i().

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_castsi256_pd ( __m256i  __a)
static

Definition at line 2735 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_castsi256_ps ( __m256i  __a)
static

Definition at line 2729 of file avxintrin.h.

static __inline __m128i __DEFAULT_FN_ATTRS _mm256_castsi256_si128 ( __m256i  __a)
static

Definition at line 2753 of file avxintrin.h.

Referenced by _mm256_storeu2_m128i().

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd ( __m128i  __a)
static

Converts a vector of [4 x i32] into a vector of [4 x double].

This intrinsic corresponds to the VCVTDQ2PD / CVTDQ2PD instruction.

Parameters
__aA 128-bit integer vector of [4 x i32].
Returns
A 256-bit vector of [4 x double] containing the converted values.

Definition at line 2060 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps ( __m256i  __a)
static

Converts a vector of [8 x i32] into a vector of [8 x float].

This intrinsic corresponds to the VCVTDQ2PS / CVTDQ2PS instruction.

Parameters
__aA 256-bit integer vector.
Returns
A 256-bit vector of [8 x float] containing the converted values.

Definition at line 2075 of file avxintrin.h.

static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32 ( __m256d  __a)
static

Definition at line 2124 of file avxintrin.h.

static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps ( __m256d  __a)
static

Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].

This intrinsic corresponds to the VCVTPD2PS / CVTPD2PS instruction.

Parameters
__aA 256-bit vector of [4 x double].
Returns
A 128-bit vector of [4 x float] containing the converted values.

Definition at line 2091 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32 ( __m256  __a)
static

Converts a vector of [8 x float] into a vector of [8 x i32].

This intrinsic corresponds to the VCVTPS2DQ / CVTPS2DQ instruction.

Parameters
__aA 256-bit vector of [8 x float].
Returns
A 256-bit integer vector containing the converted values.

Definition at line 2106 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd ( __m128  __a)
static

Definition at line 2112 of file avxintrin.h.

static __inline double __DEFAULT_FN_ATTRS _mm256_cvtsd_f64 ( __m256d  __a)
static

Definition at line 2136 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm256_cvtsi256_si32 ( __m256i  __a)
static

Definition at line 2142 of file avxintrin.h.

static __inline float __DEFAULT_FN_ATTRS _mm256_cvtss_f32 ( __m256  __a)
static

Definition at line 2149 of file avxintrin.h.

static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32 ( __m256d  __a)
static

Definition at line 2118 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32 ( __m256  __a)
static

Definition at line 2130 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd ( __m256d  __a,
__m256d  __b 
)
static

Divides two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VDIVPD / DIVPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing the dividend.
__bA 256-bit vector of [4 x double] containing the divisor.
Returns
A 256-bit vector of [4 x double] containing the quotients of both operands.

Definition at line 179 of file avxintrin.h.

References __b.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps ( __m256  __a,
__m256  __b 
)
static

Divides two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VDIVPS / DIVPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing the dividend.
__bA 256-bit vector of [8 x float] containing the divisor.
Returns
A 256-bit vector of [8 x float] containing the quotients of both operands.

Definition at line 197 of file avxintrin.h.

References __b.

static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi16 ( __m256i  __a,
const int  __imm 
)
static

Takes a [16 x i16] vector and returns the vector element value indexed by the immediate constant operand.

This intrinsic corresponds to the VEXTRACTF128+COMPOSITE / EXTRACTF128+COMPOSITE instruction.

Parameters
__aA 256-bit integer vector of [16 x i16].
__immAn immediate integer operand with bits [3:0] determining which vector element is extracted and returned.
Returns
A 32-bit integer containing the extracted 16 bits of zero-extended packed data.

Definition at line 1890 of file avxintrin.h.

References __b.

static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi32 ( __m256i  __a,
const int  __imm 
)
static

Takes an [8 x i32] vector and returns the vector element value indexed by the immediate constant operand.

This intrinsic corresponds to the VEXTRACTF128+COMPOSITE / EXTRACTF128+COMPOSITE instruction.

Parameters
__aA 256-bit vector of [8 x i32].
__immAn immediate integer operand with bits [2:0] determining which vector element is extracted and returned.
Returns
A 32-bit integer containing the extracted 32 bits of packed data.

Definition at line 1868 of file avxintrin.h.

References __b.

static __inline int __DEFAULT_FN_ATTRS _mm256_extract_epi8 ( __m256i  __a,
const int  __imm 
)
static

Takes a [32 x i8] vector and returns the vector element value indexed by the immediate constant operand.

This intrinsic corresponds to the VEXTRACTF128+COMPOSITE / EXTRACTF128+COMPOSITE instruction.

Parameters
__aA 256-bit integer vector of [32 x i8].
__immAn immediate integer operand with bits [4:0] determining which vector element is extracted and returned.
Returns
A 32-bit integer containing the extracted 8 bits of zero-extended packed data.

Definition at line 1912 of file avxintrin.h.

References __b.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hadd_pd ( __m256d  __a,
__m256d  __b 
)
static

Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VHADDPD / HADDPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the source operands. The horizontal sums of the values are returned in the even-indexed elements of a vector of [4 x double].
__bA 256-bit vector of [4 x double] containing one of the source operands. The horizontal sums of the values are returned in the odd-indexed elements of a vector of [4 x double].
Returns
A 256-bit vector of [4 x double] containing the horizontal sums of both operands.

Definition at line 685 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hadd_ps ( __m256  __a,
__m256  __b 
)
static

Horizontally adds the adjacent pairs of values contained in two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VHADDPS / HADDPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the source operands. The horizontal sums of the values are returned in the elements with index 0, 1, 4, 5 of a vector of [8 x float].
__bA 256-bit vector of [8 x float] containing one of the source operands. The horizontal sums of the values are returned in the elements with index 2, 3, 6, 7 of a vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the horizontal sums of both operands.

Definition at line 708 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_hsub_pd ( __m256d  __a,
__m256d  __b 
)
static

Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VHSUBPD / HSUBPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the source operands. The horizontal differences between the values are returned in the even-indexed elements of a vector of [4 x double].
__bA 256-bit vector of [4 x double] containing one of the source operands. The horizontal differences between the values are returned in the odd-indexed elements of a vector of [4 x double].
Returns
A 256-bit vector of [4 x double] containing the horizontal differences of both operands.

Definition at line 731 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_hsub_ps ( __m256  __a,
__m256  __b 
)
static

Horizontally subtracts the adjacent pairs of values contained in two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VHSUBPS / HSUBPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the source operands. The horizontal differences between the values are returned in the elements with index 0, 1, 4, 5 of a vector of [8 x float].
__bA 256-bit vector of [8 x float] containing one of the source operands. The horizontal differences between the values are returned in the elements with index 2, 3, 6, 7 of a vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the horizontal differences of both operands.

Definition at line 754 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi16 ( __m256i  __a,
int  __b,
int const  __imm 
)
static

Takes a [16 x i16] vector and replaces the vector element value indexed by the immediate constant operand with a new value.

Returns the modified vector.

This intrinsic corresponds to the VINSERTF128+COMPOSITE / INSERTF128+COMPOSITE instruction.

Parameters
__aA vector of [16 x i16] to be used by the insert operation.
__bAn i16 integer value. The replacement value for the insert operation.
__immAn immediate integer specifying the index of the vector element to be replaced.
Returns
A copy of vector __a, after replacing its element indexed by __imm with __b.

Definition at line 1988 of file avxintrin.h.

References __b, and __c.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi32 ( __m256i  __a,
int  __b,
int const  __imm 
)
static

Takes an [8 x i32] vector and replaces the vector element value indexed by the immediate constant operand with a new value.

Returns the modified vector.

This intrinsic corresponds to the VINSERTF128+COMPOSITE / INSERTF128+COMPOSITE instruction.

Parameters
__aA vector of [8 x i32] to be used by the insert operation.
__bAn integer value. The replacement value for the insert operation.
__immAn immediate integer specifying the index of the vector element to be replaced.
Returns
A copy of vector __a, after replacing its element indexed by __imm with __b.

Definition at line 1961 of file avxintrin.h.

References __b, and __c.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_insert_epi8 ( __m256i  __a,
int  __b,
int const  __imm 
)
static

Takes a [32 x i8] vector and replaces the vector element value indexed by the immediate constant operand with a new value.

Returns the modified vector.

This intrinsic corresponds to the VINSERTF128+COMPOSITE / INSERTF128+COMPOSITE instruction.

Parameters
__aA vector of [32 x i8] to be used by the insert operation.
__bAn i8 integer value. The replacement value for the insert operation.
__immAn immediate integer specifying the index of the vector element to be replaced.
Returns
A copy of vector __a, after replacing its element indexed by __imm with __b.

Definition at line 2014 of file avxintrin.h.

References __b, and __c.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_lddqu_si256 ( __m256i const *  __p)
static

Definition at line 2396 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_load_pd ( double const *  __p)
static

Definition at line 2351 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_load_ps ( float const *  __p)
static

Definition at line 2357 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_load_si256 ( __m256i const *  __p)
static

Definition at line 2381 of file avxintrin.h.

References __p.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu2_m128 ( float const *  __addr_hi,
float const *  __addr_lo 
)
static

Definition at line 2842 of file avxintrin.h.

References _mm256_castps128_ps256(), _mm256_insertf128_ps, and _mm_loadu_ps().

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu2_m128d ( double const *  __addr_hi,
double const *  __addr_lo 
)
static

Definition at line 2849 of file avxintrin.h.

References _mm256_castpd128_pd256(), _mm256_insertf128_pd, and _mm_loadu_pd().

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu2_m128i ( __m128i const *  __addr_hi,
__m128i const *  __addr_lo 
)
static

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu_pd ( double const *  __p)
static

Definition at line 2363 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu_ps ( float const *  __p)
static

Definition at line 2372 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_loadu_si256 ( __m256i const *  __p)
static

Definition at line 2387 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_maskload_pd ( double const *  __p,
__m256i  __m 
)
static

Definition at line 2455 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_maskload_ps ( float const *  __p,
__m256i  __m 
)
static

Definition at line 2468 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_pd ( double *  __p,
__m256i  __m,
__m256d  __a 
)
static

Definition at line 2487 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_maskstore_ps ( float *  __p,
__m256i  __m,
__m256  __a 
)
static

Definition at line 2475 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd ( __m256d  __a,
__m256d  __b 
)
static

Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.

This intrinsic corresponds to the VMAXPD / MAXPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the operands.
__bA 256-bit vector of [4 x double] containing one of the operands.
Returns
A 256-bit vector of [4 x double] containing the maximum values between both operands.

Definition at line 216 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps ( __m256  __a,
__m256  __b 
)
static

Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.

This intrinsic corresponds to the VMAXPS / MAXPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the operands.
__bA 256-bit vector of [8 x float] containing one of the operands.
Returns
A 256-bit vector of [8 x float] containing the maximum values between both operands.

Definition at line 235 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd ( __m256d  __a,
__m256d  __b 
)
static

Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.

This intrinsic corresponds to the VMINPD / MINPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the operands.
__bA 256-bit vector of [4 x double] containing one of the operands.
Returns
A 256-bit vector of [4 x double] containing the minimum values between both operands.

Definition at line 254 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps ( __m256  __a,
__m256  __b 
)
static

Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.

This intrinsic corresponds to the VMINPS / MINPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the operands.
__bA 256-bit vector of [8 x float] containing one of the operands.
Returns
A 256-bit vector of [8 x float] containing the minimum values between both operands.

Definition at line 273 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd ( __m256d  __a)
static

Definition at line 2168 of file avxintrin.h.

Referenced by _mm256_mask_movedup_pd(), and _mm256_maskz_movedup_pd().

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps ( __m256  __a)
static

Definition at line 2156 of file avxintrin.h.

Referenced by _mm256_mask_movehdup_ps(), and _mm256_maskz_movehdup_ps().

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps ( __m256  __a)
static

Definition at line 2162 of file avxintrin.h.

Referenced by _mm256_mask_moveldup_ps(), and _mm256_maskz_moveldup_ps().

static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_pd ( __m256d  __a)
static

Definition at line 2291 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm256_movemask_ps ( __m256  __a)
static

Definition at line 2297 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd ( __m256d  __a,
__m256d  __b 
)
static

Multiplies two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VMULPD / MULPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the operands.
__bA 256-bit vector of [4 x double] containing one of the operands.
Returns
A 256-bit vector of [4 x double] containing the products of both operands.

Definition at line 291 of file avxintrin.h.

References __b.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps ( __m256  __a,
__m256  __b 
)
static

Multiplies two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VMULPS / MULPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the operands.
__bA 256-bit vector of [8 x float] containing one of the operands.
Returns
A 256-bit vector of [8 x float] containing the products of both operands.

Definition at line 309 of file avxintrin.h.

References __b.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_or_pd ( __m256d  __a,
__m256d  __b 
)
static

Performs a bitwise OR of two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VORPD / ORPD instruction.

Parameters
__aA 256-bit vector of [4 x double] containing one of the source operands.
__bA 256-bit vector of [4 x double] containing one of the source operands.
Returns
A 256-bit vector of [4 x double] containing the bitwise OR of the values between both operands.

Definition at line 607 of file avxintrin.h.

References __b.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_or_ps ( __m256  __a,
__m256  __b 
)
static

Performs a bitwise OR of two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VORPS / ORPS instruction.

Parameters
__aA 256-bit vector of [8 x float] containing one of the source operands.
__bA 256-bit vector of [8 x float] containing one of the source operands.
Returns
A 256-bit vector of [8 x float] containing the bitwise OR of the values between both operands.

Definition at line 625 of file avxintrin.h.

References __b.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd ( __m256d  __a,
__m256i  __c 
)
static

Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector operand.

This intrinsic corresponds to the VPERMILPD / PERMILPD instruction.

Parameters
__aA 256-bit vector of [4 x double].
__cA 256-bit integer vector operand specifying how the values are to be copied. Bit [1]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [65]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector. Bit [129]: 0: Bits [191:128] of the source are copied to bits [191:128] of the returned vector. 1: Bits [255:192] of the source are copied to bits [191:128] of the returned vector. Bit [193]: 0: Bits [191:128] of the source are copied to bits [255:192] of the returned vector. 1: Bits [255:192] of the source are copied to bits [255:192] of the returned vector.
Returns
A 256-bit vector of [4 x double] containing the copied values.

Definition at line 823 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps ( __m256  __a,
__m256i  __c 
)
static

Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand.

This intrinsic corresponds to the VPERMILPS / PERMILPS instruction.

Parameters
__aA 256-bit vector of [8 x float].
__cA 256-bit integer vector operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [33:32]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [65:64]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [97:96]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector. Bits [129:128]: 00: Bits [159:128] of the source are copied to bits [159:128] of the returned vector. 01: Bits [191:160] of the source are copied to bits [159:128] of the returned vector. 10: Bits [223:192] of the source are copied to bits [159:128] of the returned vector. 11: Bits [255:224] of the source are copied to bits [159:128] of the returned vector. Bits [161:160]: 00: Bits [159:128] of the source are copied to bits [191:160] of the returned vector. 01: Bits [191:160] of the source are copied to bits [191:160] of the returned vector. 
10: Bits [223:192] of the source are copied to bits [191:160] of the returned vector. 11: Bits [255:224] of the source are copied to bits [191:160] of the returned vector. Bits [193:192]: 00: Bits [159:128] of the source are copied to bits [223:192] of the returned vector. 01: Bits [191:160] of the source are copied to bits [223:192] of the returned vector. 10: Bits [223:192] of the source are copied to bits [223:192] of the returned vector. 11: Bits [255:224] of the source are copied to bits [223:192] of the returned vector. Bits [225:224]: 00: Bits [159:128] of the source are copied to bits [255:224] of the returned vector. 01: Bits [191:160] of the source are copied to bits [255:224] of the returned vector. 10: Bits [223:192] of the source are copied to bits [255:224] of the returned vector. 11: Bits [255:224] of the source are copied to bits [255:224] of the returned vector.
Returns
A 256-bit vector of [8 x float] containing the copied values.

Definition at line 969 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rcp_ps ( __m256  __a)
static

Calculates the reciprocals of the values in a 256-bit vector of [8 x float].

This intrinsic corresponds to the VRCPPS / RCPPS instruction.

Parameters
__a: A 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the reciprocals of the values in the operand.

Definition at line 377 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_rsqrt_ps ( __m256  __a)
static

Calculates the reciprocal square roots of the values in a 256-bit vector of [8 x float].

This intrinsic corresponds to the VRSQRTPS / RSQRTPS instruction.

Parameters
__a: A 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the reciprocal square roots of the values in the operand.

Definition at line 360 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16 ( short  __w)
static

Definition at line 2664 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32 ( int  __i)
static

Definition at line 2658 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x ( long long  __q)
static

Definition at line 2679 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8 ( char  __b)
static

Definition at line 2671 of file avxintrin.h.

References __b.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set1_pd ( double  __w)
static

Definition at line 2646 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set1_ps ( float  __w)
static

Definition at line 2652 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi16 ( short  __w15,
short  __w14,
short  __w13,
short  __w12,
short  __w11,
short  __w10,
short  __w09,
short  __w08,
short  __w07,
short  __w06,
short  __w05,
short  __w04,
short  __w03,
short  __w02,
short  __w01,
short  __w00 
)
static

Definition at line 2557 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi32 ( int  __i0,
int  __i1,
int  __i2,
int  __i3,
int  __i4,
int  __i5,
int  __i6,
int  __i7 
)
static

Definition at line 2550 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi64x ( long long  __a,
long long  __b,
long long  __c,
long long  __d 
)
static

Definition at line 2585 of file avxintrin.h.

References __b, and __c.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_epi8 ( char  __b31,
char  __b30,
char  __b29,
char  __b28,
char  __b27,
char  __b26,
char  __b25,
char  __b24,
char  __b23,
char  __b22,
char  __b21,
char  __b20,
char  __b19,
char  __b18,
char  __b17,
char  __b16,
char  __b15,
char  __b14,
char  __b13,
char  __b12,
char  __b11,
char  __b10,
char  __b09,
char  __b08,
char  __b07,
char  __b06,
char  __b05,
char  __b04,
char  __b03,
char  __b02,
char  __b01,
char  __b00 
)
static

Definition at line 2567 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_m128 ( __m128  __hi,
__m128  __lo 
)
static
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_m128d ( __m128d  __hi,
__m128d  __lo 
)
static

Definition at line 2902 of file avxintrin.h.

References _mm256_set_m128().

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_m128i ( __m128i  __hi,
__m128i  __lo 
)
static

Definition at line 2907 of file avxintrin.h.

References _mm256_set_m128().

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_pd ( double  __a,
double  __b,
double  __c,
double  __d 
)
static

Definition at line 2537 of file avxintrin.h.

References __b, and __c.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_set_ps ( float  __a,
float  __b,
float  __c,
float  __d,
float  __e,
float  __f,
float  __g,
float  __h 
)
static

Definition at line 2543 of file avxintrin.h.

References __b, and __c.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi16 ( short  __w15,
short  __w14,
short  __w13,
short  __w12,
short  __w11,
short  __w10,
short  __w09,
short  __w08,
short  __w07,
short  __w06,
short  __w05,
short  __w04,
short  __w03,
short  __w02,
short  __w01,
short  __w00 
)
static

Definition at line 2612 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi32 ( int  __i0,
int  __i1,
int  __i2,
int  __i3,
int  __i4,
int  __i5,
int  __i6,
int  __i7 
)
static

Definition at line 2605 of file avxintrin.h.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi64x ( long long  __a,
long long  __b,
long long  __c,
long long  __d 
)
static

Definition at line 2639 of file avxintrin.h.

References __b, and __c.

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_epi8 ( char  __b31,
char  __b30,
char  __b29,
char  __b28,
char  __b27,
char  __b26,
char  __b25,
char  __b24,
char  __b23,
char  __b22,
char  __b21,
char  __b20,
char  __b19,
char  __b18,
char  __b17,
char  __b16,
char  __b15,
char  __b14,
char  __b13,
char  __b12,
char  __b11,
char  __b10,
char  __b09,
char  __b08,
char  __b07,
char  __b06,
char  __b05,
char  __b04,
char  __b03,
char  __b02,
char  __b01,
char  __b00 
)
static

Definition at line 2622 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_m128 ( __m128  __lo,
__m128  __hi 
)
static

Definition at line 2912 of file avxintrin.h.

References _mm256_set_m128().

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_m128d ( __m128d  __lo,
__m128d  __hi 
)
static

Definition at line 2917 of file avxintrin.h.

References _mm256_set_m128().

static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_m128i ( __m128i  __lo,
__m128i  __hi 
)
static

Definition at line 2922 of file avxintrin.h.

References _mm256_set_m128().

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_pd ( double  __a,
double  __b,
double  __c,
double  __d 
)
static

Definition at line 2592 of file avxintrin.h.

References __b, and __c.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setr_ps ( float  __a,
float  __b,
float  __c,
float  __d,
float  __e,
float  __f,
float  __g,
float  __h 
)
static

Definition at line 2598 of file avxintrin.h.

References __b, and __c.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd ( void  )
static
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps ( void  )
static
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256 ( void  )
static

Definition at line 2698 of file avxintrin.h.

Referenced by _mm256_abs_epi64(), _mm256_cvtpd_epi64(), _mm256_cvtpd_epu64(), _mm256_cvtps_epi64(), _mm256_cvtps_epu32(), _mm256_cvtps_epu64(), _mm256_cvttpd_epi64(), _mm256_cvttpd_epu64(), _mm256_cvttps_epi64(), _mm256_cvttps_epu32(), _mm256_cvttps_epu64(), _mm256_lzcnt_epi32(), _mm256_lzcnt_epi64(), _mm256_maskz_abs_epi16(), _mm256_maskz_abs_epi32(), _mm256_maskz_abs_epi64(), _mm256_maskz_abs_epi8(), _mm256_maskz_add_epi16(), _mm256_maskz_add_epi32(), _mm256_maskz_add_epi64(), _mm256_maskz_add_epi8(), _mm256_maskz_adds_epi16(), _mm256_maskz_adds_epi8(), _mm256_maskz_adds_epu16(), _mm256_maskz_adds_epu8(), _mm256_maskz_and_epi32(), _mm256_maskz_and_epi64(), _mm256_maskz_andnot_epi32(), _mm256_maskz_andnot_epi64(), _mm256_maskz_avg_epu16(), _mm256_maskz_avg_epu8(), _mm256_maskz_broadcast_i32x2(), _mm256_maskz_broadcast_i32x4(), _mm256_maskz_broadcast_i64x2(), _mm256_maskz_broadcastb_epi8(), _mm256_maskz_broadcastd_epi32(), _mm256_maskz_broadcastq_epi64(), _mm256_maskz_broadcastw_epi16(), _mm256_maskz_compress_epi32(), _mm256_maskz_compress_epi64(), _mm256_maskz_conflict_epi32(), _mm256_maskz_conflict_epi64(), _mm256_maskz_cvtepi16_epi32(), _mm256_maskz_cvtepi16_epi64(), _mm256_maskz_cvtepi32_epi64(), _mm256_maskz_cvtepi8_epi16(), _mm256_maskz_cvtepi8_epi32(), _mm256_maskz_cvtepi8_epi64(), _mm256_maskz_cvtepu16_epi32(), _mm256_maskz_cvtepu16_epi64(), _mm256_maskz_cvtepu32_epi64(), _mm256_maskz_cvtepu8_epi16(), _mm256_maskz_cvtepu8_epi32(), _mm256_maskz_cvtepu8_epi64(), _mm256_maskz_cvtpd_epi64(), _mm256_maskz_cvtpd_epu64(), _mm256_maskz_cvtps_epi32(), _mm256_maskz_cvtps_epi64(), _mm256_maskz_cvtps_epu32(), _mm256_maskz_cvtps_epu64(), _mm256_maskz_cvttpd_epi64(), _mm256_maskz_cvttpd_epu64(), _mm256_maskz_cvttps_epi32(), _mm256_maskz_cvttps_epi64(), _mm256_maskz_cvttps_epu32(), _mm256_maskz_cvttps_epu64(), _mm256_maskz_expand_epi32(), _mm256_maskz_expand_epi64(), _mm256_maskz_expandloadu_epi32(), _mm256_maskz_expandloadu_epi64(), _mm256_maskz_load_epi32(), 
_mm256_maskz_load_epi64(), _mm256_maskz_loadu_epi16(), _mm256_maskz_loadu_epi32(), _mm256_maskz_loadu_epi64(), _mm256_maskz_loadu_epi8(), _mm256_maskz_lzcnt_epi32(), _mm256_maskz_lzcnt_epi64(), _mm256_maskz_madd_epi16(), _mm256_maskz_maddubs_epi16(), _mm256_maskz_max_epi16(), _mm256_maskz_max_epi32(), _mm256_maskz_max_epi64(), _mm256_maskz_max_epi8(), _mm256_maskz_max_epu16(), _mm256_maskz_max_epu32(), _mm256_maskz_max_epu64(), _mm256_maskz_max_epu8(), _mm256_maskz_min_epi16(), _mm256_maskz_min_epi32(), _mm256_maskz_min_epi64(), _mm256_maskz_min_epi8(), _mm256_maskz_min_epu16(), _mm256_maskz_min_epu32(), _mm256_maskz_min_epu64(), _mm256_maskz_min_epu8(), _mm256_maskz_mov_epi16(), _mm256_maskz_mov_epi32(), _mm256_maskz_mov_epi64(), _mm256_maskz_mov_epi8(), _mm256_maskz_mul_epi32(), _mm256_maskz_mul_epu32(), _mm256_maskz_mulhi_epi16(), _mm256_maskz_mulhi_epu16(), _mm256_maskz_mulhrs_epi16(), _mm256_maskz_mullo_epi16(), _mm256_maskz_mullo_epi32(), _mm256_maskz_mullo_epi64(), _mm256_maskz_multishift_epi64_epi8(), _mm256_maskz_or_epi32(), _mm256_maskz_or_epi64(), _mm256_maskz_packs_epi16(), _mm256_maskz_packs_epi32(), _mm256_maskz_packus_epi16(), _mm256_maskz_packus_epi32(), _mm256_maskz_permutexvar_epi16(), _mm256_maskz_permutexvar_epi32(), _mm256_maskz_permutexvar_epi64(), _mm256_maskz_permutexvar_epi8(), _mm256_maskz_rolv_epi32(), _mm256_maskz_rolv_epi64(), _mm256_maskz_rorv_epi32(), _mm256_maskz_rorv_epi64(), _mm256_maskz_set1_epi16(), _mm256_maskz_set1_epi64(), _mm256_maskz_set1_epi8(), _mm256_maskz_shuffle_epi8(), _mm256_maskz_sll_epi16(), _mm256_maskz_sll_epi32(), _mm256_maskz_sll_epi64(), _mm256_maskz_sllv_epi16(), _mm256_maskz_sllv_epi32(), _mm256_maskz_sllv_epi64(), _mm256_maskz_sra_epi16(), _mm256_maskz_sra_epi32(), _mm256_maskz_sra_epi64(), _mm256_maskz_srav_epi16(), _mm256_maskz_srav_epi32(), _mm256_maskz_srav_epi64(), _mm256_maskz_srl_epi16(), _mm256_maskz_srl_epi32(), _mm256_maskz_srl_epi64(), _mm256_maskz_srlv_epi16(), _mm256_maskz_srlv_epi32(), 
_mm256_maskz_srlv_epi64(), _mm256_maskz_sub_epi16(), _mm256_maskz_sub_epi32(), _mm256_maskz_sub_epi64(), _mm256_maskz_sub_epi8(), _mm256_maskz_subs_epi16(), _mm256_maskz_subs_epi8(), _mm256_maskz_subs_epu16(), _mm256_maskz_subs_epu8(), _mm256_maskz_unpackhi_epi16(), _mm256_maskz_unpackhi_epi32(), _mm256_maskz_unpackhi_epi64(), _mm256_maskz_unpackhi_epi8(), _mm256_maskz_unpacklo_epi16(), _mm256_maskz_unpacklo_epi32(), _mm256_maskz_unpacklo_epi64(), _mm256_maskz_unpacklo_epi8(), _mm256_maskz_xor_epi32(), _mm256_maskz_xor_epi64(), _mm256_max_epi64(), _mm256_max_epu64(), _mm256_min_epi64(), _mm256_min_epu64(), _mm256_rolv_epi32(), _mm256_rolv_epi64(), _mm256_rorv_epi32(), _mm256_rorv_epi64(), _mm256_sllv_epi16(), _mm256_sra_epi64(), _mm256_srav_epi16(), _mm256_srav_epi64(), _mm256_srlv_epi16(), _mm512_cvtepi16_epi8(), _mm512_cvtsepi16_epi8(), _mm512_cvttpd_epi32(), _mm512_cvtusepi16_epi8(), _mm512_maskz_cvtepi16_epi8(), _mm512_maskz_cvtepi32_epi16(), _mm512_maskz_cvtepi64_epi32(), _mm512_maskz_cvtpd_epi32(), _mm512_maskz_cvtpd_epu32(), _mm512_maskz_cvtsepi16_epi8(), _mm512_maskz_cvtsepi32_epi16(), _mm512_maskz_cvtsepi64_epi32(), _mm512_maskz_cvttpd_epi32(), _mm512_maskz_cvttpd_epu32(), _mm512_maskz_cvtusepi16_epi8(), _mm512_maskz_cvtusepi32_epi16(), and _mm512_maskz_cvtusepi64_epi32().

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd ( __m256d  __a)
static

Calculates the square roots of the values in a 256-bit vector of [4 x double].

This intrinsic corresponds to the VSQRTPD / SQRTPD instruction.

Parameters
__a: A 256-bit vector of [4 x double].
Returns
A 256-bit vector of [4 x double] containing the square roots of the values in the operand.

Definition at line 326 of file avxintrin.h.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps ( __m256  __a)
static

Calculates the square roots of the values in a 256-bit vector of [8 x float].

This intrinsic corresponds to the VSQRTPS / SQRTPS instruction.

Parameters
__a: A 256-bit vector of [8 x float].
Returns
A 256-bit vector of [8 x float] containing the square roots of the values in the operand.

Definition at line 343 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_store_pd ( double *  __p,
__m256d  __a 
)
static

Definition at line 2403 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_store_ps ( float *  __p,
__m256  __a 
)
static

Definition at line 2409 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_store_si256 ( __m256i *  __p,
__m256i  __a 
)
static

Definition at line 2433 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128 ( float *  __addr_hi,
float *  __addr_lo,
__m256  __a 
)
static

Definition at line 2864 of file avxintrin.h.

References _mm256_castps256_ps128(), _mm256_extractf128_ps, and _mm_storeu_ps().

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128d ( double *  __addr_hi,
double *  __addr_lo,
__m256d  __a 
)
static

Definition at line 2875 of file avxintrin.h.

References _mm256_castpd256_pd128(), _mm256_extractf128_pd, and _mm_storeu_pd().

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu2_m128i ( __m128i *  __addr_hi,
__m128i *  __addr_lo,
__m256i  __a 
)
static
static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_pd ( double *  __p,
__m256d  __a 
)
static

Definition at line 2415 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_ps ( float *  __p,
__m256  __a 
)
static

Definition at line 2424 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_si256 ( __m256i *  __p,
__m256i  __a 
)
static

Definition at line 2439 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_stream_pd ( double *  __a,
__m256d  __b 
)
static

Definition at line 2506 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_stream_ps ( float *  __p,
__m256  __a 
)
static

Definition at line 2512 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_stream_si256 ( __m256i *  __a,
__m256i  __b 
)
static

Definition at line 2500 of file avxintrin.h.

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd ( __m256d  __a,
__m256d  __b 
)
static

Subtracts two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VSUBPD / SUBPD instruction.

Parameters
__a: A 256-bit vector of [4 x double] containing the minuend.
__b: A 256-bit vector of [4 x double] containing the subtrahend.
Returns
A 256-bit vector of [4 x double] containing the differences between both operands.

Definition at line 105 of file avxintrin.h.

References __b.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps ( __m256  __a,
__m256  __b 
)
static

Subtracts two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VSUBPS / SUBPS instruction.

Parameters
__a: A 256-bit vector of [8 x float] containing the minuend.
__b: A 256-bit vector of [8 x float] containing the subtrahend.
Returns
A 256-bit vector of [8 x float] containing the differences between both operands.

Definition at line 123 of file avxintrin.h.

References __b.

static __inline int __DEFAULT_FN_ATTRS _mm256_testc_pd ( __m256d  __a,
__m256d  __b 
)
static

Definition at line 2242 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm256_testc_ps ( __m256  __a,
__m256  __b 
)
static

Definition at line 2260 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm256_testc_si256 ( __m256i  __a,
__m256i  __b 
)
static

Definition at line 2278 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_pd ( __m256d  __a,
__m256d  __b 
)
static

Definition at line 2248 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_ps ( __m256  __a,
__m256  __b 
)
static

Definition at line 2266 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm256_testnzc_si256 ( __m256i  __a,
__m256i  __b 
)
static

Definition at line 2284 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm256_testz_pd ( __m256d  __a,
__m256d  __b 
)
static

Definition at line 2236 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm256_testz_ps ( __m256  __a,
__m256  __b 
)
static

Definition at line 2254 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm256_testz_si256 ( __m256i  __a,
__m256i  __b 
)
static

Definition at line 2272 of file avxintrin.h.

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd ( void  )
static

Definition at line 2519 of file avxintrin.h.

Referenced by _mm256_broadcast_f32x4(), and _mm256_broadcast_f64x2().

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps ( void  )
static

Definition at line 2525 of file avxintrin.h.

Referenced by _mm256_broadcast_f32x2(), and _mm512_cvtpd_ps().

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256 ( void  )
static
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd ( __m256d  __a,
__m256d  __b 
)
static

Definition at line 2175 of file avxintrin.h.

Referenced by _mm256_mask_unpackhi_pd(), and _mm256_maskz_unpackhi_pd().

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps ( __m256  __a,
__m256  __b 
)
static

Definition at line 2187 of file avxintrin.h.

Referenced by _mm256_mask_unpackhi_ps(), and _mm256_maskz_unpackhi_ps().

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd ( __m256d  __a,
__m256d  __b 
)
static

Definition at line 2181 of file avxintrin.h.

Referenced by _mm256_mask_unpacklo_pd(), and _mm256_maskz_unpacklo_pd().

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps ( __m256  __a,
__m256  __b 
)
static

Definition at line 2193 of file avxintrin.h.

Referenced by _mm256_mask_unpacklo_ps(), and _mm256_maskz_unpacklo_ps().

static __inline __m256d __DEFAULT_FN_ATTRS _mm256_xor_pd ( __m256d  __a,
__m256d  __b 
)
static

Performs a bitwise XOR of two 256-bit vectors of [4 x double].

This intrinsic corresponds to the VXORPD / XORPD instruction.

Parameters
__a: A 256-bit vector of [4 x double] containing one of the source operands.
__b: A 256-bit vector of [4 x double] containing one of the source operands.
Returns
A 256-bit vector of [4 x double] containing the bitwise XOR of the values in the two operands.

Definition at line 643 of file avxintrin.h.

References __b.

static __inline __m256 __DEFAULT_FN_ATTRS _mm256_xor_ps ( __m256  __a,
__m256  __b 
)
static

Performs a bitwise XOR of two 256-bit vectors of [8 x float].

This intrinsic corresponds to the VXORPS / XORPS instruction.

Parameters
__a: A 256-bit vector of [8 x float] containing one of the source operands.
__b: A 256-bit vector of [8 x float] containing one of the source operands.
Returns
A 256-bit vector of [8 x float] containing the bitwise XOR of the values in the two operands.

Definition at line 661 of file avxintrin.h.

References __b.

static __inline void __DEFAULT_FN_ATTRS _mm256_zeroall ( void  )
static

Definition at line 2304 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm256_zeroupper ( void  )
static

Definition at line 2310 of file avxintrin.h.

static __inline __m128 __DEFAULT_FN_ATTRS _mm_broadcast_ss ( float const *  __a)
static

Definition at line 2317 of file avxintrin.h.

static __inline __m128d __DEFAULT_FN_ATTRS _mm_maskload_pd ( double const *  __p,
__m128i  __m 
)
static

Definition at line 2449 of file avxintrin.h.

static __inline __m128 __DEFAULT_FN_ATTRS _mm_maskload_ps ( float const *  __p,
__m128i  __m 
)
static

Definition at line 2462 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm_maskstore_pd ( double *  __p,
__m128i  __m,
__m128d  __a 
)
static

Definition at line 2481 of file avxintrin.h.

static __inline void __DEFAULT_FN_ATTRS _mm_maskstore_ps ( float *  __p,
__m128i  __m,
__m128  __a 
)
static

Definition at line 2493 of file avxintrin.h.

static __inline __m128d __DEFAULT_FN_ATTRS _mm_permutevar_pd ( __m128d  __a,
__m128i  __c 
)
static

Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector operand.

This intrinsic corresponds to the VPERMILPD instruction.

Parameters
__a: A 128-bit vector of [2 x double].
__c: A 128-bit integer vector operand specifying how the values are to be copied. Bit [1]: 0: Bits [63:0] of the source are copied to bits [63:0] of the returned vector. 1: Bits [127:64] of the source are copied to bits [63:0] of the returned vector. Bit [65]: 0: Bits [63:0] of the source are copied to bits [127:64] of the returned vector. 1: Bits [127:64] of the source are copied to bits [127:64] of the returned vector.
Returns
A 128-bit vector of [2 x double] containing the copied values.

Definition at line 784 of file avxintrin.h.

static __inline __m128 __DEFAULT_FN_ATTRS _mm_permutevar_ps ( __m128  __a,
__m128i  __c 
)
static

Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand.

This intrinsic corresponds to the VPERMILPS instruction.

Parameters
__a: A 128-bit vector of [4 x float].
__c: A 128-bit integer vector operand specifying how the values are to be copied. Bits [1:0]: 00: Bits [31:0] of the source are copied to bits [31:0] of the returned vector. 01: Bits [63:32] of the source are copied to bits [31:0] of the returned vector. 10: Bits [95:64] of the source are copied to bits [31:0] of the returned vector. 11: Bits [127:96] of the source are copied to bits [31:0] of the returned vector. Bits [33:32]: 00: Bits [31:0] of the source are copied to bits [63:32] of the returned vector. 01: Bits [63:32] of the source are copied to bits [63:32] of the returned vector. 10: Bits [95:64] of the source are copied to bits [63:32] of the returned vector. 11: Bits [127:96] of the source are copied to bits [63:32] of the returned vector. Bits [65:64]: 00: Bits [31:0] of the source are copied to bits [95:64] of the returned vector. 01: Bits [63:32] of the source are copied to bits [95:64] of the returned vector. 10: Bits [95:64] of the source are copied to bits [95:64] of the returned vector. 11: Bits [127:96] of the source are copied to bits [95:64] of the returned vector. Bits [97:96]: 00: Bits [31:0] of the source are copied to bits [127:96] of the returned vector. 01: Bits [63:32] of the source are copied to bits [127:96] of the returned vector. 10: Bits [95:64] of the source are copied to bits [127:96] of the returned vector. 11: Bits [127:96] of the source are copied to bits [127:96] of the returned vector.
Returns
A 128-bit vector of [4 x float] containing the copied values.

Definition at line 878 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm_testc_pd ( __m128d  __a,
__m128d  __b 
)
static

Definition at line 2206 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm_testc_ps ( __m128  __a,
__m128  __b 
)
static

Definition at line 2224 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm_testnzc_pd ( __m128d  __a,
__m128d  __b 
)
static

Definition at line 2212 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm_testnzc_ps ( __m128  __a,
__m128  __b 
)
static

Definition at line 2230 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm_testz_pd ( __m128d  __a,
__m128d  __b 
)
static

Definition at line 2200 of file avxintrin.h.

static __inline int __DEFAULT_FN_ATTRS _mm_testz_ps ( __m128  __a,
__m128  __b 
)
static

Definition at line 2218 of file avxintrin.h.