/* Attribute list attached to the inline function definitions that name this
 * macro: __always_inline__, __nodebug__ and __target__("sse3"). */
30 #define __DEFAULT_FN_ATTRS \
31 __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
48 return (__m128i)__builtin_ia32_lddqu((
char const *)__p);
67 return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
90 return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
113 return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
135 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
157 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
176 return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
199 return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
222 return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
/* Function-like macro: _mm_loaddup_pd(dp) expands to _mm_load1_pd(dp), passing
 * its single argument through unchanged. */
240 #define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
258 return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
/* Denormals-zero mode values. Both are expressed in the same bit position that
 * _MM_DENORMALS_ZERO_MASK selects (0x0040). */
261 #define _MM_DENORMALS_ZERO_ON (0x0040)
262 #define _MM_DENORMALS_ZERO_OFF (0x0000)
/* Mask that isolates the denormals-zero bit in the value read by _mm_getcsr(). */
264 #define _MM_DENORMALS_ZERO_MASK (0x0040)
/* Evaluates to _mm_getcsr() with every bit outside the mask cleared, i.e. either
 * _MM_DENORMALS_ZERO_ON or _MM_DENORMALS_ZERO_OFF. */
266 #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
/* Reads _mm_getcsr(), clears the masked bit, ORs in (x) and hands the result to
 * _mm_setcsr(). (x) is not masked here, so any other bits set in it are written
 * as well; presumably callers pass one of the two mode values above - confirm. */
267 #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
287 __builtin_ia32_monitor((
void *)__p, __extensions, __hints);
306 __builtin_ia32_mwait(__extensions, __hints);
309 #undef __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_lddqu_si128(__m128i const *__p)
Loads data from an unaligned memory location to elements in a 128-bit vector.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hadd_ps(__m128 __a, __m128 __b)
Horizontally adds the adjacent pairs of values contained in two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a)
Duplicates low-order (even-indexed) values from a 128-bit vector of [4 x float] to float values stored in a 128-bit vector of [4 x float].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hsub_pd(__m128d __a, __m128d __b)
Horizontally subtracts the pairs of values contained in two 128-bit vectors of [2 x double].
static __inline unsigned char unsigned int unsigned int unsigned int * __p
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hsub_ps(__m128 __a, __m128 __b)
Horizontally subtracts the adjacent pairs of values contained in two 128-bit vectors of [4 x float].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hadd_pd(__m128d __a, __m128d __b)
Horizontally adds the pairs of values contained in two 128-bit vectors of [2 x double].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_addsub_ps(__m128 __a, __m128 __b)
Subtracts the even-indexed values and adds the odd-indexed values of two 128-bit vectors of [4 x float] (even elements: __a - __b; odd elements: __a + __b).
#define __DEFAULT_FN_ATTRS
static __inline__ vector float vector float __b
static __inline__ void __DEFAULT_FN_ATTRS _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
Establishes a linear address memory range to be monitored and puts the processor in the monitor event pending state.
static __inline__ void __DEFAULT_FN_ATTRS _mm_mwait(unsigned __extensions, unsigned __hints)
Used with the MONITOR instruction to wait while the processor is in the monitor event pending state.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a)
Moves and duplicates high-order (odd-indexed) values from a 128-bit vector of [4 x float] to float values stored in a 128-bit vector of [4 x float].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double] to double-precision values stored in a 128-bit vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_addsub_pd(__m128d __a, __m128d __b)
Subtracts the even-indexed value and adds the odd-indexed value of two 128-bit vectors of [2 x double] (element 0: __a - __b; element 1: __a + __b).