30 typedef long long __m128i
__attribute__((__vector_size__(16)));
34 typedef long long __v2di
__attribute__ ((__vector_size__ (16)));
39 typedef unsigned long long __v2du
__attribute__ ((__vector_size__ (16)));
40 typedef unsigned short __v8hu
__attribute__((__vector_size__(16)));
41 typedef unsigned char __v16qu
__attribute__((__vector_size__(16)));
45 typedef signed char __v16qs
__attribute__((__vector_size__(16)));
50 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
62 return (__m128d)((__v2df)__a + (__v2df)
__b);
75 return (__m128d)((__v2df)__a - (__v2df)
__b);
88 return (__m128d)((__v2df)__a * (__v2df)
__b);
101 return (__m128d)((__v2df)__a / (__v2df)
__b);
107 __m128d
__c = __builtin_ia32_sqrtsd((__v2df)__b);
108 return (__m128d) { __c[0], __a[1] };
114 return __builtin_ia32_sqrtpd((__v2df)__a);
120 return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
126 return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
132 return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
138 return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
144 return (__m128d)((__v4su)__a & (__v4su)
__b);
150 return (__m128d)(~(__v4su)__a & (__v4su)
__b);
156 return (__m128d)((__v4su)__a | (__v4su)
__b);
162 return (__m128d)((__v4su)__a ^ (__v4su)
__b);
168 return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
174 return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
180 return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
186 return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
192 return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
198 return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
204 return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
210 return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
216 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
222 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
228 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
234 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
240 return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
246 return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
252 return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
258 __m128d
__c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
259 return (__m128d) { __c[0], __a[1] };
265 __m128d
__c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
266 return (__m128d) { __c[0], __a[1] };
272 return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
278 return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
284 return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
290 return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
296 return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
302 __m128d
__c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
303 return (__m128d) { __c[0], __a[1] };
309 __m128d
__c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
310 return (__m128d) { __c[0], __a[1] };
316 return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
322 return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
328 return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
334 return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
340 return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
346 return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
352 return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
358 return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
364 return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
370 return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
376 return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
382 return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
388 return __builtin_ia32_cvtpd2ps((__v2df)__a);
394 return (__m128d) __builtin_convertvector(
395 __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
401 return (__m128d) __builtin_convertvector(
402 __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
408 return __builtin_ia32_cvtpd2dq((__v2df)__a);
414 return __builtin_ia32_cvtsd2si((__v2df)__a);
420 return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
440 return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);
446 return __builtin_ia32_cvttsd2si((__v2df)__a);
452 return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
458 return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
464 return __builtin_ia32_cvtpi2pd((__v2si)__a);
476 return *(__m128d*)__dp;
482 struct __mm_load1_pd_struct {
485 double __u = ((
struct __mm_load1_pd_struct*)__dp)->__u;
486 return (__m128d){ __u, __u };
489 #define _mm_load_pd1(dp) _mm_load1_pd(dp)
494 __m128d __u = *(__m128d*)__dp;
495 return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);
504 return ((
struct __loadu_pd*)__dp)->__v;
510 struct __loadu_si64 {
513 long long __u = ((
struct __loadu_si64*)__a)->__v;
514 return (__m128i){__u, 0L};
520 struct __mm_load_sd_struct {
523 double __u = ((
struct __mm_load_sd_struct*)__dp)->__u;
524 return (__m128d){ __u, 0 };
530 struct __mm_loadh_pd_struct {
533 double __u = ((
struct __mm_loadh_pd_struct*)__dp)->__u;
534 return (__m128d){ __a[0], __u };
540 struct __mm_loadl_pd_struct {
543 double __u = ((
struct __mm_loadl_pd_struct*)__dp)->__u;
544 return (__m128d){ __u, __a[1] };
550 return (__m128d)__builtin_ia32_undef128();
556 return (__m128d){ __w, 0 };
562 return (__m128d){ __w, __w };
568 return (__m128d){
__x, __w };
574 return (__m128d){ __w, __x };
580 return (__m128d){ 0, 0 };
586 return (__m128d){ __b[0], __a[1] };
592 struct __mm_store_sd_struct {
595 ((
struct __mm_store_sd_struct*)__dp)->__u = __a[0];
601 *(__m128d*)__dp = __a;
607 __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
623 ((
struct __storeu_pd*)__dp)->__v = __a;
629 __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);
630 *(__m128d *)__dp = __a;
636 struct __mm_storeh_pd_struct {
639 ((
struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
645 struct __mm_storeh_pd_struct {
648 ((
struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
654 return (__m128i)((__v16qu)__a + (__v16qu)
__b);
660 return (__m128i)((__v8hu)__a + (__v8hu)
__b);
666 return (__m128i)((__v4su)__a + (__v4su)
__b);
672 return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
678 return (__m128i)((__v2du)__a + (__v2du)
__b);
684 return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
690 return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
696 return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
702 return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
708 return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
714 return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
720 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
726 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
732 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
738 return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
744 return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
750 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
756 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
775 return (__m128i)((__v8hu)__a * (__v8hu)
__b);
794 return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
813 return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
835 return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
853 return (__m128i)((__v16qu)__a - (__v16qu)
__b);
871 return (__m128i)((__v8hu)__a - (__v8hu)
__b);
889 return (__m128i)((__v4su)__a - (__v4su)
__b);
908 return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);
926 return (__m128i)((__v2du)__a - (__v2du)
__b);
947 return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
968 return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
988 return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
1008 return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
1026 return (__m128i)((__v2du)__a & (__v2du)
__b);
1046 return (__m128i)(~(__v2du)__a & (__v2du)
__b);
1063 return (__m128i)((__v2du)__a | (__v2du)
__b);
1081 return (__m128i)((__v2du)__a ^ (__v2du)
__b);
1101 #define _mm_slli_si128(a, imm) __extension__ ({ \
1102 (__m128i)__builtin_shufflevector( \
1103 (__v16qi)_mm_setzero_si128(), \
1104 (__v16qi)(__m128i)(a), \
1105 ((char)(imm)&0xF0) ? 0 : 16 - (char)(imm), \
1106 ((char)(imm)&0xF0) ? 1 : 17 - (char)(imm), \
1107 ((char)(imm)&0xF0) ? 2 : 18 - (char)(imm), \
1108 ((char)(imm)&0xF0) ? 3 : 19 - (char)(imm), \
1109 ((char)(imm)&0xF0) ? 4 : 20 - (char)(imm), \
1110 ((char)(imm)&0xF0) ? 5 : 21 - (char)(imm), \
1111 ((char)(imm)&0xF0) ? 6 : 22 - (char)(imm), \
1112 ((char)(imm)&0xF0) ? 7 : 23 - (char)(imm), \
1113 ((char)(imm)&0xF0) ? 8 : 24 - (char)(imm), \
1114 ((char)(imm)&0xF0) ? 9 : 25 - (char)(imm), \
1115 ((char)(imm)&0xF0) ? 10 : 26 - (char)(imm), \
1116 ((char)(imm)&0xF0) ? 11 : 27 - (char)(imm), \
1117 ((char)(imm)&0xF0) ? 12 : 28 - (char)(imm), \
1118 ((char)(imm)&0xF0) ? 13 : 29 - (char)(imm), \
1119 ((char)(imm)&0xF0) ? 14 : 30 - (char)(imm), \
1120 ((char)(imm)&0xF0) ? 15 : 31 - (char)(imm)); })
1122 #define _mm_bslli_si128(a, imm) \
1123 _mm_slli_si128((a), (imm))
1141 return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
1160 return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
1179 return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
1198 return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
1217 return __builtin_ia32_psllqi128((__v2di)__a, __count);
1236 return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);
1256 return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
1276 return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
1296 return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
1316 return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
1336 #define _mm_srli_si128(a, imm) __extension__ ({ \
1337 (__m128i)__builtin_shufflevector( \
1338 (__v16qi)(__m128i)(a), \
1339 (__v16qi)_mm_setzero_si128(), \
1340 ((char)(imm)&0xF0) ? 16 : (char)(imm) + 0, \
1341 ((char)(imm)&0xF0) ? 17 : (char)(imm) + 1, \
1342 ((char)(imm)&0xF0) ? 18 : (char)(imm) + 2, \
1343 ((char)(imm)&0xF0) ? 19 : (char)(imm) + 3, \
1344 ((char)(imm)&0xF0) ? 20 : (char)(imm) + 4, \
1345 ((char)(imm)&0xF0) ? 21 : (char)(imm) + 5, \
1346 ((char)(imm)&0xF0) ? 22 : (char)(imm) + 6, \
1347 ((char)(imm)&0xF0) ? 23 : (char)(imm) + 7, \
1348 ((char)(imm)&0xF0) ? 24 : (char)(imm) + 8, \
1349 ((char)(imm)&0xF0) ? 25 : (char)(imm) + 9, \
1350 ((char)(imm)&0xF0) ? 26 : (char)(imm) + 10, \
1351 ((char)(imm)&0xF0) ? 27 : (char)(imm) + 11, \
1352 ((char)(imm)&0xF0) ? 28 : (char)(imm) + 12, \
1353 ((char)(imm)&0xF0) ? 29 : (char)(imm) + 13, \
1354 ((char)(imm)&0xF0) ? 30 : (char)(imm) + 14, \
1355 ((char)(imm)&0xF0) ? 31 : (char)(imm) + 15); })
1357 #define _mm_bsrli_si128(a, imm) \
1358 _mm_srli_si128((a), (imm))
1376 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
1395 return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
1414 return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
1433 return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
1452 return __builtin_ia32_psrlqi128((__v2di)__a, __count);
1471 return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
1490 return (__m128i)((__v16qi)__a == (__v16qi)
__b);
1509 return (__m128i)((__v8hi)__a == (__v8hi)
__b);
1528 return (__m128i)((__v4si)__a == (__v4si)
__b);
1550 return (__m128i)((__v16qs)__a > (__v16qs)
__b);
1570 return (__m128i)((__v8hi)__a > (__v8hi)
__b);
1590 return (__m128i)((__v4si)__a > (__v4si)
__b);
1672 _mm_cvtsi64_sd(__m128d __a,
long long __b)
1690 _mm_cvtsd_si64(__m128d __a)
1692 return __builtin_ia32_cvtsd2si64((__v2df)__a);
1707 _mm_cvttsd_si64(__m128d __a)
1709 return __builtin_ia32_cvttsd2si64((__v2df)__a);
1725 return __builtin_ia32_cvtdq2ps((__v4si)__a);
1741 return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);
1757 return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
1773 return (__m128i)(__v4si){ __a, 0, 0, 0 };
1788 _mm_cvtsi64_si128(
long long __a)
1790 return (__m128i){ __a, 0 };
1808 __v4si
__b = (__v4si)__a;
1825 _mm_cvtsi128_si64(__m128i __a)
1860 struct __loadu_si128 {
1863 return ((
struct __loadu_si128*)__p)->__v;
1881 struct __mm_loadl_epi64_struct {
1884 return (__m128i) { ((
struct __mm_loadl_epi64_struct*)__p)->__u, 0};
1899 return (__m128i)__builtin_ia32_undef128();
1921 return (__m128i){ __q0, __q1 };
1943 return (__m128i){ (
long long)__q0, (
long long)__q1 };
1971 return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
2009 _mm_set_epi16(
short __w7,
short __w6,
short __w5,
short __w4,
short __w3,
short __w2,
short __w1,
short __w0)
2011 return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
2057 _mm_set_epi8(
char __b15,
char __b14,
char __b13,
char __b12,
char __b11,
char __b10,
char __b9,
char __b8,
char __b7,
char __b6,
char __b5,
char __b4,
char __b3,
char __b2,
char __b1,
char __b0)
2059 return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
2078 return (__m128i){ __q, __q };
2097 return (__m128i){ (
long long)__q, (
long long)__q };
2116 return (__m128i)(__v4si){ __i, __i, __i, __i };
2135 return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
2154 return (__m128i)(__v16qi){
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b, __b };
2160 return (__m128i){ (
long long)__q0, (
long long)__q1 };
2166 return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
2170 _mm_setr_epi16(
short __w0,
short __w1,
short __w2,
short __w3,
short __w4,
short __w5,
short __w6,
short __w7)
2172 return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
2176 _mm_setr_epi8(
char __b0,
char __b1,
char __b2,
char __b3,
char __b4,
char __b5,
char __b6,
char __b7,
char __b8,
char __b9,
char __b10,
char __b11,
char __b12,
char __b13,
char __b14,
char __b15)
2178 return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
2184 return (__m128i){ 0LL, 0LL };
2196 struct __storeu_si128 {
2199 ((
struct __storeu_si128*)__p)->__v =
__b;
2205 __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
2211 struct __mm_storel_epi64_struct {
2214 ((
struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
2220 __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);
2226 __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
2232 __builtin_ia32_movnti(__p, __a);
2237 _mm_stream_si64(
long long *
__p,
long long __a)
2239 __builtin_ia32_movnti64(__p, __a);
2246 __builtin_ia32_clflush(__p);
2252 __builtin_ia32_lfence();
2258 __builtin_ia32_mfence();
2264 return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
2270 return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
2276 return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
2282 __v8hi
__b = (__v8hi)__a;
2283 return (
unsigned short)__b[__imm & 7];
2289 __v8hi
__c = (__v8hi)__a;
2290 __c[__imm & 7] =
__b;
2291 return (__m128i)
__c;
2297 return __builtin_ia32_pmovmskb128((__v16qi)__a);
2300 #define _mm_shuffle_epi32(a, imm) __extension__ ({ \
2301 (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
2302 (__v4si)_mm_undefined_si128(), \
2303 ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
2304 ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); })
2306 #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
2307 (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
2308 (__v8hi)_mm_undefined_si128(), \
2309 ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
2310 ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3, \
2313 #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
2314 (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
2315 (__v8hi)_mm_undefined_si128(), \
2317 4 + (((imm) >> 0) & 0x3), \
2318 4 + (((imm) >> 2) & 0x3), \
2319 4 + (((imm) >> 4) & 0x3), \
2320 4 + (((imm) >> 6) & 0x3)); })
2325 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
2331 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
2337 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
2343 return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);
2349 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
2355 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
2361 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
2367 return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);
2373 return (__m64)__a[0];
2379 return (__m128i){ (
long long)__a, 0 };
2385 return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2);
2391 return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
2397 return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);
2403 return __builtin_ia32_movmskpd((__v2df)__a);
2406 #define _mm_shuffle_pd(a, b, i) __extension__ ({ \
2407 (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
2408 0 + (((i) >> 0) & 0x1), \
2409 2 + (((i) >> 1) & 0x1)); })
2420 return (__m128i)__a;
2426 return (__m128d)__a;
2432 return (__m128i)__a;
2444 return (__m128d)__a;
2450 __builtin_ia32_pause();
2453 #undef __DEFAULT_FN_ATTRS
2455 #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
Initializes the 16-bit values in a 128-bit vector of [8 x i16] with the specified 16-bit integer valu...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q)
Initializes both values in a 128-bit vector of [2 x i64] with the specified 64-bit value...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w)
static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, __m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w)
static __inline__ void __DEFAULT_FN_ATTRS _mm_clflush(void const *__p)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of 64-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, double const *__dp)
static __inline__ int __DEFAULT_FN_ATTRS _mm_extract_epi16(__m128i __a, int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit unsigned integer values in the input and returns the differences in th...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding 16-bit values of the 128-bit integer vectors for equality...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
double __m128d __attribute__((__vector_size__(16)))
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, __m64 __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadl_epi64(__m128i const *__p)
Returns a vector of [2 x i64] where the lower element is taken from the lower element of the operand...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtpd_pi32(__m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, long long __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
static __inline unsigned char unsigned int unsigned int unsigned int * __p
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a)
Returns a vector of [4 x i32] where the lowest element is the input operand and the remaining element...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mfence(void)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32], truncating the result when it is inexact...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_lfence(void)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding 32-bit values of the 128-bit integer vectors for equality...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, __m128d __a)
static __inline__ vector float vector float __b
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
Initializes the 32-bit values in a 128-bit vector of [4 x i32] with the specified 32-bit integer valu...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
static __inline unsigned char unsigned int __x
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, __m128i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a)
static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i *__p, __m128i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a)
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two [8 x short] vectors and returns a vector containing the ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_insert_epi16(__m128i __a, int __b, int __imm)
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a)
Moves the least significant 32 bits of a vector of [4 x i32] to a 32-bit signed integer value...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Initializes the 8-bit values in a 128-bit vector of [16 x i8] with the specified 8-bit integer values...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtpi32_pd(__m64 __a)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the two 64-bit integer vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b)
Subtracts signed or unsigned 64-bit integer values and writes the difference to the corresponding bit...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, __m64 __q1)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, __m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, int __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements of...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit signed integer values in the input and returns the differences in the ...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit unsigned integer values in the input and returns the differences in the...
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, __m128i __b)
Computes the absolute differences of corresponding 8-bit integer values in two 128-bit vectors...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of bits...
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, __m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of the 16-bit values in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, double const *__dp)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b)
#define __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, __m128 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b)
Performs a bitwise exclusive OR of two 128-bit integer vectors.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit signed integer values in the input and returns the differences in the corresponding...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b)
Performs a bitwise OR of two 128-bit integer vectors.
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i *__p, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si32(int *__p, int __a)
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp)
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a)
static __inline__ void __DEFAULT_FN_ATTRS _mm_pause(void)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttpd_pi32(__m128d __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
static __inline__ vector float vector float vector float __c
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, __m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors, using the one's complement of the values contained...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b)