clang  3.9.0
emmintrin.h
Go to the documentation of this file.
1 /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23 
24 #ifndef __EMMINTRIN_H
25 #define __EMMINTRIN_H
26 
27 #include <xmmintrin.h>
28 
/* Public 128-bit vector types exposed by the SSE2 interface. */
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Internal element-typed views of a 16-byte vector. */
typedef double __v2df __attribute__((__vector_size__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));

/* Internal unsigned views. */
typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));

/* An explicitly signed char view is also needed. It is internal only and
 * should not appear in the interface. */
typedef signed char __v16qs __attribute__((__vector_size__(16)));
46 
47 #include <f16cintrin.h>
48 
/* Attribute set attached to every function defined in this file: forced
 * inlining, no debug info, and the "sse2" target feature. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
51 
52 static __inline__ __m128d __DEFAULT_FN_ATTRS
53 _mm_add_sd(__m128d __a, __m128d __b)
54 {
55  __a[0] += __b[0];
56  return __a;
57 }
58 
59 static __inline__ __m128d __DEFAULT_FN_ATTRS
60 _mm_add_pd(__m128d __a, __m128d __b)
61 {
62  return (__m128d)((__v2df)__a + (__v2df)__b);
63 }
64 
65 static __inline__ __m128d __DEFAULT_FN_ATTRS
66 _mm_sub_sd(__m128d __a, __m128d __b)
67 {
68  __a[0] -= __b[0];
69  return __a;
70 }
71 
72 static __inline__ __m128d __DEFAULT_FN_ATTRS
73 _mm_sub_pd(__m128d __a, __m128d __b)
74 {
75  return (__m128d)((__v2df)__a - (__v2df)__b);
76 }
77 
78 static __inline__ __m128d __DEFAULT_FN_ATTRS
79 _mm_mul_sd(__m128d __a, __m128d __b)
80 {
81  __a[0] *= __b[0];
82  return __a;
83 }
84 
85 static __inline__ __m128d __DEFAULT_FN_ATTRS
86 _mm_mul_pd(__m128d __a, __m128d __b)
87 {
88  return (__m128d)((__v2df)__a * (__v2df)__b);
89 }
90 
91 static __inline__ __m128d __DEFAULT_FN_ATTRS
92 _mm_div_sd(__m128d __a, __m128d __b)
93 {
94  __a[0] /= __b[0];
95  return __a;
96 }
97 
98 static __inline__ __m128d __DEFAULT_FN_ATTRS
99 _mm_div_pd(__m128d __a, __m128d __b)
100 {
101  return (__m128d)((__v2df)__a / (__v2df)__b);
102 }
103 
104 static __inline__ __m128d __DEFAULT_FN_ATTRS
105 _mm_sqrt_sd(__m128d __a, __m128d __b)
106 {
107  __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);
108  return (__m128d) { __c[0], __a[1] };
109 }
110 
111 static __inline__ __m128d __DEFAULT_FN_ATTRS
112 _mm_sqrt_pd(__m128d __a)
113 {
114  return __builtin_ia32_sqrtpd((__v2df)__a);
115 }
116 
117 static __inline__ __m128d __DEFAULT_FN_ATTRS
118 _mm_min_sd(__m128d __a, __m128d __b)
119 {
120  return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
121 }
122 
123 static __inline__ __m128d __DEFAULT_FN_ATTRS
124 _mm_min_pd(__m128d __a, __m128d __b)
125 {
126  return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
127 }
128 
129 static __inline__ __m128d __DEFAULT_FN_ATTRS
130 _mm_max_sd(__m128d __a, __m128d __b)
131 {
132  return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
133 }
134 
135 static __inline__ __m128d __DEFAULT_FN_ATTRS
136 _mm_max_pd(__m128d __a, __m128d __b)
137 {
138  return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
139 }
140 
141 static __inline__ __m128d __DEFAULT_FN_ATTRS
142 _mm_and_pd(__m128d __a, __m128d __b)
143 {
144  return (__m128d)((__v4su)__a & (__v4su)__b);
145 }
146 
147 static __inline__ __m128d __DEFAULT_FN_ATTRS
148 _mm_andnot_pd(__m128d __a, __m128d __b)
149 {
150  return (__m128d)(~(__v4su)__a & (__v4su)__b);
151 }
152 
153 static __inline__ __m128d __DEFAULT_FN_ATTRS
154 _mm_or_pd(__m128d __a, __m128d __b)
155 {
156  return (__m128d)((__v4su)__a | (__v4su)__b);
157 }
158 
159 static __inline__ __m128d __DEFAULT_FN_ATTRS
160 _mm_xor_pd(__m128d __a, __m128d __b)
161 {
162  return (__m128d)((__v4su)__a ^ (__v4su)__b);
163 }
164 
165 static __inline__ __m128d __DEFAULT_FN_ATTRS
166 _mm_cmpeq_pd(__m128d __a, __m128d __b)
167 {
168  return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
169 }
170 
171 static __inline__ __m128d __DEFAULT_FN_ATTRS
172 _mm_cmplt_pd(__m128d __a, __m128d __b)
173 {
174  return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
175 }
176 
177 static __inline__ __m128d __DEFAULT_FN_ATTRS
178 _mm_cmple_pd(__m128d __a, __m128d __b)
179 {
180  return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
181 }
182 
183 static __inline__ __m128d __DEFAULT_FN_ATTRS
184 _mm_cmpgt_pd(__m128d __a, __m128d __b)
185 {
186  return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
187 }
188 
189 static __inline__ __m128d __DEFAULT_FN_ATTRS
190 _mm_cmpge_pd(__m128d __a, __m128d __b)
191 {
192  return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
193 }
194 
195 static __inline__ __m128d __DEFAULT_FN_ATTRS
196 _mm_cmpord_pd(__m128d __a, __m128d __b)
197 {
198  return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
199 }
200 
201 static __inline__ __m128d __DEFAULT_FN_ATTRS
202 _mm_cmpunord_pd(__m128d __a, __m128d __b)
203 {
204  return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
205 }
206 
207 static __inline__ __m128d __DEFAULT_FN_ATTRS
208 _mm_cmpneq_pd(__m128d __a, __m128d __b)
209 {
210  return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
211 }
212 
213 static __inline__ __m128d __DEFAULT_FN_ATTRS
214 _mm_cmpnlt_pd(__m128d __a, __m128d __b)
215 {
216  return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
217 }
218 
219 static __inline__ __m128d __DEFAULT_FN_ATTRS
220 _mm_cmpnle_pd(__m128d __a, __m128d __b)
221 {
222  return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
223 }
224 
225 static __inline__ __m128d __DEFAULT_FN_ATTRS
226 _mm_cmpngt_pd(__m128d __a, __m128d __b)
227 {
228  return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
229 }
230 
231 static __inline__ __m128d __DEFAULT_FN_ATTRS
232 _mm_cmpnge_pd(__m128d __a, __m128d __b)
233 {
234  return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
235 }
236 
237 static __inline__ __m128d __DEFAULT_FN_ATTRS
238 _mm_cmpeq_sd(__m128d __a, __m128d __b)
239 {
240  return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
241 }
242 
243 static __inline__ __m128d __DEFAULT_FN_ATTRS
244 _mm_cmplt_sd(__m128d __a, __m128d __b)
245 {
246  return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
247 }
248 
249 static __inline__ __m128d __DEFAULT_FN_ATTRS
250 _mm_cmple_sd(__m128d __a, __m128d __b)
251 {
252  return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
253 }
254 
255 static __inline__ __m128d __DEFAULT_FN_ATTRS
256 _mm_cmpgt_sd(__m128d __a, __m128d __b)
257 {
258  __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
259  return (__m128d) { __c[0], __a[1] };
260 }
261 
262 static __inline__ __m128d __DEFAULT_FN_ATTRS
263 _mm_cmpge_sd(__m128d __a, __m128d __b)
264 {
265  __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
266  return (__m128d) { __c[0], __a[1] };
267 }
268 
269 static __inline__ __m128d __DEFAULT_FN_ATTRS
270 _mm_cmpord_sd(__m128d __a, __m128d __b)
271 {
272  return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
273 }
274 
275 static __inline__ __m128d __DEFAULT_FN_ATTRS
276 _mm_cmpunord_sd(__m128d __a, __m128d __b)
277 {
278  return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
279 }
280 
281 static __inline__ __m128d __DEFAULT_FN_ATTRS
282 _mm_cmpneq_sd(__m128d __a, __m128d __b)
283 {
284  return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
285 }
286 
287 static __inline__ __m128d __DEFAULT_FN_ATTRS
288 _mm_cmpnlt_sd(__m128d __a, __m128d __b)
289 {
290  return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
291 }
292 
293 static __inline__ __m128d __DEFAULT_FN_ATTRS
294 _mm_cmpnle_sd(__m128d __a, __m128d __b)
295 {
296  return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
297 }
298 
299 static __inline__ __m128d __DEFAULT_FN_ATTRS
300 _mm_cmpngt_sd(__m128d __a, __m128d __b)
301 {
302  __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
303  return (__m128d) { __c[0], __a[1] };
304 }
305 
306 static __inline__ __m128d __DEFAULT_FN_ATTRS
307 _mm_cmpnge_sd(__m128d __a, __m128d __b)
308 {
309  __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
310  return (__m128d) { __c[0], __a[1] };
311 }
312 
313 static __inline__ int __DEFAULT_FN_ATTRS
314 _mm_comieq_sd(__m128d __a, __m128d __b)
315 {
316  return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
317 }
318 
319 static __inline__ int __DEFAULT_FN_ATTRS
320 _mm_comilt_sd(__m128d __a, __m128d __b)
321 {
322  return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
323 }
324 
325 static __inline__ int __DEFAULT_FN_ATTRS
326 _mm_comile_sd(__m128d __a, __m128d __b)
327 {
328  return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
329 }
330 
331 static __inline__ int __DEFAULT_FN_ATTRS
332 _mm_comigt_sd(__m128d __a, __m128d __b)
333 {
334  return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
335 }
336 
337 static __inline__ int __DEFAULT_FN_ATTRS
338 _mm_comige_sd(__m128d __a, __m128d __b)
339 {
340  return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
341 }
342 
343 static __inline__ int __DEFAULT_FN_ATTRS
344 _mm_comineq_sd(__m128d __a, __m128d __b)
345 {
346  return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
347 }
348 
349 static __inline__ int __DEFAULT_FN_ATTRS
350 _mm_ucomieq_sd(__m128d __a, __m128d __b)
351 {
352  return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
353 }
354 
355 static __inline__ int __DEFAULT_FN_ATTRS
356 _mm_ucomilt_sd(__m128d __a, __m128d __b)
357 {
358  return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
359 }
360 
361 static __inline__ int __DEFAULT_FN_ATTRS
362 _mm_ucomile_sd(__m128d __a, __m128d __b)
363 {
364  return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
365 }
366 
367 static __inline__ int __DEFAULT_FN_ATTRS
368 _mm_ucomigt_sd(__m128d __a, __m128d __b)
369 {
370  return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
371 }
372 
373 static __inline__ int __DEFAULT_FN_ATTRS
374 _mm_ucomige_sd(__m128d __a, __m128d __b)
375 {
376  return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
377 }
378 
379 static __inline__ int __DEFAULT_FN_ATTRS
380 _mm_ucomineq_sd(__m128d __a, __m128d __b)
381 {
382  return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
383 }
384 
385 static __inline__ __m128 __DEFAULT_FN_ATTRS
386 _mm_cvtpd_ps(__m128d __a)
387 {
388  return __builtin_ia32_cvtpd2ps((__v2df)__a);
389 }
390 
391 static __inline__ __m128d __DEFAULT_FN_ATTRS
392 _mm_cvtps_pd(__m128 __a)
393 {
394  return (__m128d) __builtin_convertvector(
395  __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
396 }
397 
398 static __inline__ __m128d __DEFAULT_FN_ATTRS
399 _mm_cvtepi32_pd(__m128i __a)
400 {
401  return (__m128d) __builtin_convertvector(
402  __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
403 }
404 
405 static __inline__ __m128i __DEFAULT_FN_ATTRS
406 _mm_cvtpd_epi32(__m128d __a)
407 {
408  return __builtin_ia32_cvtpd2dq((__v2df)__a);
409 }
410 
411 static __inline__ int __DEFAULT_FN_ATTRS
412 _mm_cvtsd_si32(__m128d __a)
413 {
414  return __builtin_ia32_cvtsd2si((__v2df)__a);
415 }
416 
417 static __inline__ __m128 __DEFAULT_FN_ATTRS
418 _mm_cvtsd_ss(__m128 __a, __m128d __b)
419 {
420  return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
421 }
422 
423 static __inline__ __m128d __DEFAULT_FN_ATTRS
424 _mm_cvtsi32_sd(__m128d __a, int __b)
425 {
426  __a[0] = __b;
427  return __a;
428 }
429 
430 static __inline__ __m128d __DEFAULT_FN_ATTRS
431 _mm_cvtss_sd(__m128d __a, __m128 __b)
432 {
433  __a[0] = __b[0];
434  return __a;
435 }
436 
437 static __inline__ __m128i __DEFAULT_FN_ATTRS
438 _mm_cvttpd_epi32(__m128d __a)
439 {
440  return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);
441 }
442 
443 static __inline__ int __DEFAULT_FN_ATTRS
444 _mm_cvttsd_si32(__m128d __a)
445 {
446  return __builtin_ia32_cvttsd2si((__v2df)__a);
447 }
448 
449 static __inline__ __m64 __DEFAULT_FN_ATTRS
450 _mm_cvtpd_pi32(__m128d __a)
451 {
452  return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
453 }
454 
455 static __inline__ __m64 __DEFAULT_FN_ATTRS
456 _mm_cvttpd_pi32(__m128d __a)
457 {
458  return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
459 }
460 
461 static __inline__ __m128d __DEFAULT_FN_ATTRS
462 _mm_cvtpi32_pd(__m64 __a)
463 {
464  return __builtin_ia32_cvtpi2pd((__v2si)__a);
465 }
466 
467 static __inline__ double __DEFAULT_FN_ATTRS
468 _mm_cvtsd_f64(__m128d __a)
469 {
470  return __a[0];
471 }
472 
473 static __inline__ __m128d __DEFAULT_FN_ATTRS
474 _mm_load_pd(double const *__dp)
475 {
476  return *(__m128d*)__dp;
477 }
478 
479 static __inline__ __m128d __DEFAULT_FN_ATTRS
480 _mm_load1_pd(double const *__dp)
481 {
482  struct __mm_load1_pd_struct {
483  double __u;
484  } __attribute__((__packed__, __may_alias__));
485  double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
486  return (__m128d){ __u, __u };
487 }
488 
#define _mm_load_pd1(__dp) _mm_load1_pd(__dp)
490 
491 static __inline__ __m128d __DEFAULT_FN_ATTRS
492 _mm_loadr_pd(double const *__dp)
493 {
494  __m128d __u = *(__m128d*)__dp;
495  return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);
496 }
497 
498 static __inline__ __m128d __DEFAULT_FN_ATTRS
499 _mm_loadu_pd(double const *__dp)
500 {
501  struct __loadu_pd {
502  __m128d __v;
503  } __attribute__((__packed__, __may_alias__));
504  return ((struct __loadu_pd*)__dp)->__v;
505 }
506 
507 static __inline__ __m128i __DEFAULT_FN_ATTRS
508 _mm_loadu_si64(void const *__a)
509 {
510  struct __loadu_si64 {
511  long long __v;
512  } __attribute__((__packed__, __may_alias__));
513  long long __u = ((struct __loadu_si64*)__a)->__v;
514  return (__m128i){__u, 0L};
515 }
516 
517 static __inline__ __m128d __DEFAULT_FN_ATTRS
518 _mm_load_sd(double const *__dp)
519 {
520  struct __mm_load_sd_struct {
521  double __u;
522  } __attribute__((__packed__, __may_alias__));
523  double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
524  return (__m128d){ __u, 0 };
525 }
526 
527 static __inline__ __m128d __DEFAULT_FN_ATTRS
528 _mm_loadh_pd(__m128d __a, double const *__dp)
529 {
530  struct __mm_loadh_pd_struct {
531  double __u;
532  } __attribute__((__packed__, __may_alias__));
533  double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
534  return (__m128d){ __a[0], __u };
535 }
536 
537 static __inline__ __m128d __DEFAULT_FN_ATTRS
538 _mm_loadl_pd(__m128d __a, double const *__dp)
539 {
540  struct __mm_loadl_pd_struct {
541  double __u;
542  } __attribute__((__packed__, __may_alias__));
543  double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
544  return (__m128d){ __u, __a[1] };
545 }
546 
547 static __inline__ __m128d __DEFAULT_FN_ATTRS
549 {
550  return (__m128d)__builtin_ia32_undef128();
551 }
552 
553 static __inline__ __m128d __DEFAULT_FN_ATTRS
554 _mm_set_sd(double __w)
555 {
556  return (__m128d){ __w, 0 };
557 }
558 
559 static __inline__ __m128d __DEFAULT_FN_ATTRS
560 _mm_set1_pd(double __w)
561 {
562  return (__m128d){ __w, __w };
563 }
564 
565 static __inline__ __m128d __DEFAULT_FN_ATTRS
566 _mm_set_pd(double __w, double __x)
567 {
568  return (__m128d){ __x, __w };
569 }
570 
571 static __inline__ __m128d __DEFAULT_FN_ATTRS
572 _mm_setr_pd(double __w, double __x)
573 {
574  return (__m128d){ __w, __x };
575 }
576 
577 static __inline__ __m128d __DEFAULT_FN_ATTRS
579 {
580  return (__m128d){ 0, 0 };
581 }
582 
583 static __inline__ __m128d __DEFAULT_FN_ATTRS
584 _mm_move_sd(__m128d __a, __m128d __b)
585 {
586  return (__m128d){ __b[0], __a[1] };
587 }
588 
589 static __inline__ void __DEFAULT_FN_ATTRS
590 _mm_store_sd(double *__dp, __m128d __a)
591 {
592  struct __mm_store_sd_struct {
593  double __u;
594  } __attribute__((__packed__, __may_alias__));
595  ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
596 }
597 
598 static __inline__ void __DEFAULT_FN_ATTRS
599 _mm_store_pd(double *__dp, __m128d __a)
600 {
601  *(__m128d*)__dp = __a;
602 }
603 
604 static __inline__ void __DEFAULT_FN_ATTRS
605 _mm_store1_pd(double *__dp, __m128d __a)
606 {
607  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
608  _mm_store_pd(__dp, __a);
609 }
610 
611 static __inline__ void __DEFAULT_FN_ATTRS
612 _mm_store_pd1(double *__dp, __m128d __a)
613 {
614  return _mm_store1_pd(__dp, __a);
615 }
616 
617 static __inline__ void __DEFAULT_FN_ATTRS
618 _mm_storeu_pd(double *__dp, __m128d __a)
619 {
620  struct __storeu_pd {
621  __m128d __v;
622  } __attribute__((__packed__, __may_alias__));
623  ((struct __storeu_pd*)__dp)->__v = __a;
624 }
625 
626 static __inline__ void __DEFAULT_FN_ATTRS
627 _mm_storer_pd(double *__dp, __m128d __a)
628 {
629  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);
630  *(__m128d *)__dp = __a;
631 }
632 
633 static __inline__ void __DEFAULT_FN_ATTRS
634 _mm_storeh_pd(double *__dp, __m128d __a)
635 {
636  struct __mm_storeh_pd_struct {
637  double __u;
638  } __attribute__((__packed__, __may_alias__));
639  ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
640 }
641 
642 static __inline__ void __DEFAULT_FN_ATTRS
643 _mm_storel_pd(double *__dp, __m128d __a)
644 {
645  struct __mm_storeh_pd_struct {
646  double __u;
647  } __attribute__((__packed__, __may_alias__));
648  ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
649 }
650 
651 static __inline__ __m128i __DEFAULT_FN_ATTRS
652 _mm_add_epi8(__m128i __a, __m128i __b)
653 {
654  return (__m128i)((__v16qu)__a + (__v16qu)__b);
655 }
656 
657 static __inline__ __m128i __DEFAULT_FN_ATTRS
658 _mm_add_epi16(__m128i __a, __m128i __b)
659 {
660  return (__m128i)((__v8hu)__a + (__v8hu)__b);
661 }
662 
663 static __inline__ __m128i __DEFAULT_FN_ATTRS
664 _mm_add_epi32(__m128i __a, __m128i __b)
665 {
666  return (__m128i)((__v4su)__a + (__v4su)__b);
667 }
668 
669 static __inline__ __m64 __DEFAULT_FN_ATTRS
670 _mm_add_si64(__m64 __a, __m64 __b)
671 {
672  return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
673 }
674 
675 static __inline__ __m128i __DEFAULT_FN_ATTRS
676 _mm_add_epi64(__m128i __a, __m128i __b)
677 {
678  return (__m128i)((__v2du)__a + (__v2du)__b);
679 }
680 
681 static __inline__ __m128i __DEFAULT_FN_ATTRS
682 _mm_adds_epi8(__m128i __a, __m128i __b)
683 {
684  return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
685 }
686 
687 static __inline__ __m128i __DEFAULT_FN_ATTRS
688 _mm_adds_epi16(__m128i __a, __m128i __b)
689 {
690  return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
691 }
692 
693 static __inline__ __m128i __DEFAULT_FN_ATTRS
694 _mm_adds_epu8(__m128i __a, __m128i __b)
695 {
696  return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
697 }
698 
699 static __inline__ __m128i __DEFAULT_FN_ATTRS
700 _mm_adds_epu16(__m128i __a, __m128i __b)
701 {
702  return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
703 }
704 
705 static __inline__ __m128i __DEFAULT_FN_ATTRS
706 _mm_avg_epu8(__m128i __a, __m128i __b)
707 {
708  return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
709 }
710 
711 static __inline__ __m128i __DEFAULT_FN_ATTRS
712 _mm_avg_epu16(__m128i __a, __m128i __b)
713 {
714  return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
715 }
716 
717 static __inline__ __m128i __DEFAULT_FN_ATTRS
718 _mm_madd_epi16(__m128i __a, __m128i __b)
719 {
720  return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
721 }
722 
723 static __inline__ __m128i __DEFAULT_FN_ATTRS
724 _mm_max_epi16(__m128i __a, __m128i __b)
725 {
726  return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
727 }
728 
729 static __inline__ __m128i __DEFAULT_FN_ATTRS
730 _mm_max_epu8(__m128i __a, __m128i __b)
731 {
732  return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
733 }
734 
735 static __inline__ __m128i __DEFAULT_FN_ATTRS
736 _mm_min_epi16(__m128i __a, __m128i __b)
737 {
738  return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
739 }
740 
741 static __inline__ __m128i __DEFAULT_FN_ATTRS
742 _mm_min_epu8(__m128i __a, __m128i __b)
743 {
744  return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
745 }
746 
747 static __inline__ __m128i __DEFAULT_FN_ATTRS
748 _mm_mulhi_epi16(__m128i __a, __m128i __b)
749 {
750  return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
751 }
752 
753 static __inline__ __m128i __DEFAULT_FN_ATTRS
754 _mm_mulhi_epu16(__m128i __a, __m128i __b)
755 {
756  return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
757 }
758 
759 /// \brief Multiplies the corresponding elements of two [8 x short] vectors and
760 /// returns a vector containing the low-order 16 bits of each 32-bit product
761 /// in the corresponding element.
762 ///
763 /// \headerfile <x86intrin.h>
764 ///
765 /// This intrinsic corresponds to the \c VPMULLW / PMULLW instruction.
766 ///
767 /// \param __a
768 /// A 128-bit integer vector containing one of the source operands.
769 /// \param __b
770 /// A 128-bit integer vector containing one of the source operands.
771 /// \returns A 128-bit integer vector containing the products of both operands.
772 static __inline__ __m128i __DEFAULT_FN_ATTRS
773 _mm_mullo_epi16(__m128i __a, __m128i __b)
774 {
775  return (__m128i)((__v8hu)__a * (__v8hu)__b);
776 }
777 
778 /// \brief Multiplies 32-bit unsigned integer values contained in the lower bits
779 /// of the two 64-bit integer vectors and returns the 64-bit unsigned
780 /// product.
781 ///
782 /// \headerfile <x86intrin.h>
783 ///
784 /// This intrinsic corresponds to the \c PMULUDQ instruction.
785 ///
786 /// \param __a
787 /// A 64-bit integer containing one of the source operands.
788 /// \param __b
789 /// A 64-bit integer containing one of the source operands.
790 /// \returns A 64-bit integer vector containing the product of both operands.
791 static __inline__ __m64 __DEFAULT_FN_ATTRS
792 _mm_mul_su32(__m64 __a, __m64 __b)
793 {
794  return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
795 }
796 
797 /// \brief Multiplies 32-bit unsigned integer values contained in the lower
798 /// bits of the corresponding elements of two [2 x i64] vectors, and returns
799 /// the 64-bit products in the corresponding elements of a [2 x i64] vector.
800 ///
801 /// \headerfile <x86intrin.h>
802 ///
803 /// This intrinsic corresponds to the \c VPMULUDQ / PMULUDQ instruction.
804 ///
805 /// \param __a
806 /// A [2 x i64] vector containing one of the source operands.
807 /// \param __b
808 /// A [2 x i64] vector containing one of the source operands.
809 /// \returns A [2 x i64] vector containing the product of both operands.
810 static __inline__ __m128i __DEFAULT_FN_ATTRS
811 _mm_mul_epu32(__m128i __a, __m128i __b)
812 {
813  return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
814 }
815 
816 /// \brief Computes the absolute differences of corresponding 8-bit integer
817 /// values in two 128-bit vectors. Sums the first 8 absolute differences, and
818 /// separately sums the second 8 absolute differences. Packss these two
819 /// unsigned 16-bit integer sums into the upper and lower elements of a
820 /// [2 x i64] vector.
821 ///
822 /// \headerfile <x86intrin.h>
823 ///
824 /// This intrinsic corresponds to the \c VPSADBW / PSADBW instruction.
825 ///
826 /// \param __a
827 /// A 128-bit integer vector containing one of the source operands.
828 /// \param __b
829 /// A 128-bit integer vector containing one of the source operands.
830 /// \returns A [2 x i64] vector containing the sums of the sets of absolute
831 /// differences between both operands.
832 static __inline__ __m128i __DEFAULT_FN_ATTRS
833 _mm_sad_epu8(__m128i __a, __m128i __b)
834 {
835  return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
836 }
837 
838 /// \brief Subtracts the corresponding 8-bit integer values in the operands.
839 ///
840 /// \headerfile <x86intrin.h>
841 ///
842 /// This intrinsic corresponds to the \c VPSUBB / PSUBB instruction.
843 ///
844 /// \param __a
845 /// A 128-bit integer vector containing the minuends.
846 /// \param __b
847 /// A 128-bit integer vector containing the subtrahends.
848 /// \returns A 128-bit integer vector containing the differences of the values
849 /// in the operands.
850 static __inline__ __m128i __DEFAULT_FN_ATTRS
851 _mm_sub_epi8(__m128i __a, __m128i __b)
852 {
853  return (__m128i)((__v16qu)__a - (__v16qu)__b);
854 }
855 
856 /// \brief Subtracts the corresponding 16-bit integer values in the operands.
857 ///
858 /// \headerfile <x86intrin.h>
859 ///
860 /// This intrinsic corresponds to the \c VPSUBW / PSUBW instruction.
861 ///
862 /// \param __a
863 /// A 128-bit integer vector containing the minuends.
864 /// \param __b
865 /// A 128-bit integer vector containing the subtrahends.
866 /// \returns A 128-bit integer vector containing the differences of the values
867 /// in the operands.
868 static __inline__ __m128i __DEFAULT_FN_ATTRS
869 _mm_sub_epi16(__m128i __a, __m128i __b)
870 {
871  return (__m128i)((__v8hu)__a - (__v8hu)__b);
872 }
873 
874 /// \brief Subtracts the corresponding 32-bit integer values in the operands.
875 ///
876 /// \headerfile <x86intrin.h>
877 ///
878 /// This intrinsic corresponds to the \c VPSUBD / PSUBD instruction.
879 ///
880 /// \param __a
881 /// A 128-bit integer vector containing the minuends.
882 /// \param __b
883 /// A 128-bit integer vector containing the subtrahends.
884 /// \returns A 128-bit integer vector containing the differences of the values
885 /// in the operands.
886 static __inline__ __m128i __DEFAULT_FN_ATTRS
887 _mm_sub_epi32(__m128i __a, __m128i __b)
888 {
889  return (__m128i)((__v4su)__a - (__v4su)__b);
890 }
891 
892 /// \brief Subtracts signed or unsigned 64-bit integer values and writes the
893 /// difference to the corresponding bits in the destination.
894 ///
895 /// \headerfile <x86intrin.h>
896 ///
897 /// This intrinsic corresponds to the \c PSUBQ instruction.
898 ///
899 /// \param __a
900 /// A 64-bit integer vector containing the minuend.
901 /// \param __b
902 /// A 64-bit integer vector containing the subtrahend.
903 /// \returns A 64-bit integer vector containing the difference of the values in
904 /// the operands.
905 static __inline__ __m64 __DEFAULT_FN_ATTRS
906 _mm_sub_si64(__m64 __a, __m64 __b)
907 {
908  return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);
909 }
910 
911 /// \brief Subtracts the corresponding elements of two [2 x i64] vectors.
912 ///
913 /// \headerfile <x86intrin.h>
914 ///
915 /// This intrinsic corresponds to the \c VPSUBQ / PSUBQ instruction.
916 ///
917 /// \param __a
918 /// A 128-bit integer vector containing the minuends.
919 /// \param __b
920 /// A 128-bit integer vector containing the subtrahends.
921 /// \returns A 128-bit integer vector containing the differences of the values
922 /// in the operands.
923 static __inline__ __m128i __DEFAULT_FN_ATTRS
924 _mm_sub_epi64(__m128i __a, __m128i __b)
925 {
926  return (__m128i)((__v2du)__a - (__v2du)__b);
927 }
928 
929 /// \brief Subtracts corresponding 8-bit signed integer values in the input and
930 /// returns the differences in the corresponding bytes in the destination.
931 /// Differences greater than 7Fh are saturated to 7Fh, and differences less
932 /// than 80h are saturated to 80h.
933 ///
934 /// \headerfile <x86intrin.h>
935 ///
936 /// This intrinsic corresponds to the \c VPSUBSB / PSUBSB instruction.
937 ///
938 /// \param __a
939 /// A 128-bit integer vector containing the minuends.
940 /// \param __b
941 /// A 128-bit integer vector containing the subtrahends.
942 /// \returns A 128-bit integer vector containing the differences of the values
943 /// in the operands.
944 static __inline__ __m128i __DEFAULT_FN_ATTRS
945 _mm_subs_epi8(__m128i __a, __m128i __b)
946 {
947  return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
948 }
949 
950 /// \brief Subtracts corresponding 16-bit signed integer values in the input and
951 /// returns the differences in the corresponding bytes in the destination.
952 /// Differences greater than 7FFFh are saturated to 7FFFh, and values less
953 /// than 8000h are saturated to 8000h.
954 ///
955 /// \headerfile <x86intrin.h>
956 ///
957 /// This intrinsic corresponds to the \c VPSUBSW / PSUBSW instruction.
958 ///
959 /// \param __a
960 /// A 128-bit integer vector containing the minuends.
961 /// \param __b
962 /// A 128-bit integer vector containing the subtrahends.
963 /// \returns A 128-bit integer vector containing the differences of the values
964 /// in the operands.
965 static __inline__ __m128i __DEFAULT_FN_ATTRS
966 _mm_subs_epi16(__m128i __a, __m128i __b)
967 {
968  return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
969 }
970 
971 /// \brief Subtracts corresponding 8-bit unsigned integer values in the input
972 /// and returns the differences in the corresponding bytes in the
973 /// destination. Differences less than 00h are saturated to 00h.
974 ///
975 /// \headerfile <x86intrin.h>
976 ///
977 /// This intrinsic corresponds to the \c VPSUBUSB / PSUBUSB instruction.
978 ///
979 /// \param __a
980 /// A 128-bit integer vector containing the minuends.
981 /// \param __b
982 /// A 128-bit integer vector containing the subtrahends.
983 /// \returns A 128-bit integer vector containing the unsigned integer
984 /// differences of the values in the operands.
985 static __inline__ __m128i __DEFAULT_FN_ATTRS
986 _mm_subs_epu8(__m128i __a, __m128i __b)
987 {
988  return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
989 }
990 
991 /// \brief Subtracts corresponding 16-bit unsigned integer values in the input
992 /// and returns the differences in the corresponding bytes in the
993 /// destination. Differences less than 0000h are saturated to 0000h.
994 ///
995 /// \headerfile <x86intrin.h>
996 ///
997 /// This intrinsic corresponds to the \c VPSUBUSW / PSUBUSW instruction.
998 ///
999 /// \param __a
1000 /// A 128-bit integer vector containing the minuends.
1001 /// \param __b
1002 /// A 128-bit integer vector containing the subtrahends.
1003 /// \returns A 128-bit integer vector containing the unsigned integer
1004 /// differences of the values in the operands.
1005 static __inline__ __m128i __DEFAULT_FN_ATTRS
1006 _mm_subs_epu16(__m128i __a, __m128i __b)
1007 {
1008  return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
1009 }
1010 
1011 /// \brief Performs a bitwise AND (__a & __b) of two 128-bit integer vectors.
1012 ///
1013 /// \headerfile <x86intrin.h>
1014 ///
1015 /// This intrinsic corresponds to the \c VPAND / PAND instruction.
1016 ///
1017 /// \param __a
1018 /// A 128-bit integer vector containing one of the source operands.
1019 /// \param __b
1020 /// A 128-bit integer vector containing one of the source operands.
1021 /// \returns A 128-bit integer vector containing the bitwise AND of the values
1022 /// in both operands.
1023 static __inline__ __m128i __DEFAULT_FN_ATTRS
1024 _mm_and_si128(__m128i __a, __m128i __b)
1025 { /* The AND is evaluated on the unsigned __v2du view of both operands. */
1026  return (__m128i)((__v2du)__a & (__v2du)__b);
1027 }
1028 
1029 /// \brief Performs a bitwise AND of two 128-bit integer vectors, using the
1030 /// one's complement of the values contained in the first source operand (~__a & __b).
1031 ///
1032 /// \headerfile <x86intrin.h>
1033 ///
1034 /// This intrinsic corresponds to the \c VPANDN / PANDN instruction.
1035 ///
1036 /// \param __a
1037 /// A 128-bit vector containing the left source operand. The one's complement
1038 /// of this value is used in the bitwise AND.
1039 /// \param __b
1040 /// A 128-bit vector containing the right source operand. It is used unmodified.
1041 /// \returns A 128-bit integer vector containing the bitwise AND of the one's
1042 /// complement of the first operand and the values in the second operand.
1043 static __inline__ __m128i __DEFAULT_FN_ATTRS
1044 _mm_andnot_si128(__m128i __a, __m128i __b)
1045 { /* Only __a is complemented; the operation uses the unsigned __v2du view. */
1046  return (__m128i)(~(__v2du)__a & (__v2du)__b);
1047 }
1048 /// \brief Performs a bitwise OR (__a | __b) of two 128-bit integer vectors.
1049 ///
1050 /// \headerfile <x86intrin.h>
1051 ///
1052 /// This intrinsic corresponds to the \c VPOR / POR instruction.
1053 ///
1054 /// \param __a
1055 /// A 128-bit integer vector containing one of the source operands.
1056 /// \param __b
1057 /// A 128-bit integer vector containing one of the source operands.
1058 /// \returns A 128-bit integer vector containing the bitwise OR of the values
1059 /// in both operands.
1060 static __inline__ __m128i __DEFAULT_FN_ATTRS
1061 _mm_or_si128(__m128i __a, __m128i __b)
1062 { /* The OR is evaluated on the unsigned __v2du view of both operands. */
1063  return (__m128i)((__v2du)__a | (__v2du)__b);
1064 }
1065 
1066 /// \brief Performs a bitwise exclusive OR (__a ^ __b) of two 128-bit integer vectors.
1067 ///
1068 /// \headerfile <x86intrin.h>
1069 ///
1070 /// This intrinsic corresponds to the \c VPXOR / PXOR instruction.
1071 ///
1072 /// \param __a
1073 /// A 128-bit integer vector containing one of the source operands.
1074 /// \param __b
1075 /// A 128-bit integer vector containing one of the source operands.
1076 /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
1077 /// values in both operands.
1078 static __inline__ __m128i __DEFAULT_FN_ATTRS
1079 _mm_xor_si128(__m128i __a, __m128i __b)
1080 { /* The XOR is evaluated on the unsigned __v2du view of both operands. */
1081  return (__m128i)((__v2du)__a ^ (__v2du)__b);
1082 }
1083 
1084 /// \brief Left-shifts the 128-bit integer vector operand by the specified
1085 /// number of bytes. Low-order bytes are cleared.
1086 ///
1087 /// \headerfile <x86intrin.h>
1088 ///
1089 /// \code
1090 /// __m128i _mm_slli_si128(__m128i a, const int imm);
1091 /// \endcode
1092 ///
1093 /// This intrinsic corresponds to the \c VPSLLDQ / PSLLDQ instruction.
1094 ///
1095 /// \param a
1096 /// A 128-bit integer vector containing the source operand.
1097 /// \param imm
1098 /// An immediate value specifying the number of bytes to left-shift
1099 /// operand a. It is converted to char; if any of bits [7:4] is then set, every result byte is zero.
1100 /// \returns A 128-bit integer vector containing the left-shifted value.
1101 #define _mm_slli_si128(a, imm) __extension__ ({ \
1102  (__m128i)__builtin_shufflevector( \
1103  (__v16qi)_mm_setzero_si128(), /* shuffle indices 0-15: zero bytes */ \
1104  (__v16qi)(__m128i)(a), /* shuffle indices 16-31: bytes of a */ \
1105  ((char)(imm)&0xF0) ? 0 : 16 - (char)(imm), \
1106  ((char)(imm)&0xF0) ? 1 : 17 - (char)(imm), \
1107  ((char)(imm)&0xF0) ? 2 : 18 - (char)(imm), \
1108  ((char)(imm)&0xF0) ? 3 : 19 - (char)(imm), \
1109  ((char)(imm)&0xF0) ? 4 : 20 - (char)(imm), \
1110  ((char)(imm)&0xF0) ? 5 : 21 - (char)(imm), \
1111  ((char)(imm)&0xF0) ? 6 : 22 - (char)(imm), \
1112  ((char)(imm)&0xF0) ? 7 : 23 - (char)(imm), \
1113  ((char)(imm)&0xF0) ? 8 : 24 - (char)(imm), \
1114  ((char)(imm)&0xF0) ? 9 : 25 - (char)(imm), \
1115  ((char)(imm)&0xF0) ? 10 : 26 - (char)(imm), \
1116  ((char)(imm)&0xF0) ? 11 : 27 - (char)(imm), \
1117  ((char)(imm)&0xF0) ? 12 : 28 - (char)(imm), \
1118  ((char)(imm)&0xF0) ? 13 : 29 - (char)(imm), \
1119  ((char)(imm)&0xF0) ? 14 : 30 - (char)(imm), \
1120  ((char)(imm)&0xF0) ? 15 : 31 - (char)(imm)); })
1121 
1122 #define _mm_bslli_si128(a, imm) \
1123  _mm_slli_si128((a), (imm)) /* alternate name; expands to _mm_slli_si128 */
1124 
1125 /// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand
1126 /// by the number of bits given as a scalar integer. Low-order bits are cleared.
1127 ///
1128 /// \headerfile <x86intrin.h>
1129 ///
1130 /// This intrinsic corresponds to the \c VPSLLW / PSLLW instruction.
1131 ///
1132 /// \param __a
1133 /// A 128-bit integer vector containing the source operand.
1134 /// \param __count
1135 /// An integer value specifying the number of bits to left-shift each value
1136 /// in operand __a. The same count is applied to every 16-bit value.
1137 /// \returns A 128-bit integer vector containing the left-shifted values.
1138 static __inline__ __m128i __DEFAULT_FN_ATTRS
1139 _mm_slli_epi16(__m128i __a, int __count)
1140 {
1141  return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
1142 }
1143 
1144 /// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand
1145 /// by the number of bits given in a second vector. Low-order bits are cleared.
1146 ///
1147 /// \headerfile <x86intrin.h>
1148 ///
1149 /// This intrinsic corresponds to the \c VPSLLW / PSLLW instruction.
1150 ///
1151 /// \param __a
1152 /// A 128-bit integer vector containing the source operand.
1153 /// \param __count
1154 /// A 128-bit integer vector in which bits [63:0] specify the number of bits
1155 /// to left-shift each value in operand __a. The same count applies to every value.
1156 /// \returns A 128-bit integer vector containing the left-shifted values.
1157 static __inline__ __m128i __DEFAULT_FN_ATTRS
1158 _mm_sll_epi16(__m128i __a, __m128i __count)
1159 { /* __count is cast to __v8hi only to match the builtin's parameter type. */
1160  return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
1161 }
1162 
1163 /// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand
1164 /// by the number of bits given as a scalar integer. Low-order bits are cleared.
1165 ///
1166 /// \headerfile <x86intrin.h>
1167 ///
1168 /// This intrinsic corresponds to the \c VPSLLD / PSLLD instruction.
1169 ///
1170 /// \param __a
1171 /// A 128-bit integer vector containing the source operand.
1172 /// \param __count
1173 /// An integer value specifying the number of bits to left-shift each value
1174 /// in operand __a. The same count is applied to every 32-bit value.
1175 /// \returns A 128-bit integer vector containing the left-shifted values.
1176 static __inline__ __m128i __DEFAULT_FN_ATTRS
1177 _mm_slli_epi32(__m128i __a, int __count)
1178 {
1179  return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
1180 }
1181 
1182 /// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand
1183 /// by the number of bits given in a second vector. Low-order bits are cleared.
1184 ///
1185 /// \headerfile <x86intrin.h>
1186 ///
1187 /// This intrinsic corresponds to the \c VPSLLD / PSLLD instruction.
1188 ///
1189 /// \param __a
1190 /// A 128-bit integer vector containing the source operand.
1191 /// \param __count
1192 /// A 128-bit integer vector in which bits [63:0] specify the number of bits
1193 /// to left-shift each value in operand __a. The same count applies to every value.
1194 /// \returns A 128-bit integer vector containing the left-shifted values.
1195 static __inline__ __m128i __DEFAULT_FN_ATTRS
1196 _mm_sll_epi32(__m128i __a, __m128i __count)
1197 { /* __count is cast to __v4si only to match the builtin's parameter type. */
1198  return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
1199 }
1200 
1201 /// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand
1202 /// by the number of bits given as a scalar integer. Low-order bits are cleared.
1203 ///
1204 /// \headerfile <x86intrin.h>
1205 ///
1206 /// This intrinsic corresponds to the \c VPSLLQ / PSLLQ instruction.
1207 ///
1208 /// \param __a
1209 /// A 128-bit integer vector containing the source operand.
1210 /// \param __count
1211 /// An integer value specifying the number of bits to left-shift each value
1212 /// in operand __a. The same count is applied to both 64-bit values.
1213 /// \returns A 128-bit integer vector containing the left-shifted values.
1214 static __inline__ __m128i __DEFAULT_FN_ATTRS
1215 _mm_slli_epi64(__m128i __a, int __count)
1216 { /* Unlike the 16/32-bit variants, the builtin's result is returned without a cast. */
1217  return __builtin_ia32_psllqi128((__v2di)__a, __count);
1218 }
1219 
1220 /// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand
1221 /// by the number of bits given in a second vector. Low-order bits are cleared.
1222 ///
1223 /// \headerfile <x86intrin.h>
1224 ///
1225 /// This intrinsic corresponds to the \c VPSLLQ / PSLLQ instruction.
1226 ///
1227 /// \param __a
1228 /// A 128-bit integer vector containing the source operand.
1229 /// \param __count
1230 /// A 128-bit integer vector in which bits [63:0] specify the number of bits
1231 /// to left-shift each value in operand __a. The same count applies to both values.
1232 /// \returns A 128-bit integer vector containing the left-shifted values.
1233 static __inline__ __m128i __DEFAULT_FN_ATTRS
1234 _mm_sll_epi64(__m128i __a, __m128i __count)
1235 { /* Unlike the 16/32-bit variants, the builtin's result is returned without a cast. */
1236  return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);
1237 }
1238 
1239 /// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand
1240 /// by the number of bits given as a scalar integer. High-order bits are
1241 /// filled with the sign bit of the initial value (arithmetic shift).
1242 ///
1243 /// \headerfile <x86intrin.h>
1244 ///
1245 /// This intrinsic corresponds to the \c VPSRAW / PSRAW instruction.
1246 ///
1247 /// \param __a
1248 /// A 128-bit integer vector containing the source operand.
1249 /// \param __count
1250 /// An integer value specifying the number of bits to right-shift each value
1251 /// in operand __a. The same count is applied to every 16-bit value.
1252 /// \returns A 128-bit integer vector containing the right-shifted values.
1253 static __inline__ __m128i __DEFAULT_FN_ATTRS
1254 _mm_srai_epi16(__m128i __a, int __count)
1255 {
1256  return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
1257 }
1258 
1259 /// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand
1260 /// by the number of bits given in a second vector. High-order bits are
1261 /// filled with the sign bit of the initial value (arithmetic shift).
1262 ///
1263 /// \headerfile <x86intrin.h>
1264 ///
1265 /// This intrinsic corresponds to the \c VPSRAW / PSRAW instruction.
1266 ///
1267 /// \param __a
1268 /// A 128-bit integer vector containing the source operand.
1269 /// \param __count
1270 /// A 128-bit integer vector in which bits [63:0] specify the number of bits
1271 /// to right-shift each value in operand __a. The same count applies to every value.
1272 /// \returns A 128-bit integer vector containing the right-shifted values.
1273 static __inline__ __m128i __DEFAULT_FN_ATTRS
1274 _mm_sra_epi16(__m128i __a, __m128i __count)
1275 { /* __count is cast to __v8hi only to match the builtin's parameter type. */
1276  return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
1277 }
1278 
1279 /// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand
1280 /// by the number of bits given as a scalar integer. High-order bits are
1281 /// filled with the sign bit of the initial value (arithmetic shift).
1282 ///
1283 /// \headerfile <x86intrin.h>
1284 ///
1285 /// This intrinsic corresponds to the \c VPSRAD / PSRAD instruction.
1286 ///
1287 /// \param __a
1288 /// A 128-bit integer vector containing the source operand.
1289 /// \param __count
1290 /// An integer value specifying the number of bits to right-shift each value
1291 /// in operand __a. The same count is applied to every 32-bit value.
1292 /// \returns A 128-bit integer vector containing the right-shifted values.
1293 static __inline__ __m128i __DEFAULT_FN_ATTRS
1294 _mm_srai_epi32(__m128i __a, int __count)
1295 {
1296  return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
1297 }
1298 
1299 /// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand
1300 /// by the number of bits given in a second vector. High-order bits are
1301 /// filled with the sign bit of the initial value (arithmetic shift).
1302 ///
1303 /// \headerfile <x86intrin.h>
1304 ///
1305 /// This intrinsic corresponds to the \c VPSRAD / PSRAD instruction.
1306 ///
1307 /// \param __a
1308 /// A 128-bit integer vector containing the source operand.
1309 /// \param __count
1310 /// A 128-bit integer vector in which bits [63:0] specify the number of bits
1311 /// to right-shift each value in operand __a. The same count applies to every value.
1312 /// \returns A 128-bit integer vector containing the right-shifted values.
1313 static __inline__ __m128i __DEFAULT_FN_ATTRS
1314 _mm_sra_epi32(__m128i __a, __m128i __count)
1315 { /* __count is cast to __v4si only to match the builtin's parameter type. */
1316  return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
1317 }
1318 
1319 /// \brief Right-shifts the 128-bit integer vector operand by the specified
1320 /// number of bytes. High-order bytes are cleared.
1321 ///
1322 /// \headerfile <x86intrin.h>
1323 ///
1324 /// \code
1325 /// __m128i _mm_srli_si128(__m128i a, const int imm);
1326 /// \endcode
1327 ///
1328 /// This intrinsic corresponds to the \c VPSRLDQ / PSRLDQ instruction.
1329 ///
1330 /// \param a
1331 /// A 128-bit integer vector containing the source operand.
1332 /// \param imm
1333 /// An immediate value specifying the number of bytes to right-shift operand
1334 /// a. It is converted to char; if any of bits [7:4] is then set, every result byte is zero.
1335 /// \returns A 128-bit integer vector containing the right-shifted value.
1336 #define _mm_srli_si128(a, imm) __extension__ ({ \
1337  (__m128i)__builtin_shufflevector( \
1338  (__v16qi)(__m128i)(a), /* shuffle indices 0-15: bytes of a */ \
1339  (__v16qi)_mm_setzero_si128(), /* shuffle indices 16-31: zero bytes */ \
1340  ((char)(imm)&0xF0) ? 16 : (char)(imm) + 0, \
1341  ((char)(imm)&0xF0) ? 17 : (char)(imm) + 1, \
1342  ((char)(imm)&0xF0) ? 18 : (char)(imm) + 2, \
1343  ((char)(imm)&0xF0) ? 19 : (char)(imm) + 3, \
1344  ((char)(imm)&0xF0) ? 20 : (char)(imm) + 4, \
1345  ((char)(imm)&0xF0) ? 21 : (char)(imm) + 5, \
1346  ((char)(imm)&0xF0) ? 22 : (char)(imm) + 6, \
1347  ((char)(imm)&0xF0) ? 23 : (char)(imm) + 7, \
1348  ((char)(imm)&0xF0) ? 24 : (char)(imm) + 8, \
1349  ((char)(imm)&0xF0) ? 25 : (char)(imm) + 9, \
1350  ((char)(imm)&0xF0) ? 26 : (char)(imm) + 10, \
1351  ((char)(imm)&0xF0) ? 27 : (char)(imm) + 11, \
1352  ((char)(imm)&0xF0) ? 28 : (char)(imm) + 12, \
1353  ((char)(imm)&0xF0) ? 29 : (char)(imm) + 13, \
1354  ((char)(imm)&0xF0) ? 30 : (char)(imm) + 14, \
1355  ((char)(imm)&0xF0) ? 31 : (char)(imm) + 15); })
1356 
1357 #define _mm_bsrli_si128(a, imm) \
1358  _mm_srli_si128((a), (imm)) /* alternate name; expands to _mm_srli_si128 */
1359 
1360 /// \brief Right-shifts each of the 16-bit values in the 128-bit integer vector
1361 /// operand by the number of bits given as a scalar integer. High-order bits are cleared.
1362 ///
1363 /// \headerfile <x86intrin.h>
1364 ///
1365 /// This intrinsic corresponds to the \c VPSRLW / PSRLW instruction.
1366 ///
1367 /// \param __a
1368 /// A 128-bit integer vector containing the source operand.
1369 /// \param __count
1370 /// An integer value specifying the number of bits to right-shift each value
1371 /// in operand __a. The same count is applied to every 16-bit value.
1372 /// \returns A 128-bit integer vector containing the right-shifted values.
1373 static __inline__ __m128i __DEFAULT_FN_ATTRS
1374 _mm_srli_epi16(__m128i __a, int __count)
1375 {
1376  return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
1377 }
1378 
1379 /// \brief Right-shifts each of the 16-bit values in the 128-bit integer vector
1380 /// operand by the number of bits given in a second vector. High-order bits are cleared.
1381 ///
1382 /// \headerfile <x86intrin.h>
1383 ///
1384 /// This intrinsic corresponds to the \c VPSRLW / PSRLW instruction.
1385 ///
1386 /// \param __a
1387 /// A 128-bit integer vector containing the source operand.
1388 /// \param __count
1389 /// A 128-bit integer vector in which bits [63:0] specify the number of bits
1390 /// to right-shift each value in operand __a. The same count applies to every value.
1391 /// \returns A 128-bit integer vector containing the right-shifted values.
1392 static __inline__ __m128i __DEFAULT_FN_ATTRS
1393 _mm_srl_epi16(__m128i __a, __m128i __count)
1394 { /* __count is cast to __v8hi only to match the builtin's parameter type. */
1395  return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
1396 }
1397 
1398 /// \brief Right-shifts each of the 32-bit values in the 128-bit integer vector
1399 /// operand by the number of bits given as a scalar integer. High-order bits are cleared.
1400 ///
1401 /// \headerfile <x86intrin.h>
1402 ///
1403 /// This intrinsic corresponds to the \c VPSRLD / PSRLD instruction.
1404 ///
1405 /// \param __a
1406 /// A 128-bit integer vector containing the source operand.
1407 /// \param __count
1408 /// An integer value specifying the number of bits to right-shift each value
1409 /// in operand __a. The same count is applied to every 32-bit value.
1410 /// \returns A 128-bit integer vector containing the right-shifted values.
1411 static __inline__ __m128i __DEFAULT_FN_ATTRS
1412 _mm_srli_epi32(__m128i __a, int __count)
1413 {
1414  return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
1415 }
1416 
1417 /// \brief Right-shifts each of the 32-bit values in the 128-bit integer vector
1418 /// operand by the number of bits given in a second vector. High-order bits are cleared.
1419 ///
1420 /// \headerfile <x86intrin.h>
1421 ///
1422 /// This intrinsic corresponds to the \c VPSRLD / PSRLD instruction.
1423 ///
1424 /// \param __a
1425 /// A 128-bit integer vector containing the source operand.
1426 /// \param __count
1427 /// A 128-bit integer vector in which bits [63:0] specify the number of bits
1428 /// to right-shift each value in operand __a. The same count applies to every value.
1429 /// \returns A 128-bit integer vector containing the right-shifted values.
1430 static __inline__ __m128i __DEFAULT_FN_ATTRS
1431 _mm_srl_epi32(__m128i __a, __m128i __count)
1432 { /* __count is cast to __v4si only to match the builtin's parameter type. */
1433  return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
1434 }
1435 
1436 /// \brief Right-shifts each of the 64-bit values in the 128-bit integer vector
1437 /// operand by the number of bits given as a scalar integer. High-order bits are cleared.
1438 ///
1439 /// \headerfile <x86intrin.h>
1440 ///
1441 /// This intrinsic corresponds to the \c VPSRLQ / PSRLQ instruction.
1442 ///
1443 /// \param __a
1444 /// A 128-bit integer vector containing the source operand.
1445 /// \param __count
1446 /// An integer value specifying the number of bits to right-shift each value
1447 /// in operand __a. The same count is applied to both 64-bit values.
1448 /// \returns A 128-bit integer vector containing the right-shifted values.
1449 static __inline__ __m128i __DEFAULT_FN_ATTRS
1450 _mm_srli_epi64(__m128i __a, int __count)
1451 { /* Unlike the 16/32-bit variants, the builtin's result is returned without a cast. */
1452  return __builtin_ia32_psrlqi128((__v2di)__a, __count);
1453 }
1454 
1455 /// \brief Right-shifts each of the 64-bit values in the 128-bit integer vector
1456 /// operand by the number of bits given in a second vector. High-order bits are cleared.
1457 ///
1458 /// \headerfile <x86intrin.h>
1459 ///
1460 /// This intrinsic corresponds to the \c VPSRLQ / PSRLQ instruction.
1461 ///
1462 /// \param __a
1463 /// A 128-bit integer vector containing the source operand.
1464 /// \param __count
1465 /// A 128-bit integer vector in which bits [63:0] specify the number of bits
1466 /// to right-shift each value in operand __a. The same count applies to both values.
1467 /// \returns A 128-bit integer vector containing the right-shifted values.
1468 static __inline__ __m128i __DEFAULT_FN_ATTRS
1469 _mm_srl_epi64(__m128i __a, __m128i __count)
1470 { /* Unlike the 16/32-bit variants, the builtin's result is returned without a cast. */
1471  return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
1472 }
1473 
1474 /// \brief Compares each of the corresponding 8-bit values of the two 128-bit
1475 /// integer vectors for equality. Each comparison yields 0h for false, FFh
1476 /// for true.
1477 ///
1478 /// \headerfile <x86intrin.h>
1479 ///
1480 /// This intrinsic corresponds to the \c VPCMPEQB / PCMPEQB instruction.
1481 ///
1482 /// \param __a
1483 /// A 128-bit integer vector.
1484 /// \param __b
1485 /// A 128-bit integer vector.
1486 /// \returns A 128-bit integer vector containing the comparison results.
1487 static __inline__ __m128i __DEFAULT_FN_ATTRS
1488 _mm_cmpeq_epi8(__m128i __a, __m128i __b)
1489 { /* Written with the vector == operator on the __v16qi view, not a builtin call. */
1490  return (__m128i)((__v16qi)__a == (__v16qi)__b);
1491 }
1492 
1493 /// \brief Compares each of the corresponding 16-bit values of the two 128-bit
1494 /// integer vectors for equality. Each comparison yields 0h for false, FFFFh
1495 /// for true.
1496 ///
1497 /// \headerfile <x86intrin.h>
1498 ///
1499 /// This intrinsic corresponds to the \c VPCMPEQW / PCMPEQW instruction.
1500 ///
1501 /// \param __a
1502 /// A 128-bit integer vector.
1503 /// \param __b
1504 /// A 128-bit integer vector.
1505 /// \returns A 128-bit integer vector containing the comparison results.
1506 static __inline__ __m128i __DEFAULT_FN_ATTRS
1507 _mm_cmpeq_epi16(__m128i __a, __m128i __b)
1508 { /* Written with the vector == operator on the __v8hi view, not a builtin call. */
1509  return (__m128i)((__v8hi)__a == (__v8hi)__b);
1510 }
1511 
1512 /// \brief Compares each of the corresponding 32-bit values of the two 128-bit
1513 /// integer vectors for equality. Each comparison yields 0h for false,
1514 /// FFFFFFFFh for true.
1515 ///
1516 /// \headerfile <x86intrin.h>
1517 ///
1518 /// This intrinsic corresponds to the \c VPCMPEQD / PCMPEQD instruction.
1519 ///
1520 /// \param __a
1521 /// A 128-bit integer vector.
1522 /// \param __b
1523 /// A 128-bit integer vector.
1524 /// \returns A 128-bit integer vector containing the comparison results.
1525 static __inline__ __m128i __DEFAULT_FN_ATTRS
1526 _mm_cmpeq_epi32(__m128i __a, __m128i __b)
1527 { /* Written with the vector == operator on the __v4si view, not a builtin call. */
1528  return (__m128i)((__v4si)__a == (__v4si)__b);
1529 }
1530 
1531 /// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
1532 /// integer vectors to determine if the values in the first operand are
1533 /// greater than those in the second operand. Each comparison yields 0h for
1534 /// false, FFh for true.
1535 ///
1536 /// \headerfile <x86intrin.h>
1537 ///
1538 /// This intrinsic corresponds to the \c VPCMPGTB / PCMPGTB instruction.
1539 ///
1540 /// \param __a
1541 /// A 128-bit integer vector. Its values are the left-hand side of the comparison.
1542 /// \param __b
1543 /// A 128-bit integer vector. Its values are the right-hand side of the comparison.
1544 /// \returns A 128-bit integer vector containing the comparison results.
1545 static __inline__ __m128i __DEFAULT_FN_ATTRS
1546 _mm_cmpgt_epi8(__m128i __a, __m128i __b)
1547 {
1548  /* The comparison must always be signed. __v16qi is a vector of plain char,
1549  whose signedness may be either, so the operands are cast to __v16qs instead. */
1550  return (__m128i)((__v16qs)__a > (__v16qs)__b);
1551 }
1552 
1553 /// \brief Compares each of the corresponding signed 16-bit values of the
1554 /// 128-bit integer vectors to determine if the values in the first operand
1555 /// are greater than those in the second operand. Each comparison yields 0h
1556 /// for false, FFFFh for true.
1557 ///
1558 /// \headerfile <x86intrin.h>
1559 ///
1560 /// This intrinsic corresponds to the \c VPCMPGTW / PCMPGTW instruction.
1561 ///
1562 /// \param __a
1563 /// A 128-bit integer vector. Its values are the left-hand side of the comparison.
1564 /// \param __b
1565 /// A 128-bit integer vector. Its values are the right-hand side of the comparison.
1566 /// \returns A 128-bit integer vector containing the comparison results.
1567 static __inline__ __m128i __DEFAULT_FN_ATTRS
1568 _mm_cmpgt_epi16(__m128i __a, __m128i __b)
1569 { /* __v8hi is a vector of short, so the > comparison is signed. */
1570  return (__m128i)((__v8hi)__a > (__v8hi)__b);
1571 }
1572 
1573 /// \brief Compares each of the corresponding signed 32-bit values of the
1574 /// 128-bit integer vectors to determine if the values in the first operand
1575 /// are greater than those in the second operand. Each comparison yields 0h
1576 /// for false, FFFFFFFFh for true.
1577 ///
1578 /// \headerfile <x86intrin.h>
1579 ///
1580 /// This intrinsic corresponds to the \c VPCMPGTD / PCMPGTD instruction.
1581 ///
1582 /// \param __a
1583 /// A 128-bit integer vector. Its values are the left-hand side of the comparison.
1584 /// \param __b
1585 /// A 128-bit integer vector. Its values are the right-hand side of the comparison.
1586 /// \returns A 128-bit integer vector containing the comparison results.
1587 static __inline__ __m128i __DEFAULT_FN_ATTRS
1588 _mm_cmpgt_epi32(__m128i __a, __m128i __b)
1589 { /* Written with the vector > operator on the __v4si view, not a builtin call. */
1590  return (__m128i)((__v4si)__a > (__v4si)__b);
1591 }
1592 
1593 /// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
1594 /// integer vectors to determine if the values in the first operand are less
1595 /// than those in the second operand. Each comparison yields 0h for false,
1596 /// FFh for true.
1597 ///
1598 /// \headerfile <x86intrin.h>
1599 ///
1600 /// This intrinsic corresponds to the \c VPCMPGTB / PCMPGTB instruction.
1601 ///
1602 /// \param __a
1603 /// A 128-bit integer vector. Its values are the left-hand side of the comparison.
1604 /// \param __b
1605 /// A 128-bit integer vector. Its values are the right-hand side of the comparison.
1606 /// \returns A 128-bit integer vector containing the comparison results.
1607 static __inline__ __m128i __DEFAULT_FN_ATTRS
1608 _mm_cmplt_epi8(__m128i __a, __m128i __b)
1609 { /* a < b is evaluated as b > a, reusing _mm_cmpgt_epi8 with swapped operands. */
1610  return _mm_cmpgt_epi8(__b, __a);
1611 }
1612 
1613 /// \brief Compares each of the corresponding signed 16-bit values of the
1614 /// 128-bit integer vectors to determine if the values in the first operand
1615 /// are less than those in the second operand. Each comparison yields 0h for
1616 /// false, FFFFh for true.
1617 ///
1618 /// \headerfile <x86intrin.h>
1619 ///
1620 /// This intrinsic corresponds to the \c VPCMPGTW / PCMPGTW instruction.
1621 ///
1622 /// \param __a
1623 /// A 128-bit integer vector. Its values are the left-hand side of the comparison.
1624 /// \param __b
1625 /// A 128-bit integer vector. Its values are the right-hand side of the comparison.
1626 /// \returns A 128-bit integer vector containing the comparison results.
1627 static __inline__ __m128i __DEFAULT_FN_ATTRS
1628 _mm_cmplt_epi16(__m128i __a, __m128i __b)
1629 { /* a < b is evaluated as b > a, reusing _mm_cmpgt_epi16 with swapped operands. */
1630  return _mm_cmpgt_epi16(__b, __a);
1631 }
1632 
1633 /// \brief Compares each of the corresponding signed 32-bit values of the
1634 /// 128-bit integer vectors to determine if the values in the first operand
1635 /// are less than those in the second operand. Each comparison yields 0h for
1636 /// false, FFFFFFFFh for true.
1637 ///
1638 /// \headerfile <x86intrin.h>
1639 ///
1640 /// This intrinsic corresponds to the \c VPCMPGTD / PCMPGTD instruction.
1641 ///
1642 /// \param __a
1643 /// A 128-bit integer vector. Its values are the left-hand side of the comparison.
1644 /// \param __b
1645 /// A 128-bit integer vector. Its values are the right-hand side of the comparison.
1646 /// \returns A 128-bit integer vector containing the comparison results.
1647 static __inline__ __m128i __DEFAULT_FN_ATTRS
1648 _mm_cmplt_epi32(__m128i __a, __m128i __b)
1649 { /* a < b is evaluated as b > a, reusing _mm_cmpgt_epi32 with swapped operands. */
1650  return _mm_cmpgt_epi32(__b, __a);
1651 }
1652 
1653 #ifdef __x86_64__
1654 /// \brief Converts a 64-bit signed integer value from the second operand into a
1655 /// double-precision value and returns it in the lower element of a [2 x
1656 /// double] vector; the upper element of the returned vector is copied from
1657 /// the upper element of the first operand. Only defined when __x86_64__ is defined.
1658 ///
1659 /// \headerfile <x86intrin.h>
1660 ///
1661 /// This intrinsic corresponds to the \c VCVTSI2SD / CVTSI2SD instruction.
1662 ///
1663 /// \param __a
1664 /// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are
1665 /// copied to the upper 64 bits of the destination.
1666 /// \param __b
1667 /// A 64-bit signed integer operand containing the value to be converted.
1668 /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
1669 /// converted value of the second operand. The upper 64 bits are copied from
1670 /// the upper 64 bits of the first operand.
1671 static __inline__ __m128d __DEFAULT_FN_ATTRS
1672 _mm_cvtsi64_sd(__m128d __a, long long __b)
1673 { /* __a is a by-value copy: element 0 is overwritten, element 1 is left as passed. */
1674  __a[0] = __b;
1675  return __a;
1676 }
1677 
1678 /// \brief Converts the first (lower) element of a vector of [2 x double] into a
1679 /// 64-bit signed integer value, according to the current rounding mode. Only defined when __x86_64__ is defined.
1680 ///
1681 /// \headerfile <x86intrin.h>
1682 ///
1683 /// This intrinsic corresponds to the \c VCVTSD2SI / CVTSD2SI instruction.
1684 ///
1685 /// \param __a
1686 /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
1687 /// conversion; the upper 64 bits are not mentioned as an input.
1688 /// \returns A 64-bit signed integer containing the converted value.
1689 static __inline__ long long __DEFAULT_FN_ATTRS
1690 _mm_cvtsd_si64(__m128d __a)
1691 {
1692  return __builtin_ia32_cvtsd2si64((__v2df)__a);
1693 }
1694 
1695 /// \brief Converts the first (lower) element of a vector of [2 x double] into a
1696 /// 64-bit signed integer value, truncating the result when it is inexact. Only defined when __x86_64__ is defined.
1697 ///
1698 /// \headerfile <x86intrin.h>
1699 ///
1700 /// This intrinsic corresponds to the \c VCVTTSD2SI / CVTTSD2SI instruction.
1701 ///
1702 /// \param __a
1703 /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
1704 /// conversion; the upper 64 bits are not mentioned as an input.
1705 /// \returns A 64-bit signed integer containing the converted value.
1706 static __inline__ long long __DEFAULT_FN_ATTRS
1707 _mm_cvttsd_si64(__m128d __a)
1708 {
1709  return __builtin_ia32_cvttsd2si64((__v2df)__a);
1710 }
1711 #endif
1712 
1713 /// \brief Converts a vector of [4 x i32] into a vector of [4 x float], element by element.
1714 ///
1715 /// \headerfile <x86intrin.h>
1716 ///
1717 /// This intrinsic corresponds to the \c VCVTDQ2PS / CVTDQ2PS instruction.
1718 ///
1719 /// \param __a
1720 /// A 128-bit integer vector, interpreted as [4 x i32].
1721 /// \returns A 128-bit vector of [4 x float] containing the converted values.
1722 static __inline__ __m128 __DEFAULT_FN_ATTRS
1723 _mm_cvtepi32_ps(__m128i __a)
1724 {
1725  return __builtin_ia32_cvtdq2ps((__v4si)__a);
1726 }
1727 
1728 /// \brief Converts a vector of [4 x float] into a vector of [4 x i32], element by element.
1729 ///
1730 /// \headerfile <x86intrin.h>
1731 ///
1732 /// This intrinsic corresponds to the \c VCVTPS2DQ / CVTPS2DQ instruction.
1733 ///
1734 /// \param __a
1735 /// A 128-bit vector of [4 x float].
1736 /// \returns A 128-bit integer vector of [4 x i32] containing the converted
1737 /// values. See _mm_cvttps_epi32 for the truncating variant.
1738 static __inline__ __m128i __DEFAULT_FN_ATTRS
1739 _mm_cvtps_epi32(__m128 __a)
1740 {
1741  return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);
1742 }
1743 
1744 /// \brief Converts a vector of [4 x float] into a vector of [4 x i32], element
1745 /// by element, truncating the result when it is inexact.
1746 ///
1747 /// \headerfile <x86intrin.h>
1748 ///
1749 /// This intrinsic corresponds to the \c VCVTTPS2DQ / CVTTPS2DQ instruction.
1750 ///
1751 /// \param __a
1752 /// A 128-bit vector of [4 x float].
1753 /// \returns A 128-bit integer vector of [4 x i32] containing the converted values.
1754 static __inline__ __m128i __DEFAULT_FN_ATTRS
1755 _mm_cvttps_epi32(__m128 __a)
1756 {
1757  return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
1758 }
1759 
1760 /// \brief Returns a vector of [4 x i32] where the lowest element is the input
1761 /// operand and the remaining elements are zero.
1762 ///
1763 /// \headerfile <x86intrin.h>
1764 ///
1765 /// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
1766 ///
1767 /// \param __a
1768 /// A 32-bit signed integer operand. It is placed in element 0 of the result.
1769 /// \returns A 128-bit vector of [4 x i32].
1770 static __inline__ __m128i __DEFAULT_FN_ATTRS
1771 _mm_cvtsi32_si128(int __a)
1772 { /* Element 0 is __a; elements 1 to 3 are initialized to zero. */
1773  return (__m128i)(__v4si){ __a, 0, 0, 0 };
1774 }
1775 
1776 #ifdef __x86_64__
1777 /// \brief Returns a vector of [2 x i64] where the lower element is the input
1778 /// operand and the upper element is zero. Only defined when __x86_64__ is defined.
1779 ///
1780 /// \headerfile <x86intrin.h>
1781 ///
1782 /// This intrinsic corresponds to the \c VMOVQ / MOVQ instruction.
1783 ///
1784 /// \param __a
1785 /// A 64-bit signed integer operand. It is copied unchanged into the lower element.
1786 /// \returns A 128-bit vector of [2 x i64] whose lower element holds the operand.
1787 static __inline__ __m128i __DEFAULT_FN_ATTRS
1788 _mm_cvtsi64_si128(long long __a)
1789 { /* Element 0 is __a; element 1 is initialized to zero. */
1790  return (__m128i){ __a, 0 };
1791 }
1792 #endif
1793 
1794 /// \brief Moves the least significant 32 bits of a vector of [4 x i32] to a
1795 /// 32-bit signed integer value.
1796 ///
1797 /// \headerfile <x86intrin.h>
1798 ///
1799 /// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
1800 ///
1801 /// \param __a
1802 /// A vector of [4 x i32]. The least significant 32 bits are moved to the
1803 /// destination; the other elements are not read.
1804 /// \returns A 32-bit signed integer containing the moved value.
1805 static __inline__ int __DEFAULT_FN_ATTRS
1806 _mm_cvtsi128_si32(__m128i __a)
1807 { /* Reinterpret as __v4si so that element 0 can be indexed directly. */
1808  __v4si __b = (__v4si)__a;
1809  return __b[0];
1810 }
1811 
1812 #ifdef __x86_64__
1813 /// \brief Moves the least significant 64 bits of a vector of [2 x i64] to a
1814 /// 64-bit signed integer value. Only defined when __x86_64__ is defined.
1815 ///
1816 /// \headerfile <x86intrin.h>
1817 ///
1818 /// This intrinsic corresponds to the \c VMOVQ / MOVQ instruction.
1819 ///
1820 /// \param __a
1821 /// A vector of [2 x i64]. The least significant 64 bits are moved to the
1822 /// destination; the upper element is not read.
1823 /// \returns A 64-bit signed integer containing the moved value.
1824 static __inline__ long long __DEFAULT_FN_ATTRS
1825 _mm_cvtsi128_si64(__m128i __a)
1826 { /* __m128i is a vector of long long, so element 0 can be indexed without a cast. */
1827  return __a[0];
1828 }
1829 #endif
1830 
1831 /// \brief Moves packed integer values from an aligned 128-bit memory location
1832 /// to elements in a 128-bit integer vector.
1833 ///
1834 /// \headerfile <x86intrin.h>
1835 ///
1836 /// This intrinsic corresponds to the \c VMOVDQA / MOVDQA instruction.
1837 ///
1838 /// \param __p
1839 /// An aligned pointer to a memory location containing integer values.
1840 /// \returns A 128-bit integer vector containing the moved values.
1841 static __inline__ __m128i __DEFAULT_FN_ATTRS
1842 _mm_load_si128(__m128i const *__p)
1843 {
1844  return *__p;
1845 }
1846 
1847 /// \brief Moves packed integer values from an unaligned 128-bit memory location
1848 /// to elements in a 128-bit integer vector.
1849 ///
1850 /// \headerfile <x86intrin.h>
1851 ///
1852 /// This intrinsic corresponds to the \c VMOVDQU / MOVDQU instruction.
1853 ///
1854 /// \param __p
1855 /// A pointer to a memory location containing integer values.
1856 /// \returns A 128-bit integer vector containing the moved values.
1857 static __inline__ __m128i __DEFAULT_FN_ATTRS
1858 _mm_loadu_si128(__m128i const *__p)
1859 {
1860  struct __loadu_si128 {
1861  __m128i __v;
1862  } __attribute__((__packed__, __may_alias__));
1863  return ((struct __loadu_si128*)__p)->__v;
1864 }
1865 
1866 /// \brief Returns a vector of [2 x i64] where the lower element is taken from
1867 /// the lower element of the operand, and the upper element is zero.
1868 ///
1869 /// \headerfile <x86intrin.h>
1870 ///
1871 /// This intrinsic corresponds to the \c VMOVQ / MOVQ instruction.
1872 ///
1873 /// \param __p
1874 /// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of
1875 /// the destination.
1876 /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the
1877 /// moved value. The higher order bits are cleared.
1878 static __inline__ __m128i __DEFAULT_FN_ATTRS
1879 _mm_loadl_epi64(__m128i const *__p)
1880 {
1881  struct __mm_loadl_epi64_struct {
1882  long long __u;
1883  } __attribute__((__packed__, __may_alias__));
1884  return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
1885 }
1886 
1887 /// \brief Generates a 128-bit vector of [4 x i32] with unspecified content.
1888 /// This could be used as an argument to another intrinsic function where the
1889 /// argument is required but the value is not actually used.
1890 ///
1891 /// \headerfile <x86intrin.h>
1892 ///
1893 /// This intrinsic has no corresponding instruction.
1894 ///
1895 /// \returns A 128-bit vector of [4 x i32] with unspecified content.
1896 static __inline__ __m128i __DEFAULT_FN_ATTRS
1898 {
1899  return (__m128i)__builtin_ia32_undef128();
1900 }
1901 
1902 /// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
1903 /// the specified 64-bit integer values.
1904 ///
1905 /// \headerfile <x86intrin.h>
1906 ///
1907 /// This intrinsic is a utility function and does not correspond to a specific
1908 /// instruction.
1909 ///
1910 /// \param __q1
1911 /// A 64-bit integer value used to initialize the upper 64 bits of the
1912 /// destination vector of [2 x i64].
1913 /// \param __q0
1914 /// A 64-bit integer value used to initialize the lower 64 bits of the
1915 /// destination vector of [2 x i64].
1916 /// \returns An initialized 128-bit vector of [2 x i64] containing the values
1917 /// provided in the operands.
1918 static __inline__ __m128i __DEFAULT_FN_ATTRS
1919 _mm_set_epi64x(long long __q1, long long __q0)
1920 {
1921  return (__m128i){ __q0, __q1 };
1922 }
1923 
1924 /// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
1925 /// the specified 64-bit integer values.
1926 ///
1927 /// \headerfile <x86intrin.h>
1928 ///
1929 /// This intrinsic is a utility function and does not correspond to a specific
1930 /// instruction.
1931 ///
1932 /// \param __q1
1933 /// A 64-bit integer value used to initialize the upper 64 bits of the
1934 /// destination vector of [2 x i64].
1935 /// \param __q0
1936 /// A 64-bit integer value used to initialize the lower 64 bits of the
1937 /// destination vector of [2 x i64].
1938 /// \returns An initialized 128-bit vector of [2 x i64] containing the values
1939 /// provided in the operands.
1940 static __inline__ __m128i __DEFAULT_FN_ATTRS
1941 _mm_set_epi64(__m64 __q1, __m64 __q0)
1942 {
1943  return (__m128i){ (long long)__q0, (long long)__q1 };
1944 }
1945 
1946 /// \brief Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
1947 /// the specified 32-bit integer values.
1948 ///
1949 /// \headerfile <x86intrin.h>
1950 ///
1951 /// This intrinsic is a utility function and does not correspond to a specific
1952 /// instruction.
1953 ///
1954 /// \param __i3
1955 /// A 32-bit integer value used to initialize bits [127:96] of the
1956 /// destination vector.
1957 /// \param __i2
1958 /// A 32-bit integer value used to initialize bits [95:64] of the destination
1959 /// vector.
1960 /// \param __i1
1961 /// A 32-bit integer value used to initialize bits [63:32] of the destination
1962 /// vector.
1963 /// \param __i0
1964 /// A 32-bit integer value used to initialize bits [31:0] of the destination
1965 /// vector.
1966 /// \returns An initialized 128-bit vector of [4 x i32] containing the values
1967 /// provided in the operands.
1968 static __inline__ __m128i __DEFAULT_FN_ATTRS
1969 _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
1970 {
1971  return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
1972 }
1973 
1974 /// \brief Initializes the 16-bit values in a 128-bit vector of [8 x i16] with
1975 /// the specified 16-bit integer values.
1976 ///
1977 /// \headerfile <x86intrin.h>
1978 ///
1979 /// This intrinsic is a utility function and does not correspond to a specific
1980 /// instruction.
1981 ///
1982 /// \param __w7
1983 /// A 16-bit integer value used to initialize bits [127:112] of the
1984 /// destination vector.
1985 /// \param __w6
1986 /// A 16-bit integer value used to initialize bits [111:96] of the
1987 /// destination vector.
1988 /// \param __w5
1989 /// A 16-bit integer value used to initialize bits [95:80] of the destination
1990 /// vector.
1991 /// \param __w4
1992 /// A 16-bit integer value used to initialize bits [79:64] of the destination
1993 /// vector.
1994 /// \param __w3
1995 /// A 16-bit integer value used to initialize bits [63:48] of the destination
1996 /// vector.
1997 /// \param __w2
1998 /// A 16-bit integer value used to initialize bits [47:32] of the destination
1999 /// vector.
2000 /// \param __w1
2001 /// A 16-bit integer value used to initialize bits [31:16] of the destination
2002 /// vector.
2003 /// \param __w0
2004 /// A 16-bit integer value used to initialize bits [15:0] of the destination
2005 /// vector.
2006 /// \returns An initialized 128-bit vector of [8 x i16] containing the values
2007 /// provided in the operands.
2008 static __inline__ __m128i __DEFAULT_FN_ATTRS
2009 _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
2010 {
2011  return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
2012 }
2013 
2014 /// \brief Initializes the 8-bit values in a 128-bit vector of [16 x i8] with
2015 /// the specified 8-bit integer values.
2016 ///
2017 /// \headerfile <x86intrin.h>
2018 ///
2019 /// This intrinsic is a utility function and does not correspond to a specific
2020 /// instruction.
2021 ///
2022 /// \param __b15
2023 /// Initializes bits [127:120] of the destination vector.
2024 /// \param __b14
2025 /// Initializes bits [119:112] of the destination vector.
2026 /// \param __b13
2027 /// Initializes bits [111:104] of the destination vector.
2028 /// \param __b12
2029 /// Initializes bits [103:96] of the destination vector.
2030 /// \param __b11
2031 /// Initializes bits [95:88] of the destination vector.
2032 /// \param __b10
2033 /// Initializes bits [87:80] of the destination vector.
2034 /// \param __b9
2035 /// Initializes bits [79:72] of the destination vector.
2036 /// \param __b8
2037 /// Initializes bits [71:64] of the destination vector.
2038 /// \param __b7
2039 /// Initializes bits [63:56] of the destination vector.
2040 /// \param __b6
2041 /// Initializes bits [55:48] of the destination vector.
2042 /// \param __b5
2043 /// Initializes bits [47:40] of the destination vector.
2044 /// \param __b4
2045 /// Initializes bits [39:32] of the destination vector.
2046 /// \param __b3
2047 /// Initializes bits [31:24] of the destination vector.
2048 /// \param __b2
2049 /// Initializes bits [23:16] of the destination vector.
2050 /// \param __b1
2051 /// Initializes bits [15:8] of the destination vector.
2052 /// \param __b0
2053 /// Initializes bits [7:0] of the destination vector.
2054 /// \returns An initialized 128-bit vector of [16 x i8] containing the values
2055 /// provided in the operands.
2056 static __inline__ __m128i __DEFAULT_FN_ATTRS
2057 _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
2058 {
2059  return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
2060 }
2061 
2062 /// \brief Initializes both values in a 128-bit integer vector with the
2063 /// specified 64-bit integer value.
2064 ///
2065 /// \headerfile <x86intrin.h>
2066 ///
2067 /// This intrinsic is a utility function and does not correspond to a specific
2068 /// instruction.
2069 ///
2070 /// \param __q
2071 /// Integer value used to initialize the elements of the destination integer
2072 /// vector.
2073 /// \returns An initialized 128-bit integer vector of [2 x i64] with both
2074 /// elements containing the value provided in the operand.
2075 static __inline__ __m128i __DEFAULT_FN_ATTRS
2076 _mm_set1_epi64x(long long __q)
2077 {
2078  return (__m128i){ __q, __q };
2079 }
2080 
2081 /// \brief Initializes both values in a 128-bit vector of [2 x i64] with the
2082 /// specified 64-bit value.
2083 ///
2084 /// \headerfile <x86intrin.h>
2085 ///
2086 /// This intrinsic is a utility function and does not correspond to a specific
2087 /// instruction.
2088 ///
2089 /// \param __q
2090 /// A 64-bit value used to initialize the elements of the destination integer
2091 /// vector.
2092 /// \returns An initialized 128-bit vector of [2 x i64] with all elements
2093 /// containing the value provided in the operand.
2094 static __inline__ __m128i __DEFAULT_FN_ATTRS
2095 _mm_set1_epi64(__m64 __q)
2096 {
2097  return (__m128i){ (long long)__q, (long long)__q };
2098 }
2099 
2100 /// \brief Initializes all values in a 128-bit vector of [4 x i32] with the
2101 /// specified 32-bit value.
2102 ///
2103 /// \headerfile <x86intrin.h>
2104 ///
2105 /// This intrinsic is a utility function and does not correspond to a specific
2106 /// instruction.
2107 ///
2108 /// \param __i
2109 /// A 32-bit value used to initialize the elements of the destination integer
2110 /// vector.
2111 /// \returns An initialized 128-bit vector of [4 x i32] with all elements
2112 /// containing the value provided in the operand.
2113 static __inline__ __m128i __DEFAULT_FN_ATTRS
2115 {
2116  return (__m128i)(__v4si){ __i, __i, __i, __i };
2117 }
2118 
2119 /// \brief Initializes all values in a 128-bit vector of [8 x i16] with the
2120 /// specified 16-bit value.
2121 ///
2122 /// \headerfile <x86intrin.h>
2123 ///
2124 /// This intrinsic is a utility function and does not correspond to a specific
2125 /// instruction.
2126 ///
2127 /// \param __w
2128 /// A 16-bit value used to initialize the elements of the destination integer
2129 /// vector.
2130 /// \returns An initialized 128-bit vector of [8 x i16] with all elements
2131 /// containing the value provided in the operand.
2132 static __inline__ __m128i __DEFAULT_FN_ATTRS
2133 _mm_set1_epi16(short __w)
2134 {
2135  return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
2136 }
2137 
2138 /// \brief Initializes all values in a 128-bit vector of [16 x i8] with the
2139 /// specified 8-bit value.
2140 ///
2141 /// \headerfile <x86intrin.h>
2142 ///
2143 /// This intrinsic is a utility function and does not correspond to a specific
2144 /// instruction.
2145 ///
2146 /// \param __b
2147 /// An 8-bit value used to initialize the elements of the destination integer
2148 /// vector.
2149 /// \returns An initialized 128-bit vector of [16 x i8] with all elements
2150 /// containing the value provided in the operand.
2151 static __inline__ __m128i __DEFAULT_FN_ATTRS
2153 {
2154  return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
2155 }
2156 
2157 static __inline__ __m128i __DEFAULT_FN_ATTRS
2158 _mm_setr_epi64(__m64 __q0, __m64 __q1)
2159 {
2160  return (__m128i){ (long long)__q0, (long long)__q1 };
2161 }
2162 
2163 static __inline__ __m128i __DEFAULT_FN_ATTRS
2164 _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
2165 {
2166  return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
2167 }
2168 
2169 static __inline__ __m128i __DEFAULT_FN_ATTRS
2170 _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
2171 {
2172  return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
2173 }
2174 
2175 static __inline__ __m128i __DEFAULT_FN_ATTRS
2176 _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
2177 {
2178  return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
2179 }
2180 
2181 static __inline__ __m128i __DEFAULT_FN_ATTRS
2183 {
2184  return (__m128i){ 0LL, 0LL };
2185 }
2186 
2187 static __inline__ void __DEFAULT_FN_ATTRS
2188 _mm_store_si128(__m128i *__p, __m128i __b)
2189 {
2190  *__p = __b;
2191 }
2192 
2193 static __inline__ void __DEFAULT_FN_ATTRS
2194 _mm_storeu_si128(__m128i *__p, __m128i __b)
2195 {
2196  struct __storeu_si128 {
2197  __m128i __v;
2198  } __attribute__((__packed__, __may_alias__));
2199  ((struct __storeu_si128*)__p)->__v = __b;
2200 }
2201 
2202 static __inline__ void __DEFAULT_FN_ATTRS
2203 _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
2204 {
2205  __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
2206 }
2207 
2208 static __inline__ void __DEFAULT_FN_ATTRS
2209 _mm_storel_epi64(__m128i *__p, __m128i __a)
2210 {
2211  struct __mm_storel_epi64_struct {
2212  long long __u;
2213  } __attribute__((__packed__, __may_alias__));
2214  ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
2215 }
2216 
2217 static __inline__ void __DEFAULT_FN_ATTRS
2218 _mm_stream_pd(double *__p, __m128d __a)
2219 {
2220  __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);
2221 }
2222 
2223 static __inline__ void __DEFAULT_FN_ATTRS
2224 _mm_stream_si128(__m128i *__p, __m128i __a)
2225 {
2226  __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
2227 }
2228 
2229 static __inline__ void __DEFAULT_FN_ATTRS
2230 _mm_stream_si32(int *__p, int __a)
2231 {
2232  __builtin_ia32_movnti(__p, __a);
2233 }
2234 
2235 #ifdef __x86_64__
2236 static __inline__ void __DEFAULT_FN_ATTRS
2237 _mm_stream_si64(long long *__p, long long __a)
2238 {
2239  __builtin_ia32_movnti64(__p, __a);
2240 }
2241 #endif
2242 
2243 static __inline__ void __DEFAULT_FN_ATTRS
2244 _mm_clflush(void const *__p)
2245 {
2246  __builtin_ia32_clflush(__p);
2247 }
2248 
2249 static __inline__ void __DEFAULT_FN_ATTRS
2251 {
2252  __builtin_ia32_lfence();
2253 }
2254 
2255 static __inline__ void __DEFAULT_FN_ATTRS
2257 {
2258  __builtin_ia32_mfence();
2259 }
2260 
2261 static __inline__ __m128i __DEFAULT_FN_ATTRS
2262 _mm_packs_epi16(__m128i __a, __m128i __b)
2263 {
2264  return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
2265 }
2266 
2267 static __inline__ __m128i __DEFAULT_FN_ATTRS
2268 _mm_packs_epi32(__m128i __a, __m128i __b)
2269 {
2270  return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
2271 }
2272 
2273 static __inline__ __m128i __DEFAULT_FN_ATTRS
2274 _mm_packus_epi16(__m128i __a, __m128i __b)
2275 {
2276  return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
2277 }
2278 
2279 static __inline__ int __DEFAULT_FN_ATTRS
2280 _mm_extract_epi16(__m128i __a, int __imm)
2281 {
2282  __v8hi __b = (__v8hi)__a;
2283  return (unsigned short)__b[__imm & 7];
2284 }
2285 
2286 static __inline__ __m128i __DEFAULT_FN_ATTRS
2287 _mm_insert_epi16(__m128i __a, int __b, int __imm)
2288 {
2289  __v8hi __c = (__v8hi)__a;
2290  __c[__imm & 7] = __b;
2291  return (__m128i)__c;
2292 }
2293 
2294 static __inline__ int __DEFAULT_FN_ATTRS
2295 _mm_movemask_epi8(__m128i __a)
2296 {
2297  return __builtin_ia32_pmovmskb128((__v16qi)__a);
2298 }
2299 
/* Permutes the four 32-bit elements of `a`. Successive 2-bit fields of
   `imm`, starting at bit 0, select the source element for result elements
   0, 1, 2 and 3. The second shuffle operand is never selected because every
   index is below 4. */
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
                                   (__v4si)_mm_undefined_si128(), \
                                   ((imm) & 0x3), \
                                   (((imm) >> 2) & 0x3), \
                                   (((imm) >> 4) & 0x3), \
                                   (((imm) >> 6) & 0x3)); })
2305 
/* Permutes 16-bit elements 0..3 of `a` and passes elements 4..7 through
   unchanged. Successive 2-bit fields of `imm`, starting at bit 0, select the
   source (among elements 0..3) for result elements 0, 1, 2 and 3. */
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
                                   (__v8hi)_mm_undefined_si128(), \
                                   ((imm) & 0x3), \
                                   (((imm) >> 2) & 0x3), \
                                   (((imm) >> 4) & 0x3), \
                                   (((imm) >> 6) & 0x3), \
                                   4, 5, 6, 7); })
2312 
/* Passes 16-bit elements 0..3 of `a` through unchanged and permutes
   elements 4..7. Successive 2-bit fields of `imm`, starting at bit 0, select
   the source (among elements 4..7) for result elements 4, 5, 6 and 7. */
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
                                   (__v8hi)_mm_undefined_si128(), \
                                   0, 1, 2, 3, \
                                   (4 + ((imm) & 0x3)), \
                                   (4 + (((imm) >> 2) & 0x3)), \
                                   (4 + (((imm) >> 4) & 0x3)), \
                                   (4 + (((imm) >> 6) & 0x3))); })
2321 
2322 static __inline__ __m128i __DEFAULT_FN_ATTRS
2323 _mm_unpackhi_epi8(__m128i __a, __m128i __b)
2324 {
2325  return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
2326 }
2327 
2328 static __inline__ __m128i __DEFAULT_FN_ATTRS
2329 _mm_unpackhi_epi16(__m128i __a, __m128i __b)
2330 {
2331  return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
2332 }
2333 
2334 static __inline__ __m128i __DEFAULT_FN_ATTRS
2335 _mm_unpackhi_epi32(__m128i __a, __m128i __b)
2336 {
2337  return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
2338 }
2339 
2340 static __inline__ __m128i __DEFAULT_FN_ATTRS
2341 _mm_unpackhi_epi64(__m128i __a, __m128i __b)
2342 {
2343  return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);
2344 }
2345 
2346 static __inline__ __m128i __DEFAULT_FN_ATTRS
2347 _mm_unpacklo_epi8(__m128i __a, __m128i __b)
2348 {
2349  return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
2350 }
2351 
2352 static __inline__ __m128i __DEFAULT_FN_ATTRS
2353 _mm_unpacklo_epi16(__m128i __a, __m128i __b)
2354 {
2355  return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
2356 }
2357 
2358 static __inline__ __m128i __DEFAULT_FN_ATTRS
2359 _mm_unpacklo_epi32(__m128i __a, __m128i __b)
2360 {
2361  return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
2362 }
2363 
2364 static __inline__ __m128i __DEFAULT_FN_ATTRS
2365 _mm_unpacklo_epi64(__m128i __a, __m128i __b)
2366 {
2367  return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);
2368 }
2369 
2370 static __inline__ __m64 __DEFAULT_FN_ATTRS
2371 _mm_movepi64_pi64(__m128i __a)
2372 {
2373  return (__m64)__a[0];
2374 }
2375 
2376 static __inline__ __m128i __DEFAULT_FN_ATTRS
2378 {
2379  return (__m128i){ (long long)__a, 0 };
2380 }
2381 
2382 static __inline__ __m128i __DEFAULT_FN_ATTRS
2383 _mm_move_epi64(__m128i __a)
2384 {
2385  return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2);
2386 }
2387 
2388 static __inline__ __m128d __DEFAULT_FN_ATTRS
2389 _mm_unpackhi_pd(__m128d __a, __m128d __b)
2390 {
2391  return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
2392 }
2393 
2394 static __inline__ __m128d __DEFAULT_FN_ATTRS
2395 _mm_unpacklo_pd(__m128d __a, __m128d __b)
2396 {
2397  return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);
2398 }
2399 
2400 static __inline__ int __DEFAULT_FN_ATTRS
2401 _mm_movemask_pd(__m128d __a)
2402 {
2403  return __builtin_ia32_movmskpd((__v2df)__a);
2404 }
2405 
/* Builds a [2 x double] vector from one element of each operand. Bit 0 of
   `i` selects which element of `a` becomes result element 0; bit 1 selects
   which element of `b` becomes result element 1. */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), \
                                   (__v2df)(__m128d)(b), \
                                   ((i) & 0x1), \
                                   (2 + (((i) >> 1) & 0x1))); })
2410 
2411 static __inline__ __m128 __DEFAULT_FN_ATTRS
2412 _mm_castpd_ps(__m128d __a)
2413 {
2414  return (__m128)__a;
2415 }
2416 
2417 static __inline__ __m128i __DEFAULT_FN_ATTRS
2418 _mm_castpd_si128(__m128d __a)
2419 {
2420  return (__m128i)__a;
2421 }
2422 
2423 static __inline__ __m128d __DEFAULT_FN_ATTRS
2424 _mm_castps_pd(__m128 __a)
2425 {
2426  return (__m128d)__a;
2427 }
2428 
2429 static __inline__ __m128i __DEFAULT_FN_ATTRS
2430 _mm_castps_si128(__m128 __a)
2431 {
2432  return (__m128i)__a;
2433 }
2434 
2435 static __inline__ __m128 __DEFAULT_FN_ATTRS
2436 _mm_castsi128_ps(__m128i __a)
2437 {
2438  return (__m128)__a;
2439 }
2440 
2441 static __inline__ __m128d __DEFAULT_FN_ATTRS
2442 _mm_castsi128_pd(__m128i __a)
2443 {
2444  return (__m128d)__a;
2445 }
2446 
2447 static __inline__ void __DEFAULT_FN_ATTRS
2449 {
2450  __builtin_ia32_pause();
2451 }
2452 
2453 #undef __DEFAULT_FN_ATTRS
2454 
/* Packs two selectors into one immediate: y lands in bit 0, x in bit 1. */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
2456 
2457 #endif /* __EMMINTRIN_H */
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:184
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
Definition: emmintrin.h:1177
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:1374
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a)
Definition: emmintrin.h:412
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:374
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
Initializes the 16-bit values in a 128-bit vector of [8 x i16] with the specified 16-bit integer valu...
Definition: emmintrin.h:2009
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Definition: emmintrin.h:112
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
Definition: emmintrin.h:2176
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
Definition: emmintrin.h:1196
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
Definition: emmintrin.h:1254
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:362
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a)
Definition: emmintrin.h:438
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
Definition: emmintrin.h:399
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q)
Initializes both values in a 128-bit vector of [2 x i64] with the specified 64-bit value...
Definition: emmintrin.h:2095
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:1412
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
Definition: emmintrin.h:2164
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
Definition: emmintrin.h:2268
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value...
Definition: emmintrin.h:2076
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w)
Definition: emmintrin.h:554
static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:326
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, __m128d __a)
Definition: emmintrin.h:643
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a)
Definition: emmintrin.h:2430
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w)
Definition: emmintrin.h:560
static __inline__ void __DEFAULT_FN_ATTRS _mm_clflush(void const *__p)
Definition: emmintrin.h:2244
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
Definition: emmintrin.h:664
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
Definition: emmintrin.h:1139
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:256
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:338
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a)
Definition: emmintrin.h:508
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
Definition: emmintrin.h:924
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:130
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:1450
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:368
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:2395
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a)
Definition: emmintrin.h:386
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
Definition: emmintrin.h:851
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a)
Definition: emmintrin.h:406
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, double const *__dp)
Definition: emmintrin.h:538
static __inline__ int __DEFAULT_FN_ATTRS _mm_extract_epi16(__m128i __a, int __imm)
Definition: emmintrin.h:2280
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit unsigned integer values in the input and returns the differences in th...
Definition: emmintrin.h:1006
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Definition: emmintrin.h:2335
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding 16-bit values of the 128-bit integer vectors for equality...
Definition: emmintrin.h:1507
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
Definition: emmintrin.h:2170
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:226
static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
Definition: emmintrin.h:2203
double __m128d __attribute__((__vector_size__(16)))
Definition: emmintrin.h:29
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
Definition: emmintrin.h:1628
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, __m64 __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
Definition: emmintrin.h:1941
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a)
Definition: emmintrin.h:2436
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadl_epi64(__m128i const *__p)
Returns a vector of [2 x i64] where the lower element is taken from the lower element of the operand...
Definition: emmintrin.h:1879
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:380
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:118
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:53
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
Definition: emmintrin.h:2152
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:263
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a)
Definition: emmintrin.h:2377
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a)
Definition: emmintrin.h:2371
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp)
Definition: emmintrin.h:499
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits...
Definition: emmintrin.h:1234
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtpd_pi32(__m128d __a)
Definition: emmintrin.h:450
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp)
Definition: emmintrin.h:480
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a)
Definition: emmintrin.h:2412
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
Definition: emmintrin.h:1648
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:238
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
Definition: emmintrin.h:1858
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, long long __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
Definition: emmintrin.h:1919
static __inline unsigned char unsigned int unsigned int unsigned int * __p
Definition: adxintrin.h:38
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
Definition: emmintrin.h:688
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a)
Definition: emmintrin.h:599
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:356
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
Definition: emmintrin.h:2262
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
Definition: emmintrin.h:1294
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
Definition: emmintrin.h:887
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp)
Definition: emmintrin.h:518
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine if the values in the first operand are greater than those in the second operand.
Definition: emmintrin.h:1588
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x)
Definition: emmintrin.h:572
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a)
Returns a vector of [4 x i32] where the lowest element is the input operand and the remaining elements are zero.
Definition: emmintrin.h:1771
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:232
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer vector.
Definition: emmintrin.h:1842
static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a)
Definition: emmintrin.h:627
static __inline__ void __DEFAULT_FN_ATTRS _mm_mfence(void)
Definition: emmintrin.h:2256
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Definition: emmintrin.h:578
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:73
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a)
Definition: emmintrin.h:2424
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:92
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b)
Definition: emmintrin.h:418
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a)
Definition: emmintrin.h:392
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
Definition: emmintrin.h:676
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:244
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:178
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32], truncating the result when it is inexact...
Definition: emmintrin.h:1755
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x)
Definition: emmintrin.h:566
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits...
Definition: emmintrin.h:1215
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:350
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
Definition: emmintrin.h:736
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp)
Definition: emmintrin.h:474
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
Definition: emmintrin.h:1274
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:300
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:60
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:270
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:307
static __inline__ void __DEFAULT_FN_ATTRS _mm_lfence(void)
Definition: emmintrin.h:2250
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
Definition: emmintrin.h:652
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
Definition: emmintrin.h:682
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding 32-bit values of the 128-bit integer vectors for equality...
Definition: emmintrin.h:1526
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a)
Definition: emmintrin.h:2383
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:154
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, __m128d __a)
Definition: emmintrin.h:590
static __inline__ vector float vector float __b
Definition: altivec.h:431
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:294
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if the values in the first operand are greater than those in the second operand.
Definition: emmintrin.h:1546
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:214
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:99
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Definition: emmintrin.h:2365
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:105
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
Initializes the 32-bit values in a 128-bit vector of [4 x i32] with the specified 32-bit integer values.
Definition: emmintrin.h:1969
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine if the values in the first operand are greater than those in the second operand.
Definition: emmintrin.h:1568
static __inline unsigned char unsigned int __x
Definition: adxintrin.h:36
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
Definition: emmintrin.h:658
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:2389
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, __m128i __a)
Definition: emmintrin.h:2224
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
Definition: emmintrin.h:742
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a)
Definition: emmintrin.h:618
static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a)
Definition: emmintrin.h:468
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:79
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i *__p, __m128i __a)
Definition: emmintrin.h:2209
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
Definition: emmintrin.h:1314
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:66
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:142
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a)
Definition: emmintrin.h:2418
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:344
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two [8 x short] vectors and returns a vector containing the low-order 16 bits of each 32-bit product in the corresponding element.
Definition: emmintrin.h:773
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_insert_epi16(__m128i __a, int __b, int __imm)
Definition: emmintrin.h:2287
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a)
Definition: emmintrin.h:444
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
Definition: emmintrin.h:1024
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a)
Moves the least significant 32 bits of a vector of [4 x i32] to a 32-bit signed integer value...
Definition: emmintrin.h:1806
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Initializes the 8-bit values in a 128-bit vector of [16 x i8] with the specified 8-bit integer values...
Definition: emmintrin.h:2057
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtpi32_pd(__m64 __a)
Definition: emmintrin.h:462
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the two 64-bit integer vectors and returns the 64-bit unsigned product.
Definition: emmintrin.h:792
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:1431
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Definition: emmintrin.h:2359
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:288
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b)
Subtracts signed or unsigned 64-bit integer values and writes the difference to the corresponding bits in the destination.
Definition: emmintrin.h:906
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a)
Definition: emmintrin.h:2442
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:124
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
Definition: emmintrin.h:724
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
Definition: emmintrin.h:730
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:196
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:160
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, __m64 __q1)
Definition: emmintrin.h:2158
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
Definition: emmintrin.h:700
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Definition: emmintrin.h:2347
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, __m128d __a)
Definition: emmintrin.h:634
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, int __b)
Definition: emmintrin.h:424
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Definition: emmintrin.h:2182
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
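Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements of two [2 x i64] vectors, and returns the 64-bit products in the corresponding elements of a [2 x i64] vector.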
Definition: emmintrin.h:811
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:148
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Definition: emmintrin.h:2341
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:250
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
Definition: emmintrin.h:718
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value...
Definition: emmintrin.h:2114
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Definition: emmintrin.h:2353
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit signed integer values in the input and returns the differences in the ...
Definition: emmintrin.h:966
static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:314
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
Definition: emmintrin.h:748
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
Definition: emmintrin.h:2274
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit unsigned integer values in the input and returns the differences in the...
Definition: emmintrin.h:986
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, __m128i __b)
Definition: emmintrin.h:2188
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if the values in the first operand are less than those in the second operand.
Definition: emmintrin.h:1608
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition: emmintrin.h:1897
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, __m128i __b)
Computes the absolute differences of corresponding 8-bit integer values in two 128-bit vectors...
Definition: emmintrin.h:833
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:332
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
Definition: emmintrin.h:754
static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:320
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Definition: emmintrin.h:2323
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of 64-bit values in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:1469
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, __m128d __a)
Definition: emmintrin.h:2218
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value...
Definition: emmintrin.h:2133
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:584
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:276
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:190
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b)
Definition: emmintrin.h:282
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
Definition: emmintrin.h:712
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:172
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:1393
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
Definition: emmintrin.h:1158
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, double const *__dp)
Definition: emmintrin.h:528
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:208
#define __DEFAULT_FN_ATTRS
Definition: emmintrin.h:50
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
Definition: emmintrin.h:1739
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
Definition: emmintrin.h:869
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:166
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:136
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b)
Definition: emmintrin.h:670
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, __m128 __b)
Definition: emmintrin.h:431
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b)
Performs a bitwise exclusive OR of two 128-bit integer vectors.
Definition: emmintrin.h:1079
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a)
Definition: emmintrin.h:2401
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit signed integer values in the input and returns the differences in the c...
Definition: emmintrin.h:945
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b)
Performs a bitwise OR of two 128-bit integer vectors.
Definition: emmintrin.h:1061
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i *__p, __m128i __b)
Definition: emmintrin.h:2194
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si32(int *__p, int __a)
Definition: emmintrin.h:2230
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Definition: emmintrin.h:2295
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:86
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp)
Definition: emmintrin.h:492
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a)
Definition: emmintrin.h:605
static __inline__ void __DEFAULT_FN_ATTRS _mm_pause(void)
Definition: emmintrin.h:2448
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttpd_pi32(__m128d __a)
Definition: emmintrin.h:456
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
Definition: emmintrin.h:1723
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
Definition: emmintrin.h:706
static __inline__ vector float vector float vector float __c
Definition: altivec.h:2990
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
Definition: emmintrin.h:694
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Definition: emmintrin.h:548
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, __m128d __a)
Definition: emmintrin.h:612
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:220
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors, using the one's complement of the values contained in the first source operand.
Definition: emmintrin.h:1044
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality...
Definition: emmintrin.h:1488
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Definition: emmintrin.h:2329
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b)
Definition: emmintrin.h:202