#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
#ifndef __AVX512VLINTRIN_H
#define __AVX512VLINTRIN_H
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
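/*
 * Illustrative note (not in the original header): this file is an internal
 * Clang header and is only reachable through <immintrin.h>; the translation
 * unit must also be compiled with AVX512VL enabled. A minimal, assumed user
 * file and compile line might look like:
 *
 *   // demo.c -- build with: cc -O2 -mavx512vl -c demo.c
 *   #include <immintrin.h>
 *
 *   __mmask8 demo_nonzero_lanes(__m256i v) {
 *     // AVX512VL extends 512-bit style masking down to 128/256-bit vectors.
 *     return _mm256_test_epi32_mask(v, v);
 *   }
 */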
static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
_mm_setzero_di(void) {
  return (__m128i)(__v2di){ 0LL, 0LL};
}
return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
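/*
 * Illustrative sketch (assumption: the pcmpeq/pcmpgt/cmp/ucmp builtins above
 * back intrinsics such as _mm_cmpeq_epi32_mask and _mm_mask_cmpgt_epi32_mask):
 * unlike the SSE/AVX2 comparisons, these return one __mmask8 bit per lane
 * rather than an all-ones vector, so the result feeds masked operations
 * directly.
 *
 *   #include <immintrin.h>
 *
 *   __mmask8 demo_cmp(__m128i a, __m128i b) {
 *     __mmask8 eq = _mm_cmpeq_epi32_mask(a, b);          // bit i set when a[i] == b[i]
 *     __mmask8 gt = _mm_mask_cmpgt_epi32_mask(eq, a, b); // compared only where eq is set
 *     return (__mmask8)(eq | gt);
 *   }
 */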
return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
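/*
 * Illustrative sketch (assumption: the padd/psub/pmul builtins above back the
 * _mm*_mask_* and _mm*_maskz_* arithmetic intrinsics): "mask" forms merge
 * unselected lanes from a passthrough source, "maskz" forms zero them.
 *
 *   #include <immintrin.h>
 *
 *   __m256i demo_masked_add(__m256i src, __mmask8 k, __m256i a, __m256i b) {
 *     __m256i merged = _mm256_mask_add_epi32(src, k, a, b); // lane i: k bit set ? a+b : src
 *     __m256i zeroed = _mm256_maskz_add_epi32(k, a, b);     // lane i: k bit set ? a+b : 0
 *     return _mm256_add_epi32(merged, zeroed);
 *   }
 */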
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
(__v4si)(__m128i)(b), (int)(p), \
#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
(__v4si)(__m128i)(b), (int)(p), \
#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
(__v4si)(__m128i)(b), (int)(p), \
#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
(__v4si)(__m128i)(b), (int)(p), \
#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
(__v8si)(__m256i)(b), (int)(p), \
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
(__v8si)(__m256i)(b), (int)(p), \
#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
(__v8si)(__m256i)(b), (int)(p), \
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
(__v8si)(__m256i)(b), (int)(p), \
#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
(__v2di)(__m128i)(b), (int)(p), \
#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
(__v2di)(__m128i)(b), (int)(p), \
#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
(__v2di)(__m128i)(b), (int)(p), \
#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
(__v2di)(__m128i)(b), (int)(p), \
#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
(__v4di)(__m256i)(b), (int)(p), \
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
(__v4di)(__m256i)(b), (int)(p), \
#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
(__v4di)(__m256i)(b), (int)(p), \
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
(__v4di)(__m256i)(b), (int)(p), \
#define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), (int)(p), \
#define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), (int)(p), \
#define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
(__v4df)(__m256d)(b), (int)(p), \
#define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
(__v4df)(__m256d)(b), (int)(p), \
#define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (int)(p), \
#define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (int)(p), \
#define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (int)(p), \
#define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (int)(p), \
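/*
 * Illustrative sketch (assumption: _MM_CMPINT_LT and _CMP_LT_OS are the
 * predicate constants defined elsewhere in <immintrin.h>): the predicate must
 * be a compile-time constant, which is why these are macros rather than
 * functions.
 *
 *   #include <immintrin.h>
 *
 *   __mmask8 demo_cmp_imm(__m128i a, __m128i b, __m256 x, __m256 y) {
 *     __mmask8 ilt = _mm_cmp_epi32_mask(a, b, _MM_CMPINT_LT); // signed a < b, 4 lanes
 *     __mmask8 flt = _mm256_cmp_ps_mask(x, y, _CMP_LT_OS);    // ordered x < y, 8 lanes
 *     return (__mmask8)(ilt & flt);
 *   }
 */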
return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
__builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
__builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
__builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
__builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
__builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
__builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
__builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
__builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
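/*
 * Illustrative sketch (assumption: these builtins back the
 * _mm*_mask_compress_* and _mm*_mask_compressstoreu_* intrinsics): compress
 * packs the lanes selected by the mask contiguously toward element 0, and the
 * store form writes only those packed elements to memory.
 *
 *   #include <immintrin.h>
 *
 *   void demo_compress(double *dst, __mmask8 k, __m256d v) {
 *     // Writes one double per set bit in the low 4 bits of k, back to back.
 *     _mm256_mask_compressstoreu_pd(dst, k, v);
 *   }
 */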
return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
#define _mm_roundscale_pd(A, imm) __extension__ ({ \
(__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
(__v2df)_mm_setzero_pd(), \
#define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
(__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(W), \
#define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
(__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
(__v2df)_mm_setzero_pd(), \
#define _mm256_roundscale_pd(A, imm) __extension__ ({ \
(__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
(__v4df)_mm256_setzero_pd(), \
#define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
(__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
(__v4df)(__m256d)(W), \
#define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
(__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
(__v4df)_mm256_setzero_pd(), \
#define _mm_roundscale_ps(A, imm) __extension__ ({ \
(__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
(__v4sf)_mm_setzero_ps(), \
#define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
(__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
(__v4sf)(__m128)(W), \
#define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
(__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
(__v4sf)_mm_setzero_ps(), \
#define _mm256_roundscale_ps(A, imm) __extension__ ({ \
(__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
(__v8sf)_mm256_setzero_ps(), \
#define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
(__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
(__v8sf)(__m256)(W), \
#define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
(__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
(__v8sf)_mm256_setzero_ps(), \
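/*
 * Illustrative sketch (assumption: the imm8 follows the usual AVX-512
 * roundscale encoding, with the rounding control in the low bits and the
 * number of fraction bits to keep in the upper nibble): keeping 0 fraction
 * bits rounds to integers, so a floor falls out of round-toward-negative.
 *
 *   #include <immintrin.h>
 *
 *   __m256 demo_floor(__m256 x) {
 *     return _mm256_roundscale_ps(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
 *   }
 */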
return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
#define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale)); })
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale)); })
#define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale)); })
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale)); })
#define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), \
(__v4df)(__m256d)(v1), (int)(scale)); })
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), \
(__v4df)(__m256d)(v1), (int)(scale)); })
#define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), \
(__v4di)(__m256i)(v1), (int)(scale)); })
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), \
(__v4di)(__m256i)(v1), (int)(scale)); })
#define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
#define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
(__v2di)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale)); })
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
(__v2di)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale)); })
#define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
#define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
(__v4di)(__m256i)(index), \
(__v4si)(__m128i)(v1), (int)(scale)); })
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
(__v4di)(__m256i)(index), \
(__v4si)(__m128i)(v1), (int)(scale)); })
#define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale)); })
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v2df)(__m128d)(v1), (int)(scale)); })
#define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale)); })
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v2di)(__m128i)(v1), (int)(scale)); })
#define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v4df)(__m256d)(v1), (int)(scale)); })
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v4df)(__m256d)(v1), (int)(scale)); })
#define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v4di)(__m256i)(v1), (int)(scale)); })
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v4di)(__m256i)(v1), (int)(scale)); })
#define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
#define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
(__v4si)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale)); })
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
(__v4si)(__m128i)(index), \
(__v4si)(__m128i)(v1), (int)(scale)); })
#define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
(__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
(__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
#define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
(__v8si)(__m256i)(index), \
(__v8si)(__m256i)(v1), (int)(scale)); })
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
__builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
(__v8si)(__m256i)(index), \
(__v8si)(__m256i)(v1), (int)(scale)); })
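/*
 * Illustrative sketch: the scatter macros take the destination base address,
 * an index vector, the data vector, and a compile-time scale of 1, 2, 4 or 8
 * bytes per index unit; the masked forms skip lanes whose mask bit is clear.
 *
 *   #include <immintrin.h>
 *
 *   void demo_scatter(float *base, __m256i idx, __m256 v, __mmask8 k) {
 *     // base[idx[i]] = v[i] for every i whose bit is set in k (4-byte elements).
 *     _mm256_mask_i32scatter_ps(base, k, idx, v, 4);
 *   }
 */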
return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
__m256i __I, __m256i __B) {
return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
__m256i __I, __m256i __B) {
return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
#define _mm_rol_epi32(a, b) __extension__ ({\
(__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
(__v4si)_mm_setzero_si128(), \
#define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\
(__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
(__v4si)(__m128i)(w), (__mmask8)(u)); })
#define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\
(__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
(__v4si)_mm_setzero_si128(), \
#define _mm256_rol_epi32(a, b) __extension__ ({\
(__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
(__v8si)_mm256_setzero_si256(), \
#define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\
(__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
(__v8si)(__m256i)(w), (__mmask8)(u)); })
#define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\
(__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
(__v8si)_mm256_setzero_si256(), \
#define _mm_rol_epi64(a, b) __extension__ ({\
(__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
(__v2di)_mm_setzero_di(), \
#define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\
(__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
(__v2di)(__m128i)(w), (__mmask8)(u)); })
#define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\
(__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
(__v2di)_mm_setzero_di(), \
#define _mm256_rol_epi64(a, b) __extension__ ({\
(__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
(__v4di)_mm256_setzero_si256(), \
#define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\
(__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
(__v4di)(__m256i)(w), (__mmask8)(u)); })
#define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\
(__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
(__v4di)_mm256_setzero_si256(), \
return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
#define _mm_ror_epi32(A, B) __extension__ ({ \
(__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
(__v4si)_mm_setzero_si128(), \
#define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \
(__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
(__v4si)(__m128i)(W), (__mmask8)(U)); })
#define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \
(__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
(__v4si)_mm_setzero_si128(), \
#define _mm256_ror_epi32(A, B) __extension__ ({ \
(__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
(__v8si)_mm256_setzero_si256(), \
#define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \
(__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
(__v8si)(__m256i)(W), (__mmask8)(U)); })
#define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \
(__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
(__v8si)_mm256_setzero_si256(), \
#define _mm_ror_epi64(A, B) __extension__ ({ \
(__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
(__v2di)_mm_setzero_di(), \
#define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \
(__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
(__v2di)(__m128i)(W), (__mmask8)(U)); })
#define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \
(__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
(__v2di)_mm_setzero_di(), \
#define _mm256_ror_epi64(A, B) __extension__ ({ \
(__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
(__v4di)_mm256_setzero_si256(), \
#define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \
(__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
(__v4di)(__m256i)(W), (__mmask8)(U)); })
#define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \
(__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
(__v4di)_mm256_setzero_si256(), \
/* NOTE: truncated -- the mask/maskz sll_epi32 functions built on
 * __builtin_ia32_pslld128_mask and __builtin_ia32_pslld256_mask. */
#define _mm_mask_slli_epi32(W, U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_pslldi128_mask((__v4si)(__m128i)(A), (int)(B), \
                                         (__v4si)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_slli_epi32(U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_pslldi128_mask((__v4si)(__m128i)(A), (int)(B), \
                                         (__v4si)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })

#define _mm256_mask_slli_epi32(W, U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_pslldi256_mask((__v8si)(__m256i)(A), (int)(B), \
                                         (__v8si)(__m256i)(W), \
                                         (__mmask8)(U)); })

#define _mm256_maskz_slli_epi32(U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_pslldi256_mask((__v8si)(__m256i)(A), (int)(B), \
                                         (__v8si)_mm256_setzero_si256(), \
                                         (__mmask8)(U)); })
/* NOTE: truncated -- the mask/maskz sll_epi64 functions built on
 * __builtin_ia32_psllq128_mask and __builtin_ia32_psllq256_mask. */
#define _mm_mask_slli_epi64(W, U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_psllqi128_mask((__v2di)(__m128i)(A), (int)(B), \
                                         (__v2di)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_slli_epi64(U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_psllqi128_mask((__v2di)(__m128i)(A), (int)(B), \
                                         (__v2di)_mm_setzero_di(), \
                                         (__mmask8)(U)); })

#define _mm256_mask_slli_epi64(W, U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_psllqi256_mask((__v4di)(__m256i)(A), (int)(B), \
                                         (__v4di)(__m256i)(W), \
                                         (__mmask8)(U)); })

#define _mm256_maskz_slli_epi64(U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_psllqi256_mask((__v4di)(__m256i)(A), (int)(B), \
                                         (__v4di)_mm256_setzero_si256(), \
                                         (__mmask8)(U)); })
/* NOTE: truncated -- the rorv_epi32/rorv_epi64 variable-rotate functions
 * (__builtin_ia32_prorvd128/256_mask, __builtin_ia32_prorvq128/256_mask),
 * the mask/maskz sllv_epi64/sllv_epi32 and srlv_epi64/srlv_epi32 functions
 * (__builtin_ia32_psllv*_mask, __builtin_ia32_psrlv*_mask), and the
 * mask/maskz srl_epi32 functions (__builtin_ia32_psrld128/256_mask). */
#define _mm_mask_srli_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psrldi128_mask((__v4si)(__m128i)(A), (int)(imm), \
                                         (__v4si)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_srli_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psrldi128_mask((__v4si)(__m128i)(A), (int)(imm), \
                                         (__v4si)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })

#define _mm256_mask_srli_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psrldi256_mask((__v8si)(__m256i)(A), (int)(imm), \
                                         (__v8si)(__m256i)(W), \
                                         (__mmask8)(U)); })

#define _mm256_maskz_srli_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psrldi256_mask((__v8si)(__m256i)(A), (int)(imm), \
                                         (__v8si)_mm256_setzero_si256(), \
                                         (__mmask8)(U)); })
/* NOTE: truncated -- the mask/maskz srl_epi64 functions built on
 * __builtin_ia32_psrlq128_mask and __builtin_ia32_psrlq256_mask. */
#define _mm_mask_srli_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psrlqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
                                         (__v2di)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_srli_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psrlqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
                                         (__v2di)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })

#define _mm256_mask_srli_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psrlqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
                                         (__v4di)(__m256i)(W), \
                                         (__mmask8)(U)); })

#define _mm256_maskz_srli_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psrlqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
                                         (__v4di)_mm256_setzero_si256(), \
                                         (__mmask8)(U)); })
/* NOTE: truncated -- the mask/maskz srav_epi32 and srav_epi64 functions
 * (__builtin_ia32_psrav4si/8si_mask, __builtin_ia32_psravq128/256_mask),
 * the masked mov_epi32/mov_epi64 selects (__builtin_ia32_selectd/q_128/256),
 * the masked aligned loads and stores of epi32/epi64
 * (__builtin_ia32_movdqa32/64load*_mask, __builtin_ia32_movdqa32/64store*_mask),
 * and a group of selectpd_128/256-based masked double-precision helpers. */
#define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \
  (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
                                                  (__v4si)(__m128i)(O), \
                                                  (__mmask8)(M)); })

#define _mm_maskz_set1_epi32(M, A) __extension__ ({ \
  (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
                                                  (__v4si)_mm_setzero_si128(), \
                                                  (__mmask8)(M)); })

#define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \
  (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
                                                  (__v8si)(__m256i)(O), \
                                                  (__mmask8)(M)); })

#define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \
  (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
                                                  (__v8si)_mm256_setzero_si256(), \
                                                  (__mmask8)(M)); })
/* NOTE: truncated -- the mask/maskz set1_epi64 functions built on
 * __builtin_ia32_pbroadcastq128_gpr_mask and
 * __builtin_ia32_pbroadcastq256_gpr_mask. */
#define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (int)(imm), (__mmask8)(U)); })

#define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                             (__v4df)(__m256d)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                             (__v4df)(__m256d)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (int)(imm), (__mmask8)(U)); })

#define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                            (__v8sf)(__m256)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                            (__v8sf)(__m256)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
/* NOTE: truncated -- the mask/maskz aligned and unaligned loads and stores
 * of pd/ps/epi64/epi32 (__builtin_ia32_load*_mask, __builtin_ia32_store*_mask),
 * a block of selectpd/selectps-based masked helpers, and the
 * rcp14_pd/rcp14_ps approximate-reciprocal functions
 * (__builtin_ia32_rcp14pd128/256_mask, __builtin_ia32_rcp14ps128/256_mask). */
6615 #define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
6616 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6617 (__v2df)_mm_permute_pd((X), (C)), \
6618 (__v2df)(__m128d)(W)); })
6620 #define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
6621 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6622 (__v2df)_mm_permute_pd((X), (C)), \
6623 (__v2df)_mm_setzero_pd()); })
6625 #define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
6626 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6627 (__v4df)_mm256_permute_pd((X), (C)), \
6628 (__v4df)(__m256d)(W)); })
6630 #define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
6631 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6632 (__v4df)_mm256_permute_pd((X), (C)), \
6633 (__v4df)_mm256_setzero_pd()); })
6635 #define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
6636 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6637 (__v4sf)_mm_permute_ps((X), (C)), \
6638 (__v4sf)(__m128)(W)); })
6640 #define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
6641 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6642 (__v4sf)_mm_permute_ps((X), (C)), \
6643 (__v4sf)_mm_setzero_ps()); })
6645 #define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
6646 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6647 (__v8sf)_mm256_permute_ps((X), (C)), \
6648 (__v8sf)(__m256)(W)); })
6650 #define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
6651 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6652 (__v8sf)_mm256_permute_ps((X), (C)), \
6653 (__v8sf)_mm256_setzero_ps()); })
/* NOTE: truncated -- the mask/maskz permutevar_pd/permutevar_ps functions
 * (__builtin_ia32_vpermilvarpd*_mask, __builtin_ia32_vpermilvarps*_mask),
 * the test_epi32/64_mask and testn_epi32/64_mask functions
 * (__builtin_ia32_ptestm*, __builtin_ia32_ptestnm*), the masked
 * unpackhi/unpacklo epi32/epi64 selects, and the mask/maskz sra_epi32
 * functions (__builtin_ia32_psrad128/256_mask). */
#define _mm_mask_srai_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psradi128_mask((__v4si)(__m128i)(A), (int)(imm), \
                                         (__v4si)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_srai_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psradi128_mask((__v4si)(__m128i)(A), (int)(imm), \
                                         (__v4si)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })

#define _mm256_mask_srai_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psradi256_mask((__v8si)(__m256i)(A), (int)(imm), \
                                         (__v8si)(__m256i)(W), \
                                         (__mmask8)(U)); })

#define _mm256_maskz_srai_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psradi256_mask((__v8si)(__m256i)(A), (int)(imm), \
                                         (__v8si)_mm256_setzero_si256(), \
                                         (__mmask8)(U)); })
/* NOTE: truncated -- the sra_epi64 functions (plain, mask and maskz forms)
 * built on __builtin_ia32_psraq128_mask and __builtin_ia32_psraq256_mask. */
#define _mm_srai_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
                                         (__v2di)_mm_setzero_di(), \
                                         (__mmask8)-1); })

#define _mm_mask_srai_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
                                         (__v2di)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_srai_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
                                         (__v2di)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })

#define _mm256_srai_epi64(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
                                         (__v4di)_mm256_setzero_si256(), \
                                         (__mmask8)-1); })

#define _mm256_mask_srai_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
                                         (__v4di)(__m256i)(W), \
                                         (__mmask8)(U)); })

#define _mm256_maskz_srai_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
                                         (__v4di)_mm256_setzero_si256(), \
                                         (__mmask8)(U)); })
#define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                            (__v4si)(__m128i)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                            (__v4si)(__m128i)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                            (__v8si)(__m256i)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                            (__v8si)(__m256i)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                            (__v2di)(__m128i)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                            (__v2di)(__m128i)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                            (__v4di)(__m256i)(B), \
                                            (__v4di)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                            (__v4di)(__m256i)(B), \
                                            (__v4di)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
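
/* Usage sketch (illustrative only, not part of the original header): the
 * imm8 of the ternary-logic macros is a 3-input truth table; 0x96 encodes
 * A ^ B ^ C.  The helper name `xor3_demo' is hypothetical. */
#if 0
static __inline__ __m128i xor3_demo(__m128i a, __m128i b, __m128i c) {
  return _mm_ternarylogic_epi32(a, b, c, 0x96); /* per-bit A ^ B ^ C */
}
#endif
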
#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), (int)(imm), \
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)-1); })

#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), (int)(imm), \
                                             (__v8sf)(__m256)(W), \
                                             (__mmask8)(U)); })

#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), (int)(imm), \
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)(U)); })

#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (int)(imm), \
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)-1); })

#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (int)(imm), \
                                              (__v4df)(__m256d)(W), \
                                              (__mmask8)(U)); })

#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (int)(imm), \
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)(U)); })

#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)-1); })

#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U)); })

#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U)); })

#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v4di)_mm256_setzero_si256(), \
                                              (__mmask8)-1); })

#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v4di)(__m256i)(W), \
                                              (__mmask8)(U)); })

#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v4di)_mm256_setzero_si256(), \
                                              (__mmask8)(U)); })
7292 #define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
7293 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
7294 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
7295 (__v2df)(__m128d)(W)); })
7297 #define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
7298 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
7299 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
7300 (__v2df)_mm_setzero_pd()); })
7302 #define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
7303 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
7304 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
7305 (__v4df)(__m256d)(W)); })
7307 #define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
7308 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
7309 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
7310 (__v4df)_mm256_setzero_pd()); })
7312 #define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
7313 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
7314 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
7315 (__v4sf)(__m128)(W)); })
7317 #define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
7318 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
7319 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
7320 (__v4sf)_mm_setzero_ps()); })
7322 #define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
7323 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7324 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
7325 (__v8sf)(__m256)(W)); })
7327 #define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
7328 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7329 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
7330 (__v8sf)_mm256_setzero_ps()); })
/* NOTE: truncated -- the rsqrt14_pd/rsqrt14_ps functions
 * (__builtin_ia32_rsqrt14pd128/256_mask, __builtin_ia32_rsqrt14ps128/256_mask),
 * the 256-bit broadcast_f32x4/broadcast_i32x4 functions
 * (__builtin_ia32_broadcastf32x4_256_mask, __builtin_ia32_broadcasti32x4_256_mask),
 * and the masked broadcastss/broadcastsd/broadcastd/broadcastq selects. */
/* NOTE: truncated -- the down-converting pack functions: signed-saturating
 * cvtsepi32_epi8/epi16 and cvtsepi64_epi8/epi16/epi32
 * (__builtin_ia32_pmovsd*/pmovsq*_mask), unsigned-saturating
 * cvtusepi32_*/cvtusepi64_* (__builtin_ia32_pmovusd*/pmovusq*_mask) and
 * truncating cvtepi32_*/cvtepi64_* (__builtin_ia32_pmovd*/pmovq*_mask),
 * each with plain, mask, maskz and masked storeu variants for 128- and
 * 256-bit sources. */
#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                               (int)(imm), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1); })

#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                               (int)(imm), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U)); })

#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                               (int)(imm), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U)); })

#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v4si)_mm_setzero_si128(), \
                                                (__mmask8)-1); })

#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v4si)(__m128i)(W), \
                                                (__mmask8)(U)); })

#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v4si)_mm_setzero_si128(), \
                                                (__mmask8)(U)); })

#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
                                              (__v4sf)(__m128)(B), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)-1); })

#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
                                              (__v4sf)(__m128)(B), (int)(imm), \
                                              (__v8sf)(__m256)(W), \
                                              (__mmask8)(U)); })

#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
                                              (__v4sf)(__m128)(B), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)(U)); })

#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
                                               (__v4si)(__m128i)(B), \
                                               (int)(imm), \
                                               (__v8si)_mm256_setzero_si256(), \
                                               (__mmask8)-1); })

#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
                                               (__v4si)(__m128i)(B), \
                                               (int)(imm), \
                                               (__v8si)(__m256i)(W), \
                                               (__mmask8)(U)); })

#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
                                               (__v4si)(__m128i)(B), \
                                               (int)(imm), \
                                               (__v8si)_mm256_setzero_si256(), \
                                               (__mmask8)(U)); })
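
/* Usage sketch (illustrative only, not part of the original header):
 * replacing the upper 128-bit lane of a 256-bit vector under a write mask.
 * The helper name `insert_hi_demo' is hypothetical. */
#if 0
static __inline__ __m256 insert_hi_demo(__m256 dst, __mmask8 k, __m256 a, __m128 b) {
  /* b is inserted as lane 1 of a; the combined result is then merged into
   * dst element-by-element under k. */
  return _mm256_mask_insertf32x4(dst, k, a, b, 1);
}
#endif
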
#define _mm_getmant_pd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1); })

#define _mm_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U)); })

#define _mm_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U)); })

#define _mm256_getmant_pd(A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1); })

#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)); })

#define _mm_getmant_ps(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1); })

#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)); })

#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)); })

#define _mm256_getmant_ps(A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1); })

#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)); })

#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)); })
8622 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8623 (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
8624 (double const *)(addr), \
8625 (__v2di)(__m128i)(index), \
8626 (__mmask8)(mask), (int)(scale)); })
8628 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8629 (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
8630 (long long const *)(addr), \
8631 (__v2di)(__m128i)(index), \
8632 (__mmask8)(mask), (int)(scale)); })
8634 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8635 (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
8636 (double const *)(addr), \
8637 (__v4di)(__m256i)(index), \
8638 (__mmask8)(mask), (int)(scale)); })
8640 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8641 (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
8642 (long long const *)(addr), \
8643 (__v4di)(__m256i)(index), \
8644 (__mmask8)(mask), (int)(scale)); })
8646 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8647 (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
8648 (float const *)(addr), \
8649 (__v2di)(__m128i)(index), \
8650 (__mmask8)(mask), (int)(scale)); })
8652 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8653 (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
8654 (int const *)(addr), \
8655 (__v2di)(__m128i)(index), \
8656 (__mmask8)(mask), (int)(scale)); })
8658 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8659 (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
8660 (float const *)(addr), \
8661 (__v4di)(__m256i)(index), \
8662 (__mmask8)(mask), (int)(scale)); })
8664 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8665 (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
8666 (int const *)(addr), \
8667 (__v4di)(__m256i)(index), \
8668 (__mmask8)(mask), (int)(scale)); })
8670 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8671 (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
8672 (double const *)(addr), \
8673 (__v4si)(__m128i)(index), \
8674 (__mmask8)(mask), (int)(scale)); })
8676 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8677 (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
8678 (long long const *)(addr), \
8679 (__v4si)(__m128i)(index), \
8680 (__mmask8)(mask), (int)(scale)); })
8682 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8683 (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
8684 (double const *)(addr), \
8685 (__v4si)(__m128i)(index), \
8686 (__mmask8)(mask), (int)(scale)); })
8688 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8689 (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
8690 (long long const *)(addr), \
8691 (__v4si)(__m128i)(index), \
8692 (__mmask8)(mask), (int)(scale)); })
8694 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8695 (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
8696 (float const *)(addr), \
8697 (__v4si)(__m128i)(index), \
8698 (__mmask8)(mask), (int)(scale)); })
8700 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8701 (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
8702 (int const *)(addr), \
8703 (__v4si)(__m128i)(index), \
8704 (__mmask8)(mask), (int)(scale)); })
8706 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8707 (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
8708 (float const *)(addr), \
8709 (__v8si)(__m256i)(index), \
8710 (__mmask8)(mask), (int)(scale)); })
8712 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8713 (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
8714 (int const *)(addr), \
8715 (__v8si)(__m256i)(index), \
8716 (__mmask8)(mask), (int)(scale)); })
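
/* Usage sketch (illustrative only, not part of the original header): a
 * masked 32-bit-index gather of floats; elements whose mask bit is clear
 * keep the pass-through value.  `gather_demo', `table' and `idx' are
 * hypothetical names. */
#if 0
static __inline__ __m128 gather_demo(const float *table, __m128i idx,
                                     __mmask8 k, __m128 passthru) {
  /* scale = 4: idx holds element indices into an array of float. */
  return _mm_mmask_i32gather_ps(passthru, k, idx, table, 4);
}
#endif
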
8718 #define _mm256_permutex_pd(X, C) __extension__ ({ \
8719 (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \
8720 (__v4df)_mm256_undefined_pd(), \
8721 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
8722 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
8724 #define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
8725 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
8726 (__v4df)_mm256_permutex_pd((X), (C)), \
8727 (__v4df)(__m256d)(W)); })
8729 #define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
8730 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
8731 (__v4df)_mm256_permutex_pd((X), (C)), \
8732 (__v4df)_mm256_setzero_pd()); })
8734 #define _mm256_permutex_epi64(X, C) __extension__ ({ \
8735 (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \
8736 (__v4di)_mm256_undefined_si256(), \
8737 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
8738 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
8740 #define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
8741 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8742 (__v4di)_mm256_permutex_epi64((X), (C)), \
8743 (__v4di)(__m256i)(W)); })
8745 #define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
8746 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8747 (__v4di)_mm256_permutex_epi64((X), (C)), \
8748 (__v4di)_mm256_setzero_si256()); })
/* NOTE: truncated -- the 256-bit permutexvar_pd/epi64/ps/epi32 functions
 * (plain, mask and maskz forms) built on __builtin_ia32_permvardf256_mask,
 * __builtin_ia32_permvardi256_mask, __builtin_ia32_permvarsf256_mask and
 * __builtin_ia32_permvarsi256_mask. */
#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
                                         (__v4si)(__m128i)(B), (int)(imm), \
                                         (__v4si)_mm_undefined_si128(), \
                                         (__mmask8)-1); })

#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
                                         (__v4si)(__m128i)(B), (int)(imm), \
                                         (__v4si)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
                                         (__v4si)(__m128i)(B), (int)(imm), \
                                         (__v4si)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })

#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
                                         (__v8si)(__m256i)(B), (int)(imm), \
                                         (__v8si)_mm256_undefined_si256(), \
                                         (__mmask8)-1); })

#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
                                         (__v8si)(__m256i)(B), (int)(imm), \
                                         (__v8si)(__m256i)(W), \
                                         (__mmask8)(U)); })

#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
                                         (__v8si)(__m256i)(B), (int)(imm), \
                                         (__v8si)_mm256_setzero_si256(), \
                                         (__mmask8)(U)); })

#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
                                         (__v2di)(__m128i)(B), (int)(imm), \
                                         (__v2di)_mm_setzero_di(), \
                                         (__mmask8)-1); })

#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
                                         (__v2di)(__m128i)(B), (int)(imm), \
                                         (__v2di)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
                                         (__v2di)(__m128i)(B), (int)(imm), \
                                         (__v2di)_mm_setzero_di(), \
                                         (__mmask8)(U)); })

#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
                                         (__v4di)(__m256i)(B), (int)(imm), \
                                         (__v4di)_mm256_undefined_pd(), \
                                         (__mmask8)-1); })

#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
                                         (__v4di)(__m256i)(B), (int)(imm), \
                                         (__v4di)(__m256i)(W), \
                                         (__mmask8)(U)); })

#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
                                         (__v4di)(__m256i)(B), (int)(imm), \
                                         (__v4di)_mm256_setzero_si256(), \
                                         (__mmask8)(U)); })
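
/* Usage sketch (illustrative only, not part of the original header): valignd
 * concatenates the two sources and shifts the pair right by imm 32-bit
 * elements, pulling elements of the second source into the result.  The
 * helper name `align_demo' is hypothetical. */
#if 0
static __inline__ __m256i align_demo(__m256i hi, __m256i lo) {
  /* Result elements 0..4 come from lo[3..7], elements 5..7 from hi[0..2]. */
  return _mm256_alignr_epi32(hi, lo, 3);
}
#endif
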
/* NOTE: truncated -- a block of selectps_128/256-based masked
 * single-precision helpers (mask and maskz forms). */
8998 #define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\
8999 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
9000 (__v8si)_mm256_shuffle_epi32((A), (I)), \
9001 (__v8si)(__m256i)(W)); })
9003 #define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\
9004 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
9005 (__v8si)_mm256_shuffle_epi32((A), (I)), \
9006 (__v8si)_mm256_setzero_si256()); })
9008 #define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\
9009 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
9010 (__v4si)_mm_shuffle_epi32((A), (I)), \
9011 (__v4si)(__m128i)(W)); })
9013 #define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\
9014 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
9015 (__v4si)_mm_shuffle_epi32((A), (I)), \
9016 (__v4si)_mm_setzero_si128()); })
/* NOTE: truncated -- a block of selectpd/selectps-based masked moves
 * (mask and maskz forms) followed by the mask/maskz cvtph_ps conversions
 * (__builtin_ia32_vcvtph2ps_mask, __builtin_ia32_vcvtph2ps256_mask). */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                         (__v8hi)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                         (__v8hi)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })

#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                            (__v8hi)(__m128i)(W), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                            (__v8hi)_mm_setzero_si128(), \
                                            (__mmask8)(U)); })
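
/* Usage sketch (illustrative only, not part of the original header): a
 * masked float-to-half conversion using the current rounding mode; output
 * elements whose mask bit is clear keep the corresponding lane of w.  The
 * helper name `to_fp16_demo' is hypothetical. */
#if 0
static __inline__ __m128i to_fp16_demo(__m128i w, __mmask8 k, __m128 a) {
  return _mm_mask_cvt_roundps_ph(w, k, a, _MM_FROUND_CUR_DIRECTION);
}
#endif
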
#undef __DEFAULT_FN_ATTRS

#endif /* __AVX512VLINTRIN_H */
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_scalef_pd(__m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi16(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi8(__m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_broadcastss_ps(__m128 __X)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rorv_epi64(__m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epi64_mask(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sub_ps(__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi8(__m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_abs_epi64(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_undefined_pd(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi32_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcast_i32x4(__m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epu64_mask(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi32_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
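For illustration, a small self-contained example of this interleaving (plain SSE, so any x86-64 compiler should accept it); the values are arbitrary:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128 a = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);  /* a = {0,1,2,3}, lane 0 first */
  __m128 b = _mm_set_ps(7.0f, 6.0f, 5.0f, 4.0f);  /* b = {4,5,6,7} */

  /* Interleave the high halves: {a[2], b[2], a[3], b[3]} = {2,6,3,7}. */
  __m128 hi = _mm_unpackhi_ps(a, b);

  float out[4];
  _mm_storeu_ps(out, hi);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}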
static __inline __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastq_epi64(__m128i __X)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rolv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srav_epi64(__m128i __X, __m128i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutexvar_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_getexp_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu64(__m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epu64_mask(__m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_compress_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_testn_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epi64_mask(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi32_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi32_epi16(__m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_ps(__mmask16 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epu64_mask(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rolv_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_getexp_pd(__mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epu64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi8(__m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...
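The low-half counterpart, shown the same way (plain SSE; example values only):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128 a = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);  /* a = {0,1,2,3}, lane 0 first */
  __m128 b = _mm_set_ps(7.0f, 6.0f, 5.0f, 4.0f);  /* b = {4,5,6,7} */

  /* Interleave the low halves: {a[0], b[0], a[1], b[1]} = {0,4,1,5}. */
  __m128 lo = _mm_unpacklo_ps(a, b);

  float out[4];
  _mm_storeu_ps(out, lo);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}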
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A)
static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) _mm_setzero_di(void)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_add_ps(__mmask16 __U, __m256 __A, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp14_ps(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srav_epi64(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutexvar_epi32(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi16(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_compress_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_mov_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epu32(__m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epu32(__m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a)
Duplicates low-order (even-indexed) values from a 128-bit vector of [4 x float] to float values store...
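A brief illustration of the duplication pattern (requires SSE3, e.g. -msse3 or any later feature level; the values are arbitrary):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128 a = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);  /* a = {0,1,2,3}, lane 0 first */

  /* Duplicate the even-indexed elements: {a[0], a[0], a[2], a[2]} = {0,0,2,2}. */
  __m128 d = _mm_moveldup_ps(a);

  float out[4];
  _mm_storeu_ps(out, d);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}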
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_getexp_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epu64_mask(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi32_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_add_ps(__mmask16 __U, __m128 __A, __m128 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastq_epi64(__m128i __X)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi32(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_ps(__mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastd_epi32(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi32(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_scalef_ps(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_or_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_add_ps(__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mullo_epi32(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutexvar_pd(__m256i __X, __m256d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_andnot_si256(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt14_ps(__m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepu32_ps(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_expand_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi64(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_broadcastss_ps(__m128 __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_rsqrt14_pd(__m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_pd(__m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_and_si256(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi8(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epu32(__m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi64(__m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_scalef_pd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepu32_pd(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi32(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
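As a small illustration of how this zero vector pairs with the masked intrinsics in this header, a sketch that assumes AVX512VL is available (e.g. clang -mavx512f -mavx512vl); _mm_mask_add_ps is declared in this header and the mask value is illustrative:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128 z = _mm_setzero_ps();                    /* {0,0,0,0} */
  __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  /* {1,2,3,4}, lane 0 first */

  /* Lanes 0 and 1 compute a+a; lanes 2 and 3 fall back to the zero vector: {2,4,0,0}. */
  __m128 r = _mm_mask_add_ps(z, 0x3, a, a);

  float out[4];
  _mm_storeu_ps(out, r);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}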
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epu32(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sub_ps(__mmask16 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_compress_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epi64_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_sub_ps(__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi16(__m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sra_epi64(__m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi64_mask(__m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_broadcastsd_pd(__m128d __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rorv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epi64_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epu32(__m256d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epu64_mask(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rolv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epu32(__m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_rsqrt14_ps(__m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi32_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_rcp14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_abs_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi32_epi8(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epu64_mask(__m128i __a, __m128i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A)
#define __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_add_ps(__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epi64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastd_epi32(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi64(__m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
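A minimal usage sketch, not taken from the header, pairing the plain SSE2 AND with the AVX-512VL masked form _mm_mask_and_epi32 listed elsewhere in this index (function and variable names are illustrative; assumes compilation with -mavx512vl):

#include <immintrin.h>

/* _mm_and_si128 ANDs all 128 bits; with _mm_mask_and_epi32, 32-bit lanes
   whose mask bit is 0 come from the pass-through operand instead. */
static __m128i and_demo(__m128i a, __m128i b, __m128i passthru)
{
    __m128i plain  = _mm_and_si128(a, b);                     /* SSE2 form */
    __m128i masked = _mm_mask_and_epi32(passthru, 0x5, a, b); /* lanes 0 and 2 take a & b */
    return _mm_or_si128(plain, masked);                       /* combined only so both are used */
}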
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi8(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi16(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_cvtepu32_pd(__m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi32_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_compress_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_xor_si256(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epu64_mask(__m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_scalef_ps(__m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi8(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_rcp14_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epi64_mask(__m128i __a, __m128i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epu32(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_or_si256(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expand_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_test_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_expand_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epu64_mask(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_cvtepu32_ps(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rorv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_expand_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_sub_ps(__mmask16 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_compress_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epu64_mask(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a)
Moves and duplicates high-order (odd-indexed) values from a 128-bit vector of [4 x float] to float values stored in a 128-bit vector of [4 x float].
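As an illustrative sketch (names assumed, not from the header), the duplication pattern is easiest to see with concrete values: for a source {a0, a1, a2, a3} the result is {a1, a1, a3, a3}.

#include <immintrin.h>

/* _mm_movehdup_ps copies each odd-indexed float over its even-indexed neighbour. */
static __m128 movehdup_demo(void)
{
    __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
    return _mm_movehdup_ps(a);   /* yields {2.0f, 2.0f, 4.0f, 4.0f} */
}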
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi16(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_test_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
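A hedged sketch of the usual pattern (helper name is hypothetical; _mm_insert_epi32 requires SSE4.1): the undefined vector is only a placeholder whose every lane is overwritten before it is read, so the unspecified content never escapes.

#include <immintrin.h>

/* Build a vector lane by lane, starting from a don't-care value instead of a zeroed one. */
static __m128i undefined_demo(int x)
{
    __m128i v = _mm_undefined_si128();
    v = _mm_insert_epi32(v, x,     0);
    v = _mm_insert_epi32(v, x + 1, 1);
    v = _mm_insert_epi32(v, x + 2, 2);
    v = _mm_insert_epi32(v, x + 3, 3);
    return v;
}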
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double] to double-precision values stored in a 128-bit vector of [2 x double].
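A small sketch with assumed names showing the broadcast of the low element: for a source {a0, a1} the result is {a0, a0}.

#include <immintrin.h>

/* _mm_movedup_pd duplicates the low double into both lanes. */
static __m128d movedup_demo(void)
{
    __m128d a = _mm_setr_pd(3.5, 7.25);
    return _mm_movedup_pd(a);   /* yields {3.5, 3.5} */
}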
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutexvar_ps(__m256i __X, __m256 __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rorv_epi64(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_mov_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rcp14_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rolv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epu32_mask(__m128i __a, __m128i __b)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epu64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_testn_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b)
Performs a bitwise exclusive OR of two 128-bit integer vectors.
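A brief illustrative sketch (names assumed): XOR with itself is the classic register-clearing idiom, and XOR with another operand toggles exactly the bits set in that operand.

#include <immintrin.h>

static __m128i xor_demo(__m128i a, __m128i b)
{
    __m128i zero    = _mm_xor_si128(a, a);  /* all lanes become 0 */
    __m128i toggled = _mm_xor_si128(a, b);  /* bitwise a ^ b */
    return _mm_or_si128(zero, toggled);
}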
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b)
Performs a bitwise OR of two 128-bit integer vectors.
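A minimal sketch (names assumed) of the typical flag-setting use: OR-ing with a constant sets the chosen bits in every lane while leaving the rest untouched.

#include <immintrin.h>

static __m128i or_demo(__m128i flags)
{
    const __m128i low_bit = _mm_set1_epi32(1);  /* bit 0 of each 32-bit lane */
    return _mm_or_si128(flags, low_bit);
}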
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_compress_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expand_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_getexp_ps(__m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_expand_pd(__mmask8 __U, __m256d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_getexp_ps(__m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epi64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_getexp_pd(__m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epi64_mask(__m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors, using the one's complement of the values contained in the first source operand.
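In other words the result is (~a) & b, which makes the first operand a convenient bit-clearing mask; a short sketch with assumed names:

#include <immintrin.h>

/* _mm_andnot_si128(mask, value) clears exactly the mask bits of value in each lane. */
static __m128i andnot_demo(__m128i value)
{
    const __m128i clear_mask = _mm_set1_epi32(0xFF);  /* bits to clear */
    return _mm_andnot_si128(clear_mask, value);       /* value & ~0xFF per 32-bit lane */
}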
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi32(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A)