  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
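  // Packed arithmetic shift right by immediate (PSRAI).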
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
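  // Packed arithmetic shift right by the scalar amount held in the low qword
  // of the second operand (PSRA).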
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
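  // Packed logical shift right by immediate (PSRLI).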
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
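  // Packed logical shift right by scalar (PSRL).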
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
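  // Packed shift left by immediate (PSLLI).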
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
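  // Packed shift left by scalar (PSLL).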
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  Type *AmtVT = Amt->getType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
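  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic IR shift, e.g.:
  //   %r = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 3)
  // becomes
  //   %r = ashr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>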
    Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
    Amt = Builder.CreateVectorSplat(VWidth, Amt);
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
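    // If the amount is guaranteed out of range, a logical shift combines to
    // zero and an arithmetic shift clamps the amount to the sign bit.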
    Amt = ConstantInt::get(SVT, BitWidth - 1);
    return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
229 "Unexpected shift-by-scalar type");
  KnownBits KnownLowerBits = llvm::computeKnownBits(
      Amt, DemandedLower, II.getDataLayout());
  KnownBits KnownUpperBits = llvm::computeKnownBits(
      Amt, DemandedUpper, II.getDataLayout());
    Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
256 "Unexpected shift-by-scalar type");
  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
  if (!CDV)
    return nullptr;

  APInt Count(64, 0);
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count <<= BitWidth;
    Count |= SubElt->getValue().zextOrTrunc(64);
  }
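  // Splat the (possibly clamped) count back to the element type and emit the
  // equivalent generic IR shift.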
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
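std::optional<Instruction *>
X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  // Helper to simplify an operand when only its lowest DemandedWidth vector
  // elements are actually used.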
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
    APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
    return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
  };

  switch (II.getIntrinsicID()) {
  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
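    // BEXTR/BEXTRI extract a bitfield: the control's bits 7:0 give the start
    // bit and bits 15:8 the length. With constant operands this folds away.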
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      // Shift down and keep only the low Length bits (Shift and Length come
      // from the constant control operand, InC from the constant source).
      uint64_t Result = InC->getZExtValue() >> Shift;
      Result &= maskTrailingOnes<uint64_t>(Length);
      return IC.replaceInstUsesWith(II,
                                    ConstantInt::get(II.getType(), Result));
  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
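    // BZHI zeroes all bits of the source from the bit index (low byte of the
    // control operand) upwards.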
      uint64_t Index = C->getZExtValue() & 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      // With a constant source InC, keep only the low Index bits.
      uint64_t Result = InC->getZExtValue();
      Result &= maskTrailingOnes<uint64_t>(Index);
      return IC.replaceInstUsesWith(II,
                                    ConstantInt::get(II.getType(), Result));
  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
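    // PEXT gathers the source bits selected by the mask into the low bits of
    // the result; a constant mask enables several folds.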
      if (MaskC->isNullValue()) {
        // A zero mask extracts nothing.
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      }
      if (MaskC->isAllOnesValue()) {
        // An all-ones mask is the identity.
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));
      }

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // A single contiguous run of mask bits is a plain bitfield extract:
        // mask the source and shift it down to bit 0.
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
      }

      // Constant source and mask: emulate the extraction bit by bit.
      uint64_t Src = SrcC->getZExtValue();
      uint64_t Mask = MaskC->getZExtValue();
      if (BitToTest & Src)
        Result |= BitToSet;
      return IC.replaceInstUsesWith(II,
                                    ConstantInt::get(II.getType(), Result));
  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
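    // PDEP scatters the low source bits to the positions selected by the
    // mask; the same constant-mask folds apply in the other direction.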
      if (MaskC->isNullValue()) {
        // Depositing into a zero mask yields zero.
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      }
      if (MaskC->isAllOnesValue()) {
        // An all-ones mask is the identity.
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));
      }

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // A single contiguous run of mask bits deposits a bitfield: shift the
        // source up to the mask position and mask the result.
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
      }

      // Constant source and mask: emulate the deposit bit by bit.
      uint64_t Src = SrcC->getZExtValue();
      uint64_t Mask = MaskC->getZExtValue();
      if (BitToTest & Src)
        Result |= Mask & -Mask;
      return IC.replaceInstUsesWith(II,
                                    ConstantInt::get(II.getType(), Result));
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
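    // These conversions only read the lowest element of their vector operand.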
    Value *Arg = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
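    // These comparisons also only demand the lowest element of each operand.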
    bool MadeChange = false;
    Value *Arg0 = II.getArgOperand(0);
    Value *Arg1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
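    // If the rounding mode is CUR_DIRECTION (4) these are ordinary vector FP
    // ops and can be rewritten as such.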
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);
        Value *V;
        switch (II.getIntrinsicID()) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_add_ps_512:
        case Intrinsic::x86_avx512_add_pd_512:
          V = IC.Builder.CreateFAdd(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_sub_ps_512:
        case Intrinsic::x86_avx512_sub_pd_512:
          V = IC.Builder.CreateFSub(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_mul_ps_512:
        case Intrinsic::x86_avx512_mul_pd_512:
          V = IC.Builder.CreateFMul(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_div_ps_512:
        case Intrinsic::x86_avx512_div_pd_512:
          V = IC.Builder.CreateFDiv(Arg0, Arg1);
          break;
        }
        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
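    // Same idea for the masked scalar ops: with CUR_DIRECTION rounding the
    // arithmetic on element 0 becomes a plain scalar FP op.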
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
      if (R->getValue() == 4) {
        // Extract the elements as scalars.
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);
        Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
        Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);
        Value *V;
        switch (II.getIntrinsicID()) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_mask_add_ss_round:
        case Intrinsic::x86_avx512_mask_add_sd_round:
          V = IC.Builder.CreateFAdd(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_sub_ss_round:
        case Intrinsic::x86_avx512_mask_sub_sd_round:
          V = IC.Builder.CreateFSub(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_mul_ss_round:
        case Intrinsic::x86_avx512_mask_mul_sd_round:
          V = IC.Builder.CreateFMul(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_div_ss_round:
        case Intrinsic::x86_avx512_mask_div_sd_round:
          V = IC.Builder.CreateFDiv(LHS, RHS);
          break;
        }
        // Handle the masking aspect of the intrinsic.
        Value *Mask = II.getArgOperand(3);
        auto *C = dyn_cast<ConstantInt>(Mask);
        // We don't need a select if we know the mask bit is a 1.
        if (!C || !C->getValue()[0]) {
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
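    // SSE2/AVX2 use only the low 64 bits of the 128-bit shift-amount operand,
    // so only the lower half of its elements is demanded.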
    Value *Arg1 = II.getArgOperand(1);
    assert(Arg1->getType()->getPrimitiveSizeInBits() % 128 == 0 &&
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();

    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
      return IC.replaceOperand(II, 1, V);
    }
    break;
  }
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:

  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:

  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:

  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:

  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512:

  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:

  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512:
  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
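    // PCLMULQDQ reads only one qword of each 128-bit lane of each source,
    // selected by immediate bits 0 and 4; demand just the selected qwords.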
      unsigned Imm = C->getZExtValue();

      bool MadeChange = false;
      Value *Arg0 = II.getArgOperand(0);
      Value *Arg1 = II.getArgOperand(1);
      unsigned VWidth =
          cast<FixedVectorType>(Arg0->getType())->getNumElements();

      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));

      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse4a_extrq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 16 && "Unexpected operand sizes");

    // EXTRQ only uses the lowest 64 bits of Op0 and the low two bytes
    // (length and index) of Op1.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  case Intrinsic::x86_sse4a_extrqi: {
    // EXTRQI: extract Length bits from the low qword, starting at bit Index.
    Value *Op0 = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // EXTRQI only uses the lowest 64 bits of the operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_sse4a_insertq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // The second qword of Op1 carries the length and index fields.
    const APInt &V11 = CI11->getValue();

    // INSERTQ only uses the lowest 64 bits of the first operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_sse4a_insertqi: {
    // INSERTQI: insert the low Length bits of Op1 into Op0 at bit Index.
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 2 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));

    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);
    }

    // INSERTQI only uses the lowest 64 bits of the first two operands.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
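    // A variable blend selects between Op0 and Op1 based on the sign bit of
    // each mask element; if the mask reduces to a boolean vector this can
    // become a plain select.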
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Mask = II.getArgOperand(2);

    unsigned BitWidth = Mask->getType()->getScalarSizeInBits();
    if (Mask->getType()->isIntOrIntVectorTy()) {
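      // Peek through a one-use bitcast to look for a narrower mask source.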
      if (BC->hasOneUse()) {
        Value *Src = BC->getOperand(0);
        if (Src->getType()->isIntOrIntVectorTy()) {
          unsigned SrcBitWidth = Src->getType()->getScalarSizeInBits();
    if (MaskTy->getScalarSizeInBits() == OpTy->getScalarSizeInBits()) {
    Value *MaskSrc = nullptr;
    ArrayRef<int> ShuffleMask;
    if (match(Mask, m_OneUse(m_Shuffle(m_Value(MaskSrc), m_Undef(),
                                       m_Mask(ShuffleMask))))) {
      // Bail if the shuffle is irregular or contains out-of-range indices.
      if (NumElts < (int)ShuffleMask.size() || !isPowerOf2_32(NumElts) ||
          any_of(ShuffleMask,
                 [NumElts](int M) { return M < 0 || M >= NumElts; }))
        break;
      unsigned NumMaskElts = MaskTy->getNumElements();
      unsigned NumOperandElts = OpTy->getNumElements();

      // Scale the mask width to account for the shuffle of the mask source.
      unsigned NumMaskSrcElts =
          cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
      NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
      if (NumMaskElts > NumOperandElts)
        break;
      assert(MaskTy->getPrimitiveSizeInBits() ==
                 OpTy->getPrimitiveSizeInBits() &&
             "Not expecting mask and operands with different sizes");
      // Same element count: select directly.
      if (NumMaskElts == NumOperandElts) {

      // Fewer mask elements: bitcast the operands so each mask bit covers
      // multiple operand elements.
      if (NumMaskElts < NumOperandElts) {
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512: {

  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512: {

  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512: {

  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:
  case Intrinsic::x86_avx512_vpermi2var_d_128:
  case Intrinsic::x86_avx512_vpermi2var_d_256:
  case Intrinsic::x86_avx512_vpermi2var_d_512:
  case Intrinsic::x86_avx512_vpermi2var_hi_128:
  case Intrinsic::x86_avx512_vpermi2var_hi_256:
  case Intrinsic::x86_avx512_vpermi2var_hi_512:
  case Intrinsic::x86_avx512_vpermi2var_pd_128:
  case Intrinsic::x86_avx512_vpermi2var_pd_256:
  case Intrinsic::x86_avx512_vpermi2var_pd_512:
  case Intrinsic::x86_avx512_vpermi2var_ps_128:
  case Intrinsic::x86_avx512_vpermi2var_ps_256:
  case Intrinsic::x86_avx512_vpermi2var_ps_512:
  case Intrinsic::x86_avx512_vpermi2var_q_128:
  case Intrinsic::x86_avx512_vpermi2var_q_256:
  case Intrinsic::x86_avx512_vpermi2var_q_512:
  case Intrinsic::x86_avx512_vpermi2var_qi_128:
  case Intrinsic::x86_avx512_vpermi2var_qi_256:
  case Intrinsic::x86_avx512_vpermi2var_qi_512:
  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:

  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:

  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:

  case Intrinsic::x86_avx512_pternlog_d_128:
  case Intrinsic::x86_avx512_pternlog_d_256:
  case Intrinsic::x86_avx512_pternlog_d_512:
  case Intrinsic::x86_avx512_pternlog_q_128:
  case Intrinsic::x86_avx512_pternlog_q_256:
  case Intrinsic::x86_avx512_pternlog_q_512:
  }

  return std::nullopt;
}
std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();

  switch (II.getIntrinsicID()) {
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // These zero the upper elements rather than passing them through, so if
    // the low element is not demanded the whole result is zero.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return ConstantAggregateZero::get(II.getType());
    }

    // Only the lower element is used.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // Only the lower element can be undef; the upper elements are zero.
    UndefElts = UndefElts[0];
    break;
  case Intrinsic::x86_sse_rcp_ss:
  case Intrinsic::x86_sse_rsqrt_ss:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element of a scalar op isn't used then use Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }
    break;
  case Intrinsic::x86_sse_min_ss:
  case Intrinsic::x86_sse_max_ss:
  case Intrinsic::x86_sse_cmp_ss:
  case Intrinsic::x86_sse2_min_sd:
  case Intrinsic::x86_sse2_max_sd:
  case Intrinsic::x86_sse2_cmp_sd: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element of a scalar op isn't used then use Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element of operand 1 is used.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    // The lower element is undefined only if both lower elements are undef:
    // e.g. undef & 0 is known zero, not undef.
    if (!UndefElts2[0])
      UndefElts.clearBit(0);
    break;
  }
  case Intrinsic::x86_sse41_round_ss:
  case Intrinsic::x86_sse41_round_sd: {
    // Unlike the other scalar ops, the lower element of the result comes from
    // operand 1; operand 0 only supplies the upper elements.
    APInt DemandedElts2 = DemandedElts;
    DemandedElts2.clearBit(0);
    simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);

    // If the lowest element isn't used then use Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element of operand 1 is used.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    // Take the high undef elements from operand 0 and the low element from
    // operand 1.
    UndefElts.clearBit(0);
    UndefElts |= UndefElts2[0];
    break;
  }
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_max_ss_round:
  case Intrinsic::x86_avx512_mask_min_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
  case Intrinsic::x86_avx512_mask_max_sd_round:
  case Intrinsic::x86_avx512_mask_min_sd_round:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element isn't used then use Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower elements of operands 1 and 2 are used.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);

    // The lower element is undefined only if all three lower elements are
    // undefined.
    if (!UndefElts2[0] || !UndefElts3[0])
      UndefElts.clearBit(0);
    break;
  case Intrinsic::x86_sse3_addsub_pd:
  case Intrinsic::x86_sse3_addsub_ps:
  case Intrinsic::x86_avx_addsub_pd_256:
  case Intrinsic::x86_avx_addsub_ps_256: {
    // If none of the even (sub) or none of the odd (add) lanes are demanded,
    // this reduces to a single generic FP instruction.
    APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
    APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
    bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
    bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
    if (IsSubOnly || IsAddOnly) {
      assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
      Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
      return IC.Builder.CreateBinOp(
          IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
    }

    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
    break;
  }
  case Intrinsic::x86_sse_max_ps:
  case Intrinsic::x86_sse2_max_pd:
  case Intrinsic::x86_avx_max_pd_256:
  case Intrinsic::x86_avx_max_ps_256:
  case Intrinsic::x86_avx512_max_pd_512:
  case Intrinsic::x86_avx512_max_ps_512:
  case Intrinsic::x86_avx512fp16_max_ph_128:
  case Intrinsic::x86_avx512fp16_max_ph_256:
  case Intrinsic::x86_avx512fp16_max_ph_512:

  case Intrinsic::x86_sse_min_ps:
  case Intrinsic::x86_sse2_min_pd:
  case Intrinsic::x86_avx_min_pd_256:
  case Intrinsic::x86_avx_min_ps_256:
  case Intrinsic::x86_avx512_min_pd_512:
  case Intrinsic::x86_avx512_min_ps_512:
  case Intrinsic::x86_avx512fp16_min_ph_128:
  case Intrinsic::x86_avx512fp16_min_ph_256:
  case Intrinsic::x86_avx512fp16_min_ph_512:
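  // Per-element vector shifts: each result element depends only on the
  // same-numbered elements of the two inputs.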
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
    break;
  }
  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    // Note: we can't union the undef masks here; mulh(undef, undef) is not
    // necessarily undef.
    break;
  }
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512: {
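    // Per 128-bit lane, the result packs the elements of the first operand
    // and then the second, e.g.
    //   v8i16 PACK(v4i32 X, v4i32 Y)   = (X[0..3], Y[0..3])
    //   v32i8 PACK(v16i16 X, v16i16 Y) = (X[0..7], Y[0..7], X[8..15], Y[8..15])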
    auto *Ty0 = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
    assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

    unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
    unsigned VWidthPerLane = VWidth / NumLanes;
    unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

    for (int OpNum = 0; OpNum != 2; ++OpNum) {
      APInt OpDemandedElts(InnerVWidth, 0);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        unsigned LaneIdx = Lane * VWidthPerLane;
        for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
          unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
          if (DemandedElts[Idx])
            OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
        }
      }

      // Demand elements from the operand.
      APInt OpUndefElts(InnerVWidth, 0);
      simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);

      // Pack the operand's UNDEF elements, one lane at a time.
      OpUndefElts = OpUndefElts.zext(VWidth);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
        LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
        LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
        UndefElts |= LaneElts;
      }
    }
    break;
  }
  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512: {
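    // PMADD: each result element consumes a pair of adjacent elements from
    // both inputs, so scale the demanded mask up by two.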
    auto *ArgTy = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
    assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
    APInt OpDemandedElts = APIntOps::ScaleBitMask(DemandedElts, InnerVWidth);
    APInt Op0UndefElts(InnerVWidth, 0);
    APInt Op1UndefElts(InnerVWidth, 0);
    simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
    simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
    break;
  }
  // PSHUFB
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
  // PERMILVAR
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
  // PERMV
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps: {
    // The selector operand (1) maps 1:1 onto result elements; the data
    // operand cannot be simplified since any element may be selected.
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
    break;
  }
  case Intrinsic::x86_sse4a_extrq:
  case Intrinsic::x86_sse4a_extrqi:
  case Intrinsic::x86_sse4a_insertq:
  case Intrinsic::x86_sse4a_insertqi:
    // SSE4A instructions leave the upper 64 bits of the 128-bit result in an
    // undefined state.
    UndefElts.setHighBits(VWidth / 2);
    break;
  }

  return std::nullopt;
}