#include "llvm/IR/IntrinsicsX86.h"

#define DEBUG_TYPE "x86tti"

// getNegativeIsTrueBoolVec: compare the (bitcast) integer vector against zero.
VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
assert(V && "Vector must be foldable");

// getBoolVecFromMask: a constant mask folds directly to a bool vector.
if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
  return getNegativeIsTrueBoolVec(ConstantMask, DL);

// simplifyX86MaskedLoad: a zero mask means the load produces a zero vector.
if (isa<ConstantAggregateZero>(Mask))
  return IC.replaceInstUsesWith(II, ZeroVec);

// Cast the scalar pointer to a vector pointer to match the generic
// llvm.masked.load signature, then emit the target-independent intrinsic.
unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
CallInst *NewMaskedLoad = IC.Builder.CreateMaskedLoad(
    II.getType(), PtrCast, Align(1), BoolMask, ZeroVec);
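// Illustrative sketch (not part of this file; IR names invented): the
// rewrite above turns an x86-specific masked load such as
//   %v = call <4 x float> @llvm.x86.avx.maskload.ps(ptr %p, <4 x i32> %m)
// into the generic form, assuming %m folds to a constant bool vector %bm:
//   %v = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 1,
//            <4 x i1> %bm, <4 x float> zeroinitializer)
// Align(1) is conservative, and the zero pass-through matches x86 maskload
// semantics, so later target-independent combines can take over.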
// simplifyX86MaskedStore: a zero mask means the store does nothing.
if (isa<ConstantAggregateZero>(Mask)) {
  IC.eraseInstFromFunction(II);
  return true;
}

// The SSE2 maskmovdqu variant is too odd (e.g. non-temporal) to handle here.
if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
  return false;

unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
// simplifyX86immShift:
bool LogicalShift = false;
bool ShiftLeft = false;

switch (II.getIntrinsicID()) {
// Arithmetic right shifts (by immediate and by scalar).
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx512_psrai_q_128:
case Intrinsic::x86_avx512_psrai_q_256:
case Intrinsic::x86_avx512_psrai_d_512:
case Intrinsic::x86_avx512_psrai_q_512:
case Intrinsic::x86_avx512_psrai_w_512:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_avx2_psra_d:
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx512_psra_q_128:
case Intrinsic::x86_avx512_psra_q_256:
case Intrinsic::x86_avx512_psra_d_512:
case Intrinsic::x86_avx512_psra_q_512:
case Intrinsic::x86_avx512_psra_w_512:
  LogicalShift = false;
  ShiftLeft = false;
  break;
// Logical right shifts.
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx512_psrli_d_512:
case Intrinsic::x86_avx512_psrli_q_512:
case Intrinsic::x86_avx512_psrli_w_512:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx512_psrl_d_512:
case Intrinsic::x86_avx512_psrl_q_512:
case Intrinsic::x86_avx512_psrl_w_512:
  LogicalShift = true;
  ShiftLeft = false;
  break;
// Logical left shifts.
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx512_pslli_d_512:
case Intrinsic::x86_avx512_pslli_q_512:
case Intrinsic::x86_avx512_pslli_w_512:
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
case Intrinsic::x86_avx512_psll_w_512:
  LogicalShift = true;
  ShiftLeft = true;
  break;
}
assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

Value *Vec = II.getArgOperand(0);
Value *Amt = II.getArgOperand(1);
auto *VT = cast<FixedVectorType>(Vec->getType());
Type *SVT = VT->getElementType();
unsigned VWidth = VT->getNumElements();

// In-range shift amounts lower directly to a generic IR shift.
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                  : Builder.CreateLShr(Vec, Amt))
                     : Builder.CreateAShr(Vec, Amt));

// Out-of-range arithmetic shifts are clamped to BitWidth - 1.
Amt = ConstantInt::get(SVT, BitWidth - 1);

// Shift-by-scalar: prove via known bits that the amount is in range.
assert(AmtVT->isVectorTy() &&
       cast<VectorType>(AmtVT)->getElementType() == SVT &&
       "Unexpected shift-by-scalar type");
unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
KnownBits KnownLowerBits = llvm::computeKnownBits(
    Amt, DemandedLower, II.getDataLayout());
KnownBits KnownUpperBits = llvm::computeKnownBits(
    Amt, DemandedUpper, II.getDataLayout());
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                  : Builder.CreateLShr(Vec, Amt))
                     : Builder.CreateAShr(Vec, Amt));

// Constant shift-by-scalar amount: reassemble the low 64 bits from the
// amount vector's sub-elements.
auto *CDV = dyn_cast<ConstantDataVector>(Amt);
assert(AmtVT->isVectorTy() &&
       cast<VectorType>(AmtVT)->getElementType() == SVT &&
       "Unexpected shift-by-scalar type");
for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
  unsigned SubEltIdx = (NumSubElts - 1) - i;
  auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
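  // Hedged note: the non-immediate psra/psrl/psll forms shift every lane by
  // the single scalar count held in the low 64 bits of the second operand,
  // which is why a constant amount vector is concatenated back into one
  // 64-bit value here. For example, a <8 x i16> psrl whose amount vector is
  // <2, 0, 0, 0, ...> shifts all eight lanes right by 2.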
// simplifyX86varShift:
bool LogicalShift = false;
bool ShiftLeft = false;

switch (II.getIntrinsicID()) {
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
case Intrinsic::x86_avx512_psrav_q_128:
case Intrinsic::x86_avx512_psrav_q_256:
case Intrinsic::x86_avx512_psrav_d_512:
case Intrinsic::x86_avx512_psrav_q_512:
case Intrinsic::x86_avx512_psrav_w_128:
case Intrinsic::x86_avx512_psrav_w_256:
case Intrinsic::x86_avx512_psrav_w_512:
  LogicalShift = false;
  ShiftLeft = false;
  break;
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx512_psrlv_d_512:
case Intrinsic::x86_avx512_psrlv_q_512:
case Intrinsic::x86_avx512_psrlv_w_128:
case Intrinsic::x86_avx512_psrlv_w_256:
case Intrinsic::x86_avx512_psrlv_w_512:
  LogicalShift = true;
  ShiftLeft = false;
  break;
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx512_psllv_d_512:
case Intrinsic::x86_avx512_psllv_q_512:
case Intrinsic::x86_avx512_psllv_w_128:
case Intrinsic::x86_avx512_psllv_w_256:
case Intrinsic::x86_avx512_psllv_w_512:
  LogicalShift = true;
  ShiftLeft = true;
  break;
}
assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

Value *Vec = II.getArgOperand(0);
Value *Amt = II.getArgOperand(1);
auto *VT = cast<FixedVectorType>(II.getType());
Type *SVT = VT->getElementType();
int NumElts = VT->getNumElements();

// Fully in-range per-element amounts lower to a generic IR shift.
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                  : Builder.CreateLShr(Vec, Amt))
                     : Builder.CreateAShr(Vec, Amt));

// Constant per-lane shift amounts: fold out-of-range lanes explicitly.
auto *CShift = dyn_cast<Constant>(Amt);
if (!CShift)
  return nullptr;

bool AnyOutOfRange = false;
for (int I = 0; I < NumElts; ++I) {
  auto *CElt = CShift->getAggregateElement(I);
  if (isa_and_nonnull<UndefValue>(CElt)) {
    // ...
    continue;
  }

  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
  if (!COp)
    return nullptr;

  APInt ShiftVal = COp->getValue();
  // ...
  // For logical shifts, an out-of-range amount produces zero, not poison.
  AnyOutOfRange = LogicalShift;
}

// Out-of-range lanes are materialized as zero constants.
for (int Idx : ShiftAmts) {
  // ...
  assert(LogicalShift && "Logical shift expected");
  ConstantVec.push_back(ConstantInt::getNullValue(SVT));
}

// Otherwise build the vector of in-range shift amounts.
for (int Idx : ShiftAmts) {
  // ...
}
// simplifyX86pack:
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
Type *ResTy = II.getType();

// Fast path for a completely undef result.
if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
  return UndefValue::get(ResTy);

auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
unsigned NumSrcElts = ArgTy->getNumElements();
assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
       "Unexpected packing types");

unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
       "Unexpected packing types");

// Constant folding only.
if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
  return nullptr;

// Clamp each source element to the destination's saturation range.
APInt MinValue, MaxValue;
// ...

// Shuffle the clamped elements into pack order: per 128-bit lane, first the
// elements from Arg0, then the elements from Arg1.
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
  for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
    PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
  for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
    PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
}
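// Hedged example of the fold above: packsswb on <8 x i16> inputs saturates
// each element to [-128, 127] and interleaves per 128-bit lane, e.g.
//   packsswb(<300, -300, ...>, <1, 2, ...>) -> <127, -128, ..., 1, 2, ...>
// which is why MinValue/MaxValue clamping must happen before the
// shufflevector is built.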
// simplifyX86pmulh:
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
auto *ResTy = cast<FixedVectorType>(II.getType());
auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
assert(ArgTy == ResTy && ResTy->getScalarSizeInBits() == 16 &&
       "Unexpected PMULH types");
assert((!IsRounding || IsSigned) && "PMULHRS instruction must be signed");

// The high half of undef * x is folded to zero.
if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
  return ConstantAggregateZero::get(ResTy);

// Multiply by zero.
if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
  return ConstantAggregateZero::get(ResTy);

// Multiply by one (non-rounding forms): the unsigned high half is zero and
// the signed high half is a sign splat of the other operand.
return IsSigned ? Builder.CreateAShr(Arg1, 15)
                : ConstantAggregateZero::get(ResTy);
return IsSigned ? Builder.CreateAShr(Arg0, 15)
                : ConstantAggregateZero::get(ResTy);

// Constant folding: widen, multiply, then take the high half.
if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
  return nullptr;
auto Cast =
    IsSigned ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
// simplifyX86pmadd:
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
auto *ResTy = cast<FixedVectorType>(II.getType());
[[maybe_unused]] auto *ArgTy = cast<FixedVectorType>(Arg0->getType());

unsigned NumDstElts = ResTy->getNumElements();
assert(ArgTy->getNumElements() == (2 * NumDstElts) &&
       ResTy->getScalarSizeInBits() == (2 * ArgTy->getScalarSizeInBits()) &&
       "Unexpected PMADD types");

// Undef or zero arguments fold to a zero result.
if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
  return ConstantAggregateZero::get(ResTy);
if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
  return ConstantAggregateZero::get(ResTy);

// Constant folding only.
if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
  return nullptr;

// Split each operand into the even/odd elements feeding each result element.
for (unsigned I = 0; I != NumDstElts; ++I) {
  // ... (collect the two source elements that map to result element I)
}

Instruction::CastOps LHSCast =
    IsPMADDWD ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
LHSLo = Builder.CreateCast(LHSCast, LHSLo, ResTy);
LHSHi = Builder.CreateCast(LHSCast, LHSHi, ResTy);
RHSLo = Builder.CreateCast(Instruction::CastOps::SExt, RHSLo, ResTy);
RHSHi = Builder.CreateCast(Instruction::CastOps::SExt, RHSHi, ResTy);
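// Hedged note on the casts above: pmaddwd computes, per i32 result lane,
//   sext(a[2i]) * sext(b[2i]) + sext(a[2i+1]) * sext(b[2i+1]),
// while pmaddubsw zero-extends its first operand and sign-extends its
// second (saturating the i16 sum). That is why the LHS cast is SExt only
// for PMADDWD, while the RHS is always sign-extended.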
// simplifyX86movmsk:
Value *Arg = II.getArgOperand(0);
Type *ResTy = II.getType();

// movmsk(undef) -> zero, since the upper result bits must be zero.
if (isa<UndefValue>(Arg))
  return Constant::getNullValue(ResTy);

// The MMX variant's argument is not a fixed vector; bail in that case.
auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
if (!ArgTy)
  return nullptr;

unsigned NumElts = ArgTy->getNumElements();
// simplifyX86addcarry:
Value *CarryIn = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
Value *Op2 = II.getArgOperand(2);
Type *RetTy = II.getType();
Type *OpTy = Op1->getType();
assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
       RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
       "Unexpected types for x86 addcarry");
// simplifyTernarylogic:
auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
if (!ArgImm || ArgImm->getValue().uge(256))
  return nullptr;

Value *ArgA = II.getArgOperand(0);
Value *ArgB = II.getArgOperand(1);
Value *ArgC = II.getArgOperand(2);

// Each helper builds the IR op and, in the second pair member, recomputes
// the truth table of the combined value.
auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
};
auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
};
auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
};
auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
  return {Builder.CreateNot(V.first), ~V.second};
};
auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };

bool AIsConst = match(ArgA, m_ImmConstant());
bool BIsConst = match(ArgB, m_ImmConstant());
bool CIsConst = match(ArgC, m_ImmConstant());
bool ABIsConst = AIsConst && BIsConst;
bool ACIsConst = AIsConst && CIsConst;
bool BCIsConst = BIsConst && CIsConst;
bool ABCIsConst = AIsConst && BIsConst && CIsConst;

// Each input is tagged with its own truth table (used for verification).
std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
std::pair<Value *, uint8_t> Res = {nullptr, 0};

uint8_t Imm = ArgImm->getValue().getZExtValue();
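// Hedged worked example of the encoding: bit i of Imm is the output of the
// ternary function for the input combination (a, b, c) given by the three
// bits of i. Tagging A = 0xf0, B = 0xcc and C = 0xaa makes each uint8_t
// literally that operand's truth table, so every lambda combination computes
// its own table alongside the IR. E.g.
//   And(And(A, B), C).second == 0xf0 & 0xcc & 0xaa == 0x80
// so Imm == 0x80 is exactly a & b & c, and the verification assert after the
// switch can check Res.second == Imm.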
// One case per immediate value; this excerpt keeps only the Res assignments.
Res = Nor(Or(A, B), C);
Res = Nor(A, Xnor(B, C));
Res = Nor(A, Nand(B, C));
Res = Nor(A, Not(C));
Res = Nor(A, Nor(C, Not(B)));
Res = Nor(A, Not(B));
Res = Nor(A, Nor(B, Not(C)));
Res = Nor(A, Nor(B, C));
Res = Nor(Xnor(A, C), B);
Res = Nor(Xnor(A, B), C);
Res = Nor(Xnor(A, B), Xnor(A, C));
Res = And(Nand(A, B), Xnor(B, C));
Res = Nand(A, Or(B, C));
Res = Nor(Nand(A, C), B);
Res = Nor(B, Not(C));
Res = Nor(B, Nor(C, Not(A)));
Res = Nor(Xnor(A, B), Xor(A, C));
Res = Xor(A, Nand(Nand(A, B), C));
Res = Nor(Xnor(A, B), Nor(B, C));
Res = Nand(A, Or(B, Not(C)));
Res = Nor(B, Not(A));
Res = Nor(Nor(A, Not(C)), B);
Res = Nor(Nor(A, C), B);
Res = Nand(Or(A, C), B);
Res = Nor(Xnor(A, B), Nor(A, C));
Res = Nand(Or(A, Not(C)), B);
Res = Nor(Nand(A, B), C);
Res = Nor(Xor(A, B), Xnor(A, C));
Res = Xor(A, Nand(Nand(A, C), B));
Res = Nor(C, Not(B));
Res = Nor(Nor(B, Not(A)), C);
Res = Nor(Xnor(A, C), Nor(B, C));
Res = And(Nand(A, C), B);
Res = Nand(A, Nand(B, Not(C)));
Res = Nor(C, Not(A));
Res = Nor(Nor(A, Not(B)), C);
Res = Nor(Nor(A, B), C);
Res = Nand(Or(A, B), C);
Res = Nor(Nor(A, B), Xnor(A, C));
Res = Nand(Or(A, Not(B)), C);
Res = Nor(Nor(A, C), Xnor(B, C));
Res = Nor(Nor(A, B), Xnor(B, C));
Res = Xor(Xnor(A, B), C);
Res = Nand(A, Xnor(B, C));
Res = And(A, Nand(B, C));
Res = Nand(Nand(A, Not(C)), B);
Res = Nand(Nand(A, Not(B)), C);
Res = Nand(Xnor(A, C), B);
Res = Nand(Xnor(A, B), C);
Res = Nand(And(A, B), C);
Res = And(Xnor(A, B), C);
Res = Nor(Xor(A, B), Nor(C, Not(A)));
Res = And(Xnor(A, C), B);
Res = Nor(Xor(A, C), Nor(B, Not(A)));
Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
Res = Xor(A, Nand(B, C));
Res = Xor(B, Nor(Nor(B, Not(A)), C));
Res = And(Nand(A, Not(B)), C);
Res = And(Nand(A, Not(C)), B);
Res = Nand(A, Nand(B, C));
Res = And(A, Xnor(B, C));
Res = Nor(Nor(A, Not(B)), Xor(B, C));
Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
Res = Xor(Nand(A, C), B);
Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
Res = Xor(Nand(A, B), C);
Res = Nor(Nor(A, B), Xor(B, C));
Res = Xor(Nor(B, Not(A)), C);
Res = Or(Nor(A, B), Xnor(B, C));
Res = Xor(B, Nor(C, Not(A)));
Res = Or(Nor(A, C), Xnor(B, C));
Res = Nand(A, Xor(B, C));
Res = Xor(A, Nor(Nor(A, Not(B)), C));
Res = Xor(A, Nor(Nor(A, B), C));
Res = Xor(Nor(A, Not(B)), C);
Res = Or(Nor(A, B), Xnor(A, C));
Res = Or(Nor(A, B), C);
Res = Xor(Nor(Xnor(B, C), A), C);
Res = Or(Nor(A, Not(B)), C);
Res = Or(C, Not(A));
Res = And(A, Nand(B, Not(C)));
Res = Nand(Nand(A, C), B);
Res = Xor(A, Nor(C, Not(B)));
Res = Or(Xnor(A, C), Nor(B, C));
Res = Nand(Xor(A, C), B);
Res = Xor(Nor(Xnor(A, C), B), C);
Res = Or(Nor(B, Not(A)), C);
Res = Or(C, Not(B));
Res = Or(Nand(A, B), C);
Res = Xor(A, Nor(Nor(A, Not(C)), B));
Res = Xor(A, Nor(Nor(A, C), B));
Res = Xor(Nor(A, Not(C)), B);
Res = Or(Xnor(A, B), Nor(A, C));
Res = Xor(B, Nor(A, Xnor(B, C)));
Res = Or(Nor(A, C), B);
Res = Or(Nor(A, Not(C)), B);
Res = Or(B, Not(A));
Res = Xor(A, Nor(B, Not(C)));
Res = Or(Xnor(A, B), Nor(B, C));
Res = Nand(Nand(A, B), C);
Res = Nand(Xor(A, B), C);
Res = Xor(Nor(Xnor(A, B), C), B);
Res = Or(B, Nor(C, Not(A)));
Res = Or(B, Not(C));
Res = Or(Nand(A, C), B);
Res = Xor(A, Nor(Xnor(A, C), B));
Res = Xor(A, Nor(Xnor(A, B), C));
Res = Or(Xnor(A, B), Xnor(A, C));
Res = Or(Xnor(A, B), C);
Res = Or(Xnor(A, C), B);
Res = Nand(A, Nor(B, C));
Res = Or(A, Nor(B, C));
Res = Or(A, Nor(B, Not(C)));
Res = Or(A, Not(B));
Res = Or(A, Nor(C, Not(B)));
Res = Or(A, Not(C));
Res = Or(A, Nand(B, C));
Res = Or(A, Xnor(B, C));
Res = Nand(Nor(A, C), B);
Res = Nand(Nor(A, B), C);

assert((Res.first == nullptr || Res.second == Imm) &&
       "Simplification of ternary logic does not verify!");
// simplifyX86insertps:
auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
auto *VecTy = cast<FixedVectorType>(II.getType());
assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

uint8_t Imm = CInt->getZExtValue();
uint8_t ZMask = Imm & 0xf;
uint8_t DestLane = (Imm >> 4) & 0x3;
uint8_t SourceLane = (Imm >> 6) & 0x3;
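// Hedged example of the INSERTPS immediate decoded above: bits [3:0] select
// result lanes to zero, bits [5:4] pick the destination lane and bits [7:6]
// the source lane. E.g. Imm = 0x61 (0b01'10'0001) copies source lane 1 into
// destination lane 2 and zeroes result lane 0.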
int ShuffleMask[4] = {0, 1, 2, 3};

Value *V1 = II.getArgOperand(1);

// If both operands are the same (or the destination lane is zeroed anyway),
// this is a single-vector shuffle.
if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
    (ZMask & (1 << DestLane))) {
  ShuffleMask[DestLane] = SourceLane;
  // Zeroed lanes select from the zero vector (elements 4+).
  for (unsigned i = 0; i < 4; ++i)
    if ((ZMask >> i) & 0x1)
      ShuffleMask[i] = i + 4;
} else {
  // Two-input shuffle: take the source lane from the second vector.
  ShuffleMask[DestLane] = SourceLane + 4;
}
// simplifyX86extrq: constant fold or convert to a byte shuffle.
auto LowConstantHighUndef = [&](uint64_t Val) {
  Constant *Args[] = {ConstantInt::get(IntTy64, Val),
                      UndefValue::get(IntTy64)};
  return ConstantVector::get(Args);
};

auto *C0 = dyn_cast<Constant>(Op0);
ConstantInt *CI0 =
    C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
       : nullptr;

if (CILength && CIIndex) {
  // Byte-aligned extraction becomes a <16 x i8> shuffle: bytes [0, Length)
  // from the extracted field, zero-fill to byte 8, upper 8 bytes undef.
  for (int i = 0; i != (int)Length; ++i)
    ShuffleMask.push_back(i + Index);
  for (int i = Length; i != 8; ++i)
    ShuffleMask.push_back(i + 16);
  for (int i = 8; i != 16; ++i)
    ShuffleMask.push_back(-1);
  // ...

  // Constant fold when the low element is a constant integer.
  APInt Elt = CI0->getValue();
  // ...
}

// EXTRQ with constant Length/Index folds to EXTRQI.
if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
  Value *Args[] = {Op0, CILength, CIIndex};
  // ...
}

// Extracting from a zero value gives zero.
if (CI0 && CI0->isZero())
  return LowConstantHighUndef(0);

// simplifyX86insertq: the insert as a byte shuffle.
for (int i = 0; i != (int)Index; ++i)
  ShuffleMask.push_back(i);
for (int i = 0; i != (int)Length; ++i)
  ShuffleMask.push_back(i + 16);
// ...
for (int i = 8; i != 16; ++i)
  ShuffleMask.push_back(-1);

auto *C0 = dyn_cast<Constant>(Op0);
auto *C1 = dyn_cast<Constant>(Op1);
ConstantInt *CI00 =
    C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
       : nullptr;
ConstantInt *CI10 =
    C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
       : nullptr;

// Fold when both low i64 elements are constant.
APInt V00 = CI00->getValue();
APInt V10 = CI10->getValue();
APInt Val = V00 | V10;
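// Hedged note on the merge above: INSERTQI writes the low Length bits of the
// second operand into the first operand at bit position Index. By this
// point, V00 holds the destination value with that field cleared and V10
// holds the source bits already positioned at Index, so a plain OR yields
// the folded low 64-bit result.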
// INSERTQ with constant Length/Index folds to INSERTQI.
if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
  Value *Args[] = {Op0, Op1, CILength, CIIndex};
  // ...
}
// simplifyX86pshufb: convert a constant-mask pshufb into a shufflevector.
auto *V = dyn_cast<Constant>(II.getArgOperand(1));
if (!V)
  return nullptr;

auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
       "Unexpected number of elements in shuffle mask!");

// Construct a shuffle mask from constant integers or UNDEFs.
for (unsigned I = 0; I < NumElts; ++I) {
  Constant *COp = V->getAggregateElement(I);
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    return nullptr;

  if (isa<UndefValue>(COp)) {
    // ...
    continue;
  }

  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
  // ... (MSB set selects zero; the low bits index within the 16-byte lane)
}

auto V1 = II.getArgOperand(0);

// simplifyX86vpermilvar: same idea for vpermilvar* selectors.
auto *V = dyn_cast<Constant>(II.getArgOperand(1));
if (!V)
  return nullptr;

auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
bool IsPD = VecTy->getScalarType()->isDoubleTy();
unsigned NumLaneElts = IsPD ? 2 : 4;
assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);

// Construct a shuffle mask from constant integers or UNDEFs.
for (unsigned I = 0; I < NumElts; ++I) {
  Constant *COp = V->getAggregateElement(I);
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    return nullptr;

  if (isa<UndefValue>(COp)) {
    // ...
    continue;
  }

  APInt Index = cast<ConstantInt>(COp)->getValue();
  // ...
  // The selectors are lane-relative; rebase each index into its own lane.
  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
  Indexes[I] = Index.getZExtValue();
}

auto V1 = II.getArgOperand(0);

// simplifyX86vpermv: vpermd/vpermps with a constant selector.
auto *V = dyn_cast<Constant>(II.getArgOperand(1));
if (!V)
  return nullptr;

auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned Size = VecTy->getNumElements();
assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
       "Unexpected shuffle mask size");

// Construct a shuffle mask from constant integers or UNDEFs.
for (unsigned I = 0; I < Size; ++I) {
  Constant *COp = V->getAggregateElement(I);
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    return nullptr;

  if (isa<UndefValue>(COp)) {
    // ...
    continue;
  }
  // ...
}

auto V1 = II.getArgOperand(0);

// simplifyX86vpermv3: vpermi2/vpermt2 select across two source vectors.
auto *V = dyn_cast<Constant>(II.getArgOperand(1));
if (!V)
  return nullptr;

auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned Size = VecTy->getNumElements();
assert((Size == 2 || Size == 4 || Size == 8 || Size == 16 || Size == 32 ||
        Size == 64) &&
       "Unexpected shuffle mask size");

// Construct a shuffle mask from constant integers or UNDEFs.
for (unsigned I = 0; I < Size; ++I) {
  Constant *COp = V->getAggregateElement(I);
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    return nullptr;

  if (isa<UndefValue>(COp)) {
    // ...
    continue;
  }
  // ...
}

auto V1 = II.getArgOperand(0);
auto V2 = II.getArgOperand(2);
std::optional<Instruction *>
X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
    APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
    return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
  };

  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
    // If the RHS is a constant we can try some simplifications.
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      uint64_t Shift = C->getZExtValue();
      uint64_t Length = (Shift >> 8) & 0xff;
      Shift &= 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      // ...
      // If the LHS is also a constant, we can completely constant fold this.
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue() >> Shift;
        // ...
        Result &= maskTrailingOnes<uint64_t>(Length);
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
    // If the RHS is a constant we can try some simplifications.
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      uint64_t Index = C->getZExtValue() & 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      // ...
      // If the LHS is also a constant, we can completely constant fold this.
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue();
        Result &= maskTrailingOnes<uint64_t>(Index);
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      }
      if (MaskC->isAllOnesValue()) {
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));
      }

      // A single contiguous run of mask bits is just a mask-and-shift.
      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        Value *Input = II.getArgOperand(0);
        Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1));
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
        return IC.replaceInstUsesWith(II,
                                      IC.Builder.CreateLShr(Masked, ShiftAmt));
      }

      // Fully constant PEXT: gather the selected source bits one at a time.
      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        uint64_t Result = 0;
        uint64_t BitToSet = 1;

        while (Mask) {
          // Isolate the lowest set mask bit and test it against the source.
          uint64_t BitToTest = Mask & -Mask;
          if (BitToTest & Src)
            Result |= BitToSet;
          BitToSet <<= 1;
          // Clear the lowest set mask bit.
          Mask &= Mask - 1;
        }

        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
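  // Hedged example for the PEXT folds above: pext(src, mask) gathers the
  // bits of src selected by mask into the low bits of the result, e.g.
  //   pext(0b101101, 0b001110) == 0b110  (bits 1-3 of src).
  // A zero mask gives 0, an all-ones mask is the identity, and a contiguous
  // (shifted) mask is just an and followed by lshr by the mask's start index.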
  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      }
      if (MaskC->isAllOnesValue()) {
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));
      }

      // A single contiguous run of mask bits is just a shift-and-mask.
      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        Value *Input = II.getArgOperand(0);
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
        Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt);
        return IC.replaceInstUsesWith(
            II, IC.Builder.CreateAnd(Shifted, II.getArgOperand(1)));
      }

      // Fully constant PDEP: deposit the source bits one at a time.
      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        uint64_t Result = 0;
        uint64_t BitToTest = 1;

        while (Mask) {
          uint64_t BitToSet = Mask & -Mask;
          if (BitToTest & Src)
            Result |= BitToSet;
          BitToTest <<= 1;
          Mask &= Mask - 1;
        }

        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
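  // Hedged example for the PDEP folds above: pdep is the inverse scatter,
  // depositing the low bits of src at the positions set in mask, e.g.
  //   pdep(0b110, 0b001110) == 0b001100.
  // A contiguous mask reduces to shl by the mask's start index (plus an and
  // with the mask).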
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
    // These intrinsics only demand the 0th element of their input vectors.
    // If that is scalarizable, use the scalar value directly.
    Value *Arg = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:
    if (Value *V = simplifyX86movmsk(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
    // These intrinsics only demand the 0th element of their input vectors.
    // If that is scalarizable, use the scalar value directly.
    bool MadeChange = false;
    Value *Arg0 = II.getArgOperand(0);
    Value *Arg1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
    // If the rounding mode is CUR_DIRECTION(4), these become regular FP ops.
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);

        Value *V;
        switch (II.getIntrinsicID()) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_add_ps_512:
        case Intrinsic::x86_avx512_add_pd_512:
          V = IC.Builder.CreateFAdd(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_sub_ps_512:
        case Intrinsic::x86_avx512_sub_pd_512:
          V = IC.Builder.CreateFSub(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_mul_ps_512:
        case Intrinsic::x86_avx512_mul_pd_512:
          V = IC.Builder.CreateFMul(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_div_ps_512:
        case Intrinsic::x86_avx512_div_pd_512:
          V = IC.Builder.CreateFDiv(Arg0, Arg1);
          break;
        }
        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
    // If the rounding mode is CUR_DIRECTION(4), these become regular FP ops.
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
      if (R->getValue() == 4) {
        // Extract the element as scalars.
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);
        // ... (extract element 0 of each operand into LHS and RHS)

        Value *V;
        switch (II.getIntrinsicID()) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_mask_add_ss_round:
        case Intrinsic::x86_avx512_mask_add_sd_round:
          V = IC.Builder.CreateFAdd(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_sub_ss_round:
        case Intrinsic::x86_avx512_mask_sub_sd_round:
          V = IC.Builder.CreateFSub(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_mul_ss_round:
        case Intrinsic::x86_avx512_mask_mul_sd_round:
          V = IC.Builder.CreateFMul(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_div_ss_round:
        case Intrinsic::x86_avx512_mask_div_sd_round:
          V = IC.Builder.CreateFDiv(LHS, RHS);
          break;
        }

        // Handle the masking aspect of the intrinsic.
        Value *Mask = II.getArgOperand(3);
        auto *C = dyn_cast<ConstantInt>(Mask);
        // We don't need a select if we know the mask bit is a 1.
        if (!C || !C->getValue()[0]) {
          // Cast the mask to an i1 vector and extract the lowest element.
          auto *MaskTy = FixedVectorType::get(
              IC.Builder.getInt1Ty(),
              cast<IntegerType>(Mask->getType())->getBitWidth());
          // ...
        }
        // ...
      }
    }
    break;
  // Constant fold shift-by-immediate intrinsics.
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  // Shift-by-scalar intrinsics.
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // These intrinsics only demand the low 64 bits of the 128-bit amount
    // vector, so the upper elements can be simplified away.
    Value *Arg1 = II.getArgOperand(1);
    assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();

    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
      return IC.replaceOperand(II, 1, V);
    }
    break;
  }
  // Variable (per-element) shift intrinsics.
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    if (Value *V = simplifyX86varShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
    if (Value *V = simplifyX86pack(II, IC.Builder, /*IsSigned=*/true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:
    if (Value *V = simplifyX86pack(II, IC.Builder, /*IsSigned=*/false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
    if (Value *V = simplifyX86pmulh(II, IC.Builder, /*IsSigned=*/true,
                                    /*IsRounding=*/false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
    if (Value *V = simplifyX86pmulh(II, IC.Builder, /*IsSigned=*/false,
                                    /*IsRounding=*/false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512:
    if (Value *V = simplifyX86pmulh(II, IC.Builder, /*IsSigned=*/true,
                                    /*IsRounding=*/true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
    if (Value *V = simplifyX86pmadd(II, IC.Builder, /*IsPMADDWD=*/true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512:
    if (Value *V = simplifyX86pmadd(II, IC.Builder, /*IsPMADDWD=*/false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      unsigned Imm = C->getZExtValue();

      bool MadeChange = false;
      Value *Arg0 = II.getArgOperand(0);
      Value *Arg1 = II.getArgOperand(1);
      unsigned VWidth =
          cast<FixedVectorType>(Arg0->getType())->getNumElements();

      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));
      if (Value *V =
              IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) {
        IC.replaceOperand(II, 0, V);
        MadeChange = true;
      }

      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
      if (Value *V =
              IC.SimplifyDemandedVectorElts(Arg1, DemandedElts2, UndefElts2)) {
        IC.replaceOperand(II, 1, V);
        MadeChange = true;
      }
      // ...
    }
    break;
  }
  case Intrinsic::x86_sse41_insertps:
    if (Value *V = simplifyX86insertps(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse4a_extrq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 16 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    auto *C1 = dyn_cast<Constant>(Op1);
    ConstantInt *CILength =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
           : nullptr;
    ConstantInt *CIIndex =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
           : nullptr;

    // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
    if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // EXTRQ only uses the lowest 64 bits of the first operand and the lowest
    // 16 bits of the second.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  case Intrinsic::x86_sse4a_extrqi: {
    // EXTRQI: Extract Length bits starting from Index. Zero-pad the remaining
    // bits of the lower 64 bits; the upper 64 bits are undefined.
    Value *Op0 = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // See if we're dealing with constant values.
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2));

    // Attempt to simplify to a constant or shuffle vector.
    if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // EXTRQI only uses the lowest 64 bits of the first operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_sse4a_insertq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
           "Unexpected operand size");

    // See if we're dealing with constant values.
    auto *C1 = dyn_cast<Constant>(Op1);
    ConstantInt *CI11 =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
           : nullptr;

    // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
    if (CI11) {
      const APInt &V11 = CI11->getValue();
      // ...
    }

    // INSERTQ only uses the lowest 64 bits of the first operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_sse4a_insertqi: {
    // INSERTQI: Extract the lowest Length bits from the lower half of the
    // second source and insert them over the first source starting at bit
    // Index. The upper 64 bits are undefined.
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 2 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));

    // Attempt to simplify to a constant or shuffle vector.
    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);
      // ...
    }

    // INSERTQI only uses the lowest 64 bits of the first two operands.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    // These blends select per lane based on the sign bit of the mask.
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Mask = II.getArgOperand(2);

    // Zero Mask - select 1st argument.
    if (isa<ConstantAggregateZero>(Mask)) {
      return IC.replaceInstUsesWith(II, Op0);
    }

    // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
    if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
      // ...
    }

    // Peek through a one-use shuffle of the mask to find a bool source.
    Value *MaskSrc = nullptr;
    ArrayRef<int> ShuffleMask;
    if (match(Mask, m_OneUse(m_Shuffle(m_Value(MaskSrc), m_Undef(),
                                       m_Mask(ShuffleMask))))) {
      // Bail if the shuffle was irregular or contains undefs.
      int NumElts = cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
      if (NumElts < (int)ShuffleMask.size() || !isPowerOf2_32(NumElts) ||
          any_of(ShuffleMask,
                 [NumElts](int M) { return M < 0 || M >= NumElts; }))
        break;
      // ...
    }

    // Convert to a vector select if we can bypass casts and find a boolean
    // vector condition value.
    auto *MaskTy = cast<FixedVectorType>(Mask->getType());
    auto *OpTy = cast<FixedVectorType>(II.getType());
    unsigned NumMaskElts = MaskTy->getNumElements();
    unsigned NumOperandElts = OpTy->getNumElements();

    // If we peeked through a shuffle, scale the mask element count.
    unsigned NumMaskSrcElts =
        cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
    NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
    // Bail if the mask became wider than the operands.
    if (NumMaskElts > NumOperandElts)
      break;

    assert(MaskTy->getPrimitiveSizeInBits() ==
               OpTy->getPrimitiveSizeInBits() &&
           "Not expecting mask and operands with different sizes");

    if (NumMaskElts == NumOperandElts) {
      // ...
    }
    // If the mask has fewer (wider) elements, bitcast the operands to match.
    if (NumMaskElts < NumOperandElts) {
      // ...
    }
    break;
  }
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
    if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
    if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:
    if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_avx512_vpermi2var_d_128:
  case Intrinsic::x86_avx512_vpermi2var_d_256:
  case Intrinsic::x86_avx512_vpermi2var_d_512:
  case Intrinsic::x86_avx512_vpermi2var_hi_128:
  case Intrinsic::x86_avx512_vpermi2var_hi_256:
  case Intrinsic::x86_avx512_vpermi2var_hi_512:
  case Intrinsic::x86_avx512_vpermi2var_pd_128:
  case Intrinsic::x86_avx512_vpermi2var_pd_256:
  case Intrinsic::x86_avx512_vpermi2var_pd_512:
  case Intrinsic::x86_avx512_vpermi2var_ps_128:
  case Intrinsic::x86_avx512_vpermi2var_ps_256:
  case Intrinsic::x86_avx512_vpermi2var_ps_512:
  case Intrinsic::x86_avx512_vpermi2var_q_128:
  case Intrinsic::x86_avx512_vpermi2var_q_256:
  case Intrinsic::x86_avx512_vpermi2var_q_512:
  case Intrinsic::x86_avx512_vpermi2var_qi_128:
  case Intrinsic::x86_avx512_vpermi2var_qi_256:
  case Intrinsic::x86_avx512_vpermi2var_qi_512:
    if (Value *V = simplifyX86vpermv3(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:
    if (Instruction *I = simplifyX86MaskedLoad(II, IC)) {
      return I;
    }
    break;
  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:
    if (simplifyX86MaskedStore(II, IC)) {
      return nullptr;
    }
    break;
  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:
    if (Value *V = simplifyX86addcarry(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_avx512_pternlog_d_128:
  case Intrinsic::x86_avx512_pternlog_d_256:
  case Intrinsic::x86_avx512_pternlog_d_512:
  case Intrinsic::x86_avx512_pternlog_q_128:
  case Intrinsic::x86_avx512_pternlog_q_256:
  case Intrinsic::x86_avx512_pternlog_q_512:
    if (Value *V = simplifyTernarylogic(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }
  return std::nullopt;
}
std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
    bool &KnownBitsComputed) const {
  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx2_pmovmskb: {
    // MOVMSK copies the vector elements' sign bits to the low bits of the
    // scalar result and zeros the rest.
    unsigned ArgWidth;
    if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
      ArgWidth = 8; // Arg is an x86_mmx value, but treated as <8 x i8>.
    } else {
      auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType());
      ArgWidth = ArgType->getNumElements();
    }

    // If none of the demanded bits can be set, the result is zero (the
    // demanded mask is known to be non-zero already).
    Type *VTy = II.getType();
    APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
    if (DemandedElts.isZero()) {
      return ConstantInt::getNullValue(VTy);
    }
    // ...
    // The upper result bits are known to be zero.
    Known.Zero.setBitsFrom(ArgWidth);
    KnownBitsComputed = true;
    break;
  }
  }
  return std::nullopt;
}
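// Hedged example of the known-bits fact above: for
// @llvm.x86.sse2.pmovmskb.128 the i32 result carries only 16 meaningful low
// bits, so setting Known.Zero from bit 16 upward lets later combines drop
// redundant masks such as (movmsk & 0xffff).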
std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // These zero the upper elements rather than passing them through, so if
    // the low element is not demanded the whole result is a zero vector.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return ConstantAggregateZero::get(II.getType());
    }

    // Only the lower element is used.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // Only the lower element can be undef; the high elements are zero.
    UndefElts = UndefElts[0];
    break;
  case Intrinsic::x86_sse_rcp_ss:
  case Intrinsic::x86_sse_rsqrt_ss:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the low element of this scalar op isn't used, pass through Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }
    // ...
    break;
  case Intrinsic::x86_sse_min_ss:
  case Intrinsic::x86_sse_max_ss:
  case Intrinsic::x86_sse_cmp_ss:
  case Intrinsic::x86_sse2_min_sd:
  case Intrinsic::x86_sse2_max_sd:
  case Intrinsic::x86_sse2_cmp_sd: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the low element of this scalar op isn't used, pass through Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element of operand 1 is demanded.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    // ...
    break;
  }
  case Intrinsic::x86_sse41_round_ss:
  case Intrinsic::x86_sse41_round_sd: {
    // The high elements come from operand 0, the low element from operand 1.
    APInt DemandedElts2 = DemandedElts;
    DemandedElts2.clearBit(0);
    simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);

    // If the low element of this scalar op isn't used, pass through Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element of operand 1 is demanded.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    // Take the high undef elements from operand 0, the low from operand 1.
    UndefElts |= UndefElts2[0];
    break;
  }
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_max_ss_round:
  case Intrinsic::x86_avx512_mask_min_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
  case Intrinsic::x86_avx512_mask_max_sd_round:
  case Intrinsic::x86_avx512_mask_min_sd_round:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the low element of this scalar op isn't used, pass through Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower elements of operands 1 and 2 are demanded.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);

    // The low result element is undef only if all three low inputs are
    // (consider undef & 0, not undef & undef).
    if (!UndefElts2[0] || !UndefElts3[0])
      UndefElts.clearBit(0);
    break;
  case Intrinsic::x86_sse3_addsub_pd:
  case Intrinsic::x86_sse3_addsub_ps:
  case Intrinsic::x86_avx_addsub_pd_256:
  case Intrinsic::x86_avx_addsub_ps_256: {
    // If only the even (subtract) or only the odd (add) lanes are demanded,
    // this folds to a plain fsub/fadd.
    APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
    APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
    bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
    bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
    if (IsSubOnly || IsAddOnly) {
      assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
      Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
      return IC.Builder.CreateBinOp(
          IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
    }

    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
    break;
  }
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
    break;
  }
  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    // ...
    break;
  }
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512: {
    auto *Ty0 = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
    assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

    unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
    unsigned VWidthPerLane = VWidth / NumLanes;
    unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

    // Map the demanded result elements back to the source elements of each
    // operand, one 128-bit lane at a time.
    for (int OpNum = 0; OpNum != 2; ++OpNum) {
      APInt OpDemandedElts(InnerVWidth, 0);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        unsigned LaneIdx = Lane * VWidthPerLane;
        for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
          unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
          if (DemandedElts[Idx])
            OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
        }
      }

      // Demand elements from the operand.
      APInt OpUndefElts(InnerVWidth, 0);
      simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);

      // Pack the operand's undef elements back into result positions, one
      // lane at a time.
      OpUndefElts = OpUndefElts.zext(VWidth);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
        LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
        LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
        UndefElts |= LaneElts;
      }
    }
    break;
  }
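    // Hedged example of the lane mapping above: for v32i8 PACK(v16i16 X,
    // v16i16 Y) the per-128-bit-lane result layout is (X[0..7], Y[0..7]),
    // (X[8..15], Y[8..15]), so a demanded result byte is translated back to
    // the i16 element of the matching operand and lane before recursing.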
  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512: {
    // PMADD: two consecutive source elements feed each destination element.
    auto *ArgTy = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
    assert((VWidth * 2) == InnerVWidth && "Unexpected input size");

    APInt OpDemandedElts = APIntOps::ScaleBitMask(DemandedElts, InnerVWidth);
    APInt Op0UndefElts(InnerVWidth, 0);
    APInt Op1UndefElts(InnerVWidth, 0);
    simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
    simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
    // ...
    break;
  }
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps: {
    // Only the selector (operand 1) follows the demanded result elements.
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
    // ...
    break;
  }
  // SSE4A instructions leave the upper 64 bits of the 128-bit result in an
  // undefined state.
  case Intrinsic::x86_sse4a_extrq:
  case Intrinsic::x86_sse4a_extrqi:
  case Intrinsic::x86_sse4a_insertq:
  case Intrinsic::x86_sse4a_insertqi:
    UndefElts.setHighBits(VWidth / 2);
    break;
  }
  return std::nullopt;
}
Functions defined in this file:

static Value * simplifyTernarylogic(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Instruction * simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC)
static Value * simplifyX86immShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, APInt APLength, APInt APIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant folding or conversion to a shuffle.
static Value * simplifyX86addcarry(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86pack(IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned)
static Constant * getNegativeIsTrueBoolVec(Constant *V, const DataLayout &DL)
Return a constant boolean vector that has true elements in all positions where the input constant data vector has an element with the sign bit set.
static Value * simplifyX86pshufb(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert pshufb* to shufflevector if the mask is constant.
static Value * simplifyX86vpermv3(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermi2/vpermt2 to shufflevector if the mask is constant.
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC)
static Value * simplifyX86vpermilvar(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermilvar* to shufflevector if the mask is constant.
static Value * simplifyX86pmulh(IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned, bool IsRounding)
static Value * simplifyX86movmsk(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86vpermv(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
static Value * simplifyX86pmadd(IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsPMADDWD)
static Value * simplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Value * simplifyX86extrq(IntrinsicInst &II, Value *Op0, ConstantInt *CILength, ConstantInt *CIIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding or conversion to a shuffle.
static Value * getBoolVecFromMask(Value *Mask, const DataLayout &DL)
Convert the x86 XMM integer vector mask to a vector of bools based on each element's most significant bit (the sign bit).
static Value * simplifyX86varShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
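A hedged end-to-end illustration of the pshufb transform described above (the IR value names here are invented for the example): with a constant selector, a call such as

  %r = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %v,
           <16 x i8> <i8 1, i8 0, i8 -128, i8 3, ...>)

can be rewritten as

  %r = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer,
           <16 x i32> <i32 1, i32 0, i32 16, i32 3, ...>

Each selector byte with its most significant bit set (-128 here) produces a zero byte, which the shufflevector models by indexing into the zero vector (elements 16 and up); otherwise the low bits of the byte pick a source element within the 16-byte lane.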