30 #define DEBUG_TYPE "legalizer"
33 using namespace LegalizeActions;
34 using namespace MIPatternMatch;
43 static std::pair<int, int>
49 unsigned NumParts =
Size / NarrowSize;
50 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
53 if (LeftoverSize == 0)
58 if (LeftoverSize % EltSize != 0)
66 return std::make_pair(NumParts, NumLeftover);
93 : MIRBuilder(
Builder), Observer(Observer),
MRI(MF.getRegInfo()),
94 LI(*MF.getSubtarget().getLegalizerInfo()),
95 TLI(*MF.getSubtarget().getTargetLowering()) { }
100 : MIRBuilder(
B), Observer(Observer),
MRI(MF.getRegInfo()), LI(LI),
101 TLI(*MF.getSubtarget().getTargetLowering()) { }
109 if (
MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
110 MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
113 switch (Step.Action) {
128 return bitcast(
MI, Step.TypeIdx, Step.NewType);
131 return lower(
MI, Step.TypeIdx, Step.NewType);
147 void LegalizerHelper::extractParts(
Register Reg,
LLT Ty,
int NumParts,
149 for (
int i = 0;
i < NumParts; ++
i)
155 LLT MainTy,
LLT &LeftoverTy,
162 unsigned NumParts = RegSize / MainSize;
163 unsigned LeftoverSize = RegSize - NumParts * MainSize;
166 if (LeftoverSize == 0) {
167 for (
unsigned I = 0;
I < NumParts; ++
I)
175 if (LeftoverSize % EltSize != 0)
183 for (
unsigned I = 0;
I != NumParts; ++
I) {
185 VRegs.push_back(NewReg);
189 for (
unsigned Offset = MainSize * NumParts;
Offset < RegSize;
192 LeftoverRegs.push_back(NewReg);
199 void LegalizerHelper::insertParts(
Register DstReg,
229 CurResultReg = NewResultReg;
233 for (
unsigned I = 0,
E = LeftoverRegs.
size();
I !=
E; ++
I) {
239 CurResultReg = NewResultReg;
240 Offset += LeftoverPartSize;
247 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
249 const int StartIdx = Regs.size();
250 const int NumResults =
MI.getNumOperands() - 1;
251 Regs.
resize(Regs.size() + NumResults);
252 for (
int I = 0;
I != NumResults; ++
I)
253 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
259 if (SrcTy == GCDTy) {
262 Parts.push_back(SrcReg);
274 extractGCDType(Parts, GCDTy, SrcReg);
278 LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
280 unsigned PadStrategy) {
285 int NumOrigSrc = VRegs.size();
291 if (NumOrigSrc < NumParts * NumSubParts) {
292 if (PadStrategy == TargetOpcode::G_ZEXT)
294 else if (PadStrategy == TargetOpcode::G_ANYEXT)
297 assert(PadStrategy == TargetOpcode::G_SEXT);
318 for (
int I = 0;
I != NumParts; ++
I) {
319 bool AllMergePartsArePadding =
true;
322 for (
int J = 0; J != NumSubParts; ++J) {
323 int Idx =
I * NumSubParts + J;
324 if (Idx >= NumOrigSrc) {
325 SubMerge[J] = PadReg;
329 SubMerge[J] = VRegs[Idx];
332 AllMergePartsArePadding =
false;
338 if (AllMergePartsArePadding && !AllPadReg) {
339 if (PadStrategy == TargetOpcode::G_ANYEXT)
341 else if (PadStrategy == TargetOpcode::G_ZEXT)
351 Remerge[
I] = AllPadReg;
355 if (NumSubParts == 1)
356 Remerge[
I] = SubMerge[0];
361 if (AllMergePartsArePadding && !AllPadReg)
362 AllPadReg = Remerge[
I];
369 void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
376 if (DstTy == LCMTy) {
390 UnmergeDefs[0] = DstReg;
391 for (
unsigned I = 1;
I != NumDefs; ++
I)
403 #define RTLIBCASE_INT(LibcallPrefix) \
407 return RTLIB::LibcallPrefix##32; \
409 return RTLIB::LibcallPrefix##64; \
411 return RTLIB::LibcallPrefix##128; \
413 llvm_unreachable("unexpected size"); \
417 #define RTLIBCASE(LibcallPrefix) \
421 return RTLIB::LibcallPrefix##32; \
423 return RTLIB::LibcallPrefix##64; \
425 return RTLIB::LibcallPrefix##80; \
427 return RTLIB::LibcallPrefix##128; \
429 llvm_unreachable("unexpected size"); \
434 case TargetOpcode::G_SDIV:
436 case TargetOpcode::G_UDIV:
438 case TargetOpcode::G_SREM:
440 case TargetOpcode::G_UREM:
442 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
444 case TargetOpcode::G_FADD:
446 case TargetOpcode::G_FSUB:
448 case TargetOpcode::G_FMUL:
450 case TargetOpcode::G_FDIV:
452 case TargetOpcode::G_FEXP:
454 case TargetOpcode::G_FEXP2:
456 case TargetOpcode::G_FREM:
458 case TargetOpcode::G_FPOW:
460 case TargetOpcode::G_FMA:
462 case TargetOpcode::G_FSIN:
464 case TargetOpcode::G_FCOS:
466 case TargetOpcode::G_FLOG10:
468 case TargetOpcode::G_FLOG:
470 case TargetOpcode::G_FLOG2:
472 case TargetOpcode::G_FCEIL:
474 case TargetOpcode::G_FFLOOR:
476 case TargetOpcode::G_FMINNUM:
478 case TargetOpcode::G_FMAXNUM:
480 case TargetOpcode::G_FSQRT:
482 case TargetOpcode::G_FRINT:
484 case TargetOpcode::G_FNEARBYINT:
486 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
504 .removeAttribute(Attribute::NoAlias)
505 .removeAttribute(Attribute::NonNull)
532 Info.OrigRet = Result;
534 if (!CLI.lowerCall(MIRBuilder,
Info))
557 for (
unsigned i = 1;
i <
MI.getNumOperands();
i++)
558 Args.push_back({MI.getOperand(i).getReg(), OpType});
570 for (
unsigned i = 0;
i <
MI.getNumOperands() - 1; ++
i) {
575 Type *OpTy =
nullptr;
586 unsigned Opc =
MI.getOpcode();
588 case TargetOpcode::G_BZERO:
589 RTLibcall = RTLIB::BZERO;
591 case TargetOpcode::G_MEMCPY:
594 case TargetOpcode::G_MEMMOVE:
595 RTLibcall = RTLIB::MEMMOVE;
597 case TargetOpcode::G_MEMSET:
598 RTLibcall = RTLIB::MEMSET;
603 const char *
Name = TLI.getLibcallName(RTLibcall);
613 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
616 Info.IsTailCall =
MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
620 if (!CLI.lowerCall(MIRBuilder,
Info))
623 if (
Info.LoweredTailCall) {
624 assert(
Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
630 "Expected instr following MI to be return or debug inst?");
634 }
while (
MI.getNextNode());
646 case TargetOpcode::G_FPEXT:
648 case TargetOpcode::G_FPTRUNC:
650 case TargetOpcode::G_FPTOSI:
652 case TargetOpcode::G_FPTOUI:
654 case TargetOpcode::G_SITOFP:
656 case TargetOpcode::G_UITOFP:
676 switch (
MI.getOpcode()) {
679 case TargetOpcode::G_SDIV:
680 case TargetOpcode::G_UDIV:
681 case TargetOpcode::G_SREM:
682 case TargetOpcode::G_UREM:
683 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
690 case TargetOpcode::G_FADD:
691 case TargetOpcode::G_FSUB:
692 case TargetOpcode::G_FMUL:
693 case TargetOpcode::G_FDIV:
694 case TargetOpcode::G_FMA:
695 case TargetOpcode::G_FPOW:
696 case TargetOpcode::G_FREM:
697 case TargetOpcode::G_FCOS:
698 case TargetOpcode::G_FSIN:
699 case TargetOpcode::G_FLOG10:
700 case TargetOpcode::G_FLOG:
701 case TargetOpcode::G_FLOG2:
702 case TargetOpcode::G_FEXP:
703 case TargetOpcode::G_FEXP2:
704 case TargetOpcode::G_FCEIL:
705 case TargetOpcode::G_FFLOOR:
706 case TargetOpcode::G_FMINNUM:
707 case TargetOpcode::G_FMAXNUM:
708 case TargetOpcode::G_FSQRT:
709 case TargetOpcode::G_FRINT:
710 case TargetOpcode::G_FNEARBYINT:
711 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
714 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
722 case TargetOpcode::G_FPEXT:
723 case TargetOpcode::G_FPTRUNC: {
726 if (!FromTy || !ToTy)
733 case TargetOpcode::G_FPTOSI:
734 case TargetOpcode::G_FPTOUI: {
738 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
748 case TargetOpcode::G_SITOFP:
749 case TargetOpcode::G_UITOFP: {
753 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
763 case TargetOpcode::G_BZERO:
764 case TargetOpcode::G_MEMCPY:
765 case TargetOpcode::G_MEMMOVE:
766 case TargetOpcode::G_MEMSET: {
771 MI.eraseFromParent();
776 MI.eraseFromParent();
786 switch (
MI.getOpcode()) {
789 case TargetOpcode::G_IMPLICIT_DEF: {
799 if (SizeOp0 % NarrowSize != 0) {
800 LLT ImplicitTy = NarrowTy;
807 MI.eraseFromParent();
811 int NumParts = SizeOp0 / NarrowSize;
814 for (
int i = 0;
i < NumParts; ++
i)
821 MI.eraseFromParent();
824 case TargetOpcode::G_CONSTANT: {
826 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
829 int NumParts = TotalSize / NarrowSize;
832 for (
int I = 0;
I != NumParts; ++
I) {
833 unsigned Offset =
I * NarrowSize;
836 PartRegs.push_back(K.getReg(0));
840 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
842 if (LeftoverBits != 0) {
846 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
847 LeftoverRegs.push_back(K.getReg(0));
850 insertParts(
MI.getOperand(0).getReg(),
851 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
853 MI.eraseFromParent();
856 case TargetOpcode::G_SEXT:
857 case TargetOpcode::G_ZEXT:
858 case TargetOpcode::G_ANYEXT:
860 case TargetOpcode::G_TRUNC: {
866 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
872 MI.eraseFromParent();
876 case TargetOpcode::G_FREEZE:
878 case TargetOpcode::G_ADD:
879 case TargetOpcode::G_SUB:
880 case TargetOpcode::G_SADDO:
881 case TargetOpcode::G_SSUBO:
882 case TargetOpcode::G_SADDE:
883 case TargetOpcode::G_SSUBE:
884 case TargetOpcode::G_UADDO:
885 case TargetOpcode::G_USUBO:
886 case TargetOpcode::G_UADDE:
887 case TargetOpcode::G_USUBE:
889 case TargetOpcode::G_MUL:
890 case TargetOpcode::G_UMULH:
892 case TargetOpcode::G_EXTRACT:
894 case TargetOpcode::G_INSERT:
896 case TargetOpcode::G_LOAD: {
897 auto &MMO = **
MI.memoperands_begin();
907 MI.eraseFromParent();
913 case TargetOpcode::G_ZEXTLOAD:
914 case TargetOpcode::G_SEXTLOAD: {
915 bool ZExt =
MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
920 auto &MMO = **
MI.memoperands_begin();
921 unsigned MemSize = MMO.getSizeInBits();
923 if (MemSize == NarrowSize) {
925 }
else if (MemSize < NarrowSize) {
927 }
else if (MemSize > NarrowSize) {
937 MI.eraseFromParent();
940 case TargetOpcode::G_STORE: {
941 const auto &MMO = **
MI.memoperands_begin();
948 int NumParts = SizeOp0 / NarrowSize;
951 if (SrcTy.
isVector() && LeftoverBits != 0)
956 auto &MMO = **
MI.memoperands_begin();
959 MI.eraseFromParent();
965 case TargetOpcode::G_SELECT:
967 case TargetOpcode::G_AND:
968 case TargetOpcode::G_OR:
969 case TargetOpcode::G_XOR: {
981 case TargetOpcode::G_SHL:
982 case TargetOpcode::G_LSHR:
983 case TargetOpcode::G_ASHR:
985 case TargetOpcode::G_CTLZ:
986 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
987 case TargetOpcode::G_CTTZ:
988 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
989 case TargetOpcode::G_CTPOP:
991 switch (
MI.getOpcode()) {
992 case TargetOpcode::G_CTLZ:
993 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
995 case TargetOpcode::G_CTTZ:
996 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
998 case TargetOpcode::G_CTPOP:
1008 case TargetOpcode::G_INTTOPTR:
1016 case TargetOpcode::G_PTRTOINT:
1024 case TargetOpcode::G_PHI: {
1027 if (SizeOp0 % NarrowSize != 0)
1030 unsigned NumParts = SizeOp0 / NarrowSize;
1034 for (
unsigned i = 1;
i <
MI.getNumOperands();
i += 2) {
1037 extractParts(
MI.getOperand(
i).getReg(), NarrowTy, NumParts,
1042 for (
unsigned i = 0;
i < NumParts; ++
i) {
1046 for (
unsigned j = 1;
j <
MI.getNumOperands();
j += 2)
1052 MI.eraseFromParent();
1055 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1056 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1060 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1066 case TargetOpcode::G_ICMP: {
1068 if (NarrowSize * 2 != SrcSize)
1099 MI.eraseFromParent();
1102 case TargetOpcode::G_SEXT_INREG: {
1106 int64_t SizeInBits =
MI.getOperand(2).getImm();
1116 MO1.
setReg(TruncMIB.getReg(0));
1131 if (SizeOp0 % NarrowSize != 0)
1133 int NumParts = SizeOp0 / NarrowSize;
1141 for (
int i = 0;
i < NumParts; ++
i) {
1144 SrcRegs.push_back(SrcReg);
1157 for (
int i = 0;
i < NumParts; ++
i) {
1159 DstRegs.push_back(SrcRegs[
i]);
1161 assert(PartialExtensionReg &&
1162 "Expected to visit partial extension before full");
1163 if (FullExtensionReg) {
1164 DstRegs.push_back(FullExtensionReg);
1170 FullExtensionReg = DstRegs.back();
1175 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1178 PartialExtensionReg = DstRegs.back();
1185 MI.eraseFromParent();
1188 case TargetOpcode::G_BSWAP:
1189 case TargetOpcode::G_BITREVERSE: {
1190 if (SizeOp0 % NarrowSize != 0)
1195 unsigned NumParts = SizeOp0 / NarrowSize;
1196 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1198 for (
unsigned i = 0;
i < NumParts; ++
i) {
1200 {SrcRegs[NumParts - 1 - i]});
1201 DstRegs.push_back(DstPart.getReg(0));
1207 MI.eraseFromParent();
1210 case TargetOpcode::G_PTR_ADD:
1211 case TargetOpcode::G_PTRMASK: {
1219 case TargetOpcode::G_FPTOUI: {
1227 case TargetOpcode::G_FPTOSI: {
1235 case TargetOpcode::G_FPEXT:
1268 unsigned OpIdx,
unsigned ExtOpcode) {
1271 MO.
setReg(ExtB.getReg(0));
1278 MO.
setReg(ExtB.getReg(0));
1282 unsigned OpIdx,
unsigned TruncOpcode) {
1291 unsigned OpIdx,
unsigned ExtOpcode) {
1314 unsigned NumParts = NewElts / OldElts;
1317 if (NumParts * OldElts == NewElts) {
1319 Parts.push_back(MO.
getReg());
1322 for (
unsigned I = 1;
I != NumParts; ++
I)
1323 Parts.push_back(ImpDef);
1350 LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
1365 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1367 unsigned NumOps =
MI.getNumOperands();
1368 unsigned NumSrc =
MI.getNumOperands() - 1;
1371 if (WideSize >= DstSize) {
1375 for (
unsigned I = 2;
I != NumOps; ++
I) {
1376 const unsigned Offset = (
I - 1) * PartSize;
1383 Register NextResult =
I + 1 == NumOps && WideTy == DstTy ? DstReg :
1389 ResultReg = NextResult;
1392 if (WideSize > DstSize)
1397 MI.eraseFromParent();
1431 for (
int I = 1,
E =
MI.getNumOperands();
I !=
E; ++
I) {
1433 if (GCD == SrcSize) {
1434 Unmerges.push_back(SrcReg);
1437 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1438 Unmerges.push_back(Unmerge.getReg(J));
1443 if (
static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1445 for (
int I = Unmerges.size();
I != NumMerge * WideSize; ++
I)
1446 Unmerges.push_back(UndefReg);
1449 const int PartsPerGCD = WideSize / GCD;
1453 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1455 NewMergeRegs.push_back(
Merge.getReg(0));
1467 MI.eraseFromParent();
1488 if (NumMergeParts > 1) {
1491 MergeParts[0] = WideReg;
1497 UnmergeResults[0] = OrigReg;
1498 for (
int I = 1;
I != NumUnmergeParts; ++
I)
1506 LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
1511 int NumDst =
MI.getNumOperands() - 1;
1512 Register SrcReg =
MI.getOperand(NumDst).getReg();
1517 Register Dst0Reg =
MI.getOperand(0).getReg();
1527 dbgs() <<
"Not casting non-integral address space integer\n");
1548 for (
int I = 1;
I != NumDst; ++
I) {
1554 MI.eraseFromParent();
1565 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
1590 const int NumUnmerge = Unmerge->getNumOperands() - 1;
1595 if (PartsPerRemerge == 1) {
1598 for (
int I = 0;
I != NumUnmerge; ++
I) {
1601 for (
int J = 0; J != PartsPerUnmerge; ++J) {
1602 int Idx =
I * PartsPerUnmerge + J;
1604 MIB.addDef(
MI.getOperand(Idx).getReg());
1611 MIB.addUse(Unmerge.getReg(
I));
1615 for (
int J = 0; J != NumUnmerge; ++J)
1616 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1619 for (
int I = 0;
I != NumDst; ++
I) {
1620 for (
int J = 0; J < PartsPerRemerge; ++J) {
1621 const int Idx =
I * PartsPerRemerge + J;
1626 RemergeParts.
clear();
1630 MI.eraseFromParent();
1635 LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
1642 unsigned Offset =
MI.getOperand(2).getImm();
1668 MI.eraseFromParent();
1673 LLT ShiftTy = SrcTy;
1682 MI.eraseFromParent();
1713 LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
1715 if (TypeIdx != 0 || WideTy.
isVector())
1725 LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
1733 switch (
MI.getOpcode()) {
1736 case TargetOpcode::G_SADDO:
1737 Opcode = TargetOpcode::G_ADD;
1738 ExtOpcode = TargetOpcode::G_SEXT;
1740 case TargetOpcode::G_SSUBO:
1741 Opcode = TargetOpcode::G_SUB;
1742 ExtOpcode = TargetOpcode::G_SEXT;
1744 case TargetOpcode::G_UADDO:
1745 Opcode = TargetOpcode::G_ADD;
1746 ExtOpcode = TargetOpcode::G_ZEXT;
1748 case TargetOpcode::G_USUBO:
1749 Opcode = TargetOpcode::G_SUB;
1750 ExtOpcode = TargetOpcode::G_ZEXT;
1752 case TargetOpcode::G_SADDE:
1753 Opcode = TargetOpcode::G_UADDE;
1754 ExtOpcode = TargetOpcode::G_SEXT;
1755 CarryIn =
MI.getOperand(4).getReg();
1757 case TargetOpcode::G_SSUBE:
1758 Opcode = TargetOpcode::G_USUBE;
1759 ExtOpcode = TargetOpcode::G_SEXT;
1760 CarryIn =
MI.getOperand(4).getReg();
1762 case TargetOpcode::G_UADDE:
1763 Opcode = TargetOpcode::G_UADDE;
1764 ExtOpcode = TargetOpcode::G_ZEXT;
1765 CarryIn =
MI.getOperand(4).getReg();
1767 case TargetOpcode::G_USUBE:
1768 Opcode = TargetOpcode::G_USUBE;
1769 ExtOpcode = TargetOpcode::G_ZEXT;
1770 CarryIn =
MI.getOperand(4).getReg();
1779 LLT CarryOutTy = MRI.
getType(
MI.getOperand(1).getReg());
1782 {LHSExt, RHSExt, *CarryIn})
1794 MI.eraseFromParent();
1799 LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
1801 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1802 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1803 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1804 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1805 MI.getOpcode() == TargetOpcode::G_USHLSAT;
1830 {ShiftL, ShiftR},
MI.getFlags());
1838 MI.eraseFromParent();
1843 LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
1848 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
1850 Register OriginalOverflow =
MI.getOperand(1).getReg();
1854 LLT OverflowTy = MRI.
getType(OriginalOverflow);
1861 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1866 {LeftOperand, RightOperand});
1867 auto Mul = Mulo->getOperand(0);
1894 MI.eraseFromParent();
1900 switch (
MI.getOpcode()) {
1903 case TargetOpcode::G_EXTRACT:
1904 return widenScalarExtract(
MI, TypeIdx, WideTy);
1905 case TargetOpcode::G_INSERT:
1906 return widenScalarInsert(
MI, TypeIdx, WideTy);
1907 case TargetOpcode::G_MERGE_VALUES:
1908 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
1909 case TargetOpcode::G_UNMERGE_VALUES:
1910 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
1911 case TargetOpcode::G_SADDO:
1912 case TargetOpcode::G_SSUBO:
1913 case TargetOpcode::G_UADDO:
1914 case TargetOpcode::G_USUBO:
1915 case TargetOpcode::G_SADDE:
1916 case TargetOpcode::G_SSUBE:
1917 case TargetOpcode::G_UADDE:
1918 case TargetOpcode::G_USUBE:
1919 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
1920 case TargetOpcode::G_UMULO:
1921 case TargetOpcode::G_SMULO:
1922 return widenScalarMulo(
MI, TypeIdx, WideTy);
1923 case TargetOpcode::G_SADDSAT:
1924 case TargetOpcode::G_SSUBSAT:
1925 case TargetOpcode::G_SSHLSAT:
1926 case TargetOpcode::G_UADDSAT:
1927 case TargetOpcode::G_USUBSAT:
1928 case TargetOpcode::G_USHLSAT:
1929 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
1930 case TargetOpcode::G_CTTZ:
1931 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1932 case TargetOpcode::G_CTLZ:
1933 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1934 case TargetOpcode::G_CTPOP: {
1947 if (
MI.getOpcode() == TargetOpcode::G_CTTZ) {
1960 if (
MI.getOpcode() == TargetOpcode::G_CTLZ ||
1961 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1969 MI.eraseFromParent();
1972 case TargetOpcode::G_BSWAP: {
1981 MI.getOperand(0).setReg(DstExt);
1994 case TargetOpcode::G_BITREVERSE: {
2003 MI.getOperand(0).setReg(DstExt);
2012 case TargetOpcode::G_FREEZE:
2019 case TargetOpcode::G_ADD:
2020 case TargetOpcode::G_AND:
2021 case TargetOpcode::G_MUL:
2022 case TargetOpcode::G_OR:
2023 case TargetOpcode::G_XOR:
2024 case TargetOpcode::G_SUB:
2035 case TargetOpcode::G_SHL:
2051 case TargetOpcode::G_SDIV:
2052 case TargetOpcode::G_SREM:
2053 case TargetOpcode::G_SMIN:
2054 case TargetOpcode::G_SMAX:
2062 case TargetOpcode::G_ASHR:
2063 case TargetOpcode::G_LSHR:
2067 unsigned CvtOp =
MI.getOpcode() == TargetOpcode::G_ASHR ?
2068 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2081 case TargetOpcode::G_UDIV:
2082 case TargetOpcode::G_UREM:
2083 case TargetOpcode::G_UMIN:
2084 case TargetOpcode::G_UMAX:
2092 case TargetOpcode::G_SELECT:
2109 case TargetOpcode::G_FPTOSI:
2110 case TargetOpcode::G_FPTOUI:
2120 case TargetOpcode::G_SITOFP:
2130 case TargetOpcode::G_UITOFP:
2140 case TargetOpcode::G_LOAD:
2141 case TargetOpcode::G_SEXTLOAD:
2142 case TargetOpcode::G_ZEXTLOAD:
2148 case TargetOpcode::G_STORE: {
2159 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2165 case TargetOpcode::G_CONSTANT: {
2169 MRI.
getType(
MI.getOperand(0).getReg()));
2170 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2171 ExtOpc == TargetOpcode::G_ANYEXT) &&
2174 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2184 case TargetOpcode::G_FCONSTANT: {
2202 assert(!LosesInfo &&
"extend should always be lossless");
2211 case TargetOpcode::G_IMPLICIT_DEF: {
2217 case TargetOpcode::G_BRCOND:
2223 case TargetOpcode::G_FCMP:
2234 case TargetOpcode::G_ICMP:
2240 MI.getOperand(1).getPredicate()))
2241 ? TargetOpcode::G_SEXT
2242 : TargetOpcode::G_ZEXT;
2249 case TargetOpcode::G_PTR_ADD:
2250 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
2256 case TargetOpcode::G_PHI: {
2257 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
2260 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
2272 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2280 1, TargetOpcode::G_SEXT);
2295 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2320 case TargetOpcode::G_FADD:
2321 case TargetOpcode::G_FMUL:
2322 case TargetOpcode::G_FSUB:
2323 case TargetOpcode::G_FMA:
2324 case TargetOpcode::G_FMAD:
2325 case TargetOpcode::G_FNEG:
2326 case TargetOpcode::G_FABS:
2327 case TargetOpcode::G_FCANONICALIZE:
2328 case TargetOpcode::G_FMINNUM:
2329 case TargetOpcode::G_FMAXNUM:
2330 case TargetOpcode::G_FMINNUM_IEEE:
2331 case TargetOpcode::G_FMAXNUM_IEEE:
2332 case TargetOpcode::G_FMINIMUM:
2333 case TargetOpcode::G_FMAXIMUM:
2334 case TargetOpcode::G_FDIV:
2335 case TargetOpcode::G_FREM:
2336 case TargetOpcode::G_FCEIL:
2337 case TargetOpcode::G_FFLOOR:
2338 case TargetOpcode::G_FCOS:
2339 case TargetOpcode::G_FSIN:
2340 case TargetOpcode::G_FLOG10:
2341 case TargetOpcode::G_FLOG:
2342 case TargetOpcode::G_FLOG2:
2343 case TargetOpcode::G_FRINT:
2344 case TargetOpcode::G_FNEARBYINT:
2345 case TargetOpcode::G_FSQRT:
2346 case TargetOpcode::G_FEXP:
2347 case TargetOpcode::G_FEXP2:
2348 case TargetOpcode::G_FPOW:
2349 case TargetOpcode::G_INTRINSIC_TRUNC:
2350 case TargetOpcode::G_INTRINSIC_ROUND:
2351 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2355 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E; ++
I)
2361 case TargetOpcode::G_FPOWI: {
2370 case TargetOpcode::G_INTTOPTR:
2378 case TargetOpcode::G_PTRTOINT:
2386 case TargetOpcode::G_BUILD_VECTOR: {
2390 for (
int I = 1,
E =
MI.getNumOperands();
I !=
E; ++
I)
2404 case TargetOpcode::G_SEXT_INREG:
2413 case TargetOpcode::G_PTRMASK: {
2426 auto Unmerge =
B.buildUnmerge(Ty, Src);
2427 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
2428 Pieces.push_back(Unmerge.getReg(
I));
2447 LLT DstCastTy = DstEltTy;
2448 LLT SrcPartTy = SrcEltTy;
2452 if (NumSrcElt < NumDstElt) {
2461 DstCastTy =
LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
2462 SrcPartTy = SrcEltTy;
2463 }
else if (NumSrcElt > NumDstElt) {
2473 SrcPartTy =
LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
2474 DstCastTy = DstEltTy;
2484 MI.eraseFromParent();
2492 MI.eraseFromParent();
2508 unsigned NewEltSize,
2509 unsigned OldEltSize) {
2510 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
2511 LLT IdxTy =
B.getMRI()->getType(Idx);
2514 auto OffsetMask =
B.buildConstant(
2516 auto OffsetIdx =
B.buildAnd(IdxTy, Idx, OffsetMask);
2517 return B.buildShl(IdxTy, OffsetIdx,
2518 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
2548 if (NewNumElts > OldNumElts) {
2559 if (NewNumElts % OldNumElts != 0)
2563 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2571 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
2575 NewOps[
I] = Elt.getReg(0);
2580 MI.eraseFromParent();
2584 if (NewNumElts < OldNumElts) {
2585 if (NewEltSize % OldEltSize != 0)
2607 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
2626 MI.eraseFromParent();
2640 LLT TargetTy =
B.getMRI()->getType(TargetReg);
2641 LLT InsertTy =
B.getMRI()->getType(InsertReg);
2642 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
2643 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
2646 auto EltMask =
B.buildConstant(
2650 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
2651 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
2654 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2658 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2689 if (NewNumElts < OldNumElts) {
2690 if (NewEltSize % OldEltSize != 0)
2699 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
2719 CastTy, CastVec, InsertedElt, ScaledIdx).
getReg(0);
2723 MI.eraseFromParent();
2736 auto &MMO = **
MI.memoperands_begin();
2739 if (
MI.getOpcode() == TargetOpcode::G_LOAD) {
2759 uint64_t SmallSplitSize = DstTy.
getSizeInBits() - LargeSplitSize;
2765 &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2773 TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
2787 MI.eraseFromParent();
2792 MI.eraseFromParent();
2800 switch (
MI.getOpcode()) {
2803 case TargetOpcode::G_LOAD:
2806 case TargetOpcode::G_SEXTLOAD:
2809 case TargetOpcode::G_ZEXTLOAD:
2814 MI.eraseFromParent();
2846 uint64_t SmallSplitSize = SrcTy.
getSizeInBits() - LargeSplitSize;
2865 MI.eraseFromParent();
2871 switch (
MI.getOpcode()) {
2872 case TargetOpcode::G_LOAD: {
2881 case TargetOpcode::G_STORE: {
2890 case TargetOpcode::G_SELECT: {
2896 dbgs() <<
"bitcast action not implemented for vector select\n");
2907 case TargetOpcode::G_AND:
2908 case TargetOpcode::G_OR:
2909 case TargetOpcode::G_XOR: {
2917 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2919 case TargetOpcode::G_INSERT_VECTOR_ELT:
2927 void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
2935 using namespace TargetOpcode;
2937 switch(
MI.getOpcode()) {
2940 case TargetOpcode::G_BITCAST:
2942 case TargetOpcode::G_SREM:
2943 case TargetOpcode::G_UREM: {
2947 {MI.getOperand(1), MI.getOperand(2)});
2951 MI.eraseFromParent();
2954 case TargetOpcode::G_SADDO:
2955 case TargetOpcode::G_SSUBO:
2957 case TargetOpcode::G_UMULH:
2958 case TargetOpcode::G_SMULH:
2960 case TargetOpcode::G_SMULO:
2961 case TargetOpcode::G_UMULO: {
2965 Register Overflow =
MI.getOperand(1).getReg();
2970 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
2971 ? TargetOpcode::G_SMULH
2972 : TargetOpcode::G_UMULH;
2976 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
2977 MI.RemoveOperand(1);
2988 if (Opcode == TargetOpcode::G_SMULH) {
2997 case TargetOpcode::G_FNEG: {
3007 Register SubByReg =
MI.getOperand(1).getReg();
3009 MI.eraseFromParent();
3012 case TargetOpcode::G_FSUB: {
3019 if (LI.getAction({G_FNEG, {Ty}}).Action ==
Lower)
3026 MI.eraseFromParent();
3029 case TargetOpcode::G_FMAD:
3031 case TargetOpcode::G_FFLOOR:
3033 case TargetOpcode::G_INTRINSIC_ROUND:
3035 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3038 changeOpcode(
MI, TargetOpcode::G_FRINT);
3041 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3042 Register OldValRes =
MI.getOperand(0).getReg();
3043 Register SuccessRes =
MI.getOperand(1).getReg();
3048 **
MI.memoperands_begin());
3050 MI.eraseFromParent();
3053 case TargetOpcode::G_LOAD:
3054 case TargetOpcode::G_SEXTLOAD:
3055 case TargetOpcode::G_ZEXTLOAD:
3057 case TargetOpcode::G_STORE:
3059 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3060 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3061 case TargetOpcode::G_CTLZ:
3062 case TargetOpcode::G_CTTZ:
3063 case TargetOpcode::G_CTPOP:
3067 Register CarryOut =
MI.getOperand(1).getReg();
3074 MI.eraseFromParent();
3079 Register CarryOut =
MI.getOperand(1).getReg();
3082 Register CarryIn =
MI.getOperand(4).getReg();
3090 MI.eraseFromParent();
3095 Register BorrowOut =
MI.getOperand(1).getReg();
3102 MI.eraseFromParent();
3107 Register BorrowOut =
MI.getOperand(1).getReg();
3110 Register BorrowIn =
MI.getOperand(4).getReg();
3122 MI.eraseFromParent();
3147 case G_MERGE_VALUES:
3149 case G_UNMERGE_VALUES:
3151 case TargetOpcode::G_SEXT_INREG: {
3152 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
3153 int64_t SizeInBits =
MI.getOperand(2).getImm();
3163 MI.eraseFromParent();
3166 case G_EXTRACT_VECTOR_ELT:
3167 case G_INSERT_VECTOR_ELT:
3169 case G_SHUFFLE_VECTOR:
3171 case G_DYN_STACKALLOC:
3181 case G_READ_REGISTER:
3182 case G_WRITE_REGISTER:
3190 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3201 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3221 MI.eraseFromParent();
3255 unsigned AddrSpace =
DL.getAllocaAddrSpace();
3268 LLT IdxTy =
B.getMRI()->getType(IdxReg);
3272 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
3275 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
3286 "Converting bits to bytes lost precision");
3309 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3310 MI.eraseFromParent();
3332 const LLT NarrowTy0 = NarrowTyArg;
3333 const unsigned NewNumElts =
3336 const Register DstReg =
MI.getOperand(0).getReg();
3352 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E; ++
I) {
3360 if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
3367 for (
Register PartReg : PartRegs) {
3372 DstRegs.push_back(PartDstReg);
3375 for (
Register LeftoverReg : LeftoverRegs) {
3380 LeftoverDstRegs.push_back(PartDstReg);
3383 assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
3388 unsigned InstCount = 0;
3389 for (
unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
3390 NewInsts[InstCount++].addUse(PartRegs[J]);
3391 for (
unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
3392 NewInsts[InstCount++].addUse(LeftoverRegs[J]);
3396 LeftoverRegs.
clear();
3400 for (
auto &MIB : NewInsts)
3403 insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
3405 MI.eraseFromParent();
3420 LLT NarrowTy0 = NarrowTy;
3437 extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
3439 for (
unsigned I = 0;
I < NumParts; ++
I) {
3445 DstRegs.push_back(DstReg);
3453 MI.eraseFromParent();
3461 Register Src0Reg =
MI.getOperand(2).getReg();
3466 LLT NarrowTy0, NarrowTy1;
3472 NarrowTy0 = NarrowTy;
3482 NumParts = NarrowTy.
isVector() ? (OldElts / NewElts) :
3486 NarrowTy1 = NarrowTy;
3499 extractParts(
MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
3500 extractParts(
MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
3502 for (
unsigned I = 0;
I < NumParts; ++
I) {
3504 DstRegs.push_back(DstReg);
3506 if (
MI.getOpcode() == TargetOpcode::G_ICMP)
3520 MI.eraseFromParent();
3528 Register CondReg =
MI.getOperand(1).getReg();
3530 unsigned NumParts = 0;
3531 LLT NarrowTy0, NarrowTy1;
3540 NarrowTy0 = NarrowTy;
3546 if (
Size % NarrowSize != 0)
3549 NumParts =
Size / NarrowSize;
3569 NarrowTy1 = NarrowTy;
3575 extractParts(
MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3577 extractParts(
MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3578 extractParts(
MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3580 for (
unsigned i = 0;
i < NumParts; ++
i) {
3583 Src1Regs[
i], Src2Regs[
i]);
3584 DstRegs.push_back(DstReg);
3592 MI.eraseFromParent();
3599 const Register DstReg =
MI.getOperand(0).getReg();
3606 int NumParts, NumLeftover;
3607 std::tie(NumParts, NumLeftover)
3615 const int TotalNumParts = NumParts + NumLeftover;
3618 for (
int I = 0;
I != TotalNumParts; ++
I) {
3619 LLT Ty =
I < NumParts ? NarrowTy : LeftoverTy;
3624 DstRegs.push_back(PartDstReg);
3626 LeftoverDstRegs.push_back(PartDstReg);
3631 insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
3636 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
3638 LeftoverRegs.
clear();
3645 if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
3652 for (
int J = 0; J != TotalNumParts; ++J) {
3654 MIB.
addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
3659 MI.eraseFromParent();
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment redistributes MI's NumDst results over NumUnmerge new
// instructions -- presumably the G_UNMERGE_VALUES splitter; confirm.
// All operands except the last are counted as results; the last one (index
// NumDst) is the source register.
3670 const int NumDst =
MI.getNumOperands() - 1;
3671 const Register SrcReg =
MI.getOperand(NumDst).getReg();
// Two type comparisons guard the main path; their bodies are elided here.
3677 if (DstTy == NarrowTy)
3681 if (DstTy == GCDTy) {
// Each new instruction receives an equal share of the original results.
3689 const int PartsPerUnmerge = NumDst / NumUnmerge;
// For unmerge I: define original results [I * PartsPerUnmerge,
// (I + 1) * PartsPerUnmerge) and use the I-th register of Unmerge.
3691 for (
int I = 0;
I != NumUnmerge; ++
I) {
3694 for (
int J = 0; J != PartsPerUnmerge; ++J)
3695 MIB.
addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
3696 MIB.
addUse(Unmerge.getReg(
I));
// The original instruction is removed.
3699 MI.eraseFromParent();
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment builds one two-result instruction per piece and re-merges the
// value results and the overflow results separately -- presumably the
// multiply-with-overflow splitter; confirm.
// Operand 1 of MI is the overflow result register.
3707 Register Overflow =
MI.getOperand(1).getReg();
3725 const int PartsPerUnmerge = NumResult / NumOps;
// Piece I takes operand I of UnmergeLHS and UnmergeRHS as inputs; result 0 of
// PartMul is collected in ResultParts and result 1 in OverflowParts.
3732 for (
int I = 0;
I != NumOps; ++
I) {
3733 Register Operand1 = UnmergeLHS->getOperand(
I).getReg();
3734 Register Operand2 = UnmergeRHS->getOperand(
I).getReg();
3736 {Operand1, Operand2});
3737 ResultParts.push_back(PartMul->getOperand(0).getReg());
3738 OverflowParts.push_back(PartMul->getOperand(1).getReg());
3741 LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
// Both part lists are merged back into their final destination registers.
3746 buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
3747 buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
// The original instruction is removed.
3748 MI.eraseFromParent();
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment gathers pieces from every source operand of MI and re-merges
// them into DstReg -- presumably the merge/build-vector splitter; confirm.
// extractGCDType is called once per operand from index 1 onward, with the
// shared Parts list as its first argument.
3777 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E; ++
I)
3778 extractGCDType(Parts, GCDTy,
MI.getOperand(
I).getReg());
// TargetOpcode::G_ANYEXT is passed as the trailing argument.
// NOTE(review): presumably the opcode used to pad Parts up to LCMTy -- confirm
// against buildLCMMergePieces.
3781 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3782 TargetOpcode::G_ANYEXT);
3785 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
// The original instruction is removed.
3787 MI.eraseFromParent();
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment handles both G_INSERT_VECTOR_ELT and one other opcode
// (presumably G_EXTRACT_VECTOR_ELT -- confirm) by operating on a single piece
// of the split vector.
3798 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
// Type index 0 is required for the insert form and type index 1 otherwise.
3800 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
3802 InsertVal =
MI.getOperand(2).getReg();
// The index register is always MI's last operand.
3804 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
// MI is erased at two points in this fragment; the control flow separating
// them is elided here.
3819 MI.eraseFromParent();
3824 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
3827 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
3828 TargetOpcode::G_ANYEXT);
// Integer division of the index value by NewNumElts selects the piece.
3833 int64_t PartIdx = IdxVal / NewNumElts;
3842 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
// The selected piece is replaced by result 0 of InsertPart before the pieces
// are merged back into DstReg.
3843 VecParts[PartIdx] = InsertPart.getReg(0);
3847 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
3852 MI.eraseFromParent();
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment splits the value of a memory access into NarrowTy pieces plus
// LeftoverTy pieces -- presumably the load/store width reducer; confirm.
// Any opcode other than G_LOAD makes IsLoad false.
3879 bool IsLoad =
MI.getOpcode() == TargetOpcode::G_LOAD;
// Operand 1 of MI is the address register.
3881 Register AddrReg =
MI.getOperand(1).getReg();
3891 int NumLeftover = -1;
// When extractParts succeeds, the piece counts are read back from the sizes of
// the two output vectors.
3897 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
3898 NarrowLeftoverRegs)) {
3899 NumParts = NarrowRegs.size();
3900 NumLeftover = NarrowLeftoverRegs.size();
3917 unsigned Offset) ->
unsigned {
// Advance Offset by PartSize per piece, stopping after NumParts pieces or once
// Offset reaches TotalSize.
3920 for (
unsigned Idx = 0,
E = NumParts; Idx !=
E &&
Offset < TotalSize;
3921 Offset += PartSize, ++Idx) {
// NOTE(review): the division by 8 suggests PartSize and Offset are in bits and
// are converted to bytes here -- confirm.
3922 unsigned ByteSize = PartSize / 8;
3923 unsigned ByteOffset =
Offset / 8;
3933 ValRegs.push_back(Dst);
// NarrowTy pieces are processed from offset 0; the leftover pieces continue
// from the offset returned by that first call.
3943 unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
3947 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
// The pieces are combined into ValReg via insertParts.
3950 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
3951 LeftoverTy, NarrowLeftoverRegs);
// The original instruction is removed.
3954 MI.eraseFromParent();
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment splits each source operand of a single-result instruction into
// pieces, collects one result per piece and re-merges the results --
// presumably the generic operation-width reducer; confirm.
3961 assert(TypeIdx == 0 &&
"only one type index expected");
3963 const unsigned Opc =
MI.getOpcode();
// Every operand except operand 0 (the result) counts as a source.
3964 const int NumOps =
MI.getNumOperands() - 1;
3965 const Register DstReg =
MI.getOperand(0).getReg();
3966 const unsigned Flags =
MI.getFlags();
3970 assert(NumOps <= 3 &&
"expected instruction with 1 result and 1-3 sources");
// Split each source: ExtractedRegs[I] holds the pieces of source I.
3984 for (
int I = 0;
I != NumOps; ++
I) {
3992 OpNarrowTy = NarrowScalarTy;
4005 LLT GCDTy = extractGCDType(ExtractedRegs[
I], SrcTy, OpNarrowTy, SrcReg);
4008 buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[
I],
4009 TargetOpcode::G_ANYEXT);
// The piece count is taken from the first source.
4017 int NumParts = ExtractedRegs[0].size();
4022 LLT DstLCMTy, NarrowDstTy;
4024 DstLCMTy =
getLCMType(DstScalarTy, NarrowScalarTy);
4025 NarrowDstTy = NarrowScalarTy;
4028 NarrowDstTy = NarrowTy;
// DstSize / NarrowSize rounded up.
4033 const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
// For each real piece, gather the I-th piece of every source into InputRegs
// and record result 0 of the new instruction.
4035 for (
int I = 0;
I != NumRealParts; ++
I) {
4037 for (
int J = 0; J != NumOps; ++J)
4038 InputRegs[J] = ExtractedRegs[J][
I];
4041 ResultRegs.push_back(Inst.getReg(0));
// Pieces beyond NumRealParts get no instruction; ResultRegs is extended by
// that many entries instead (the appended value is elided in this excerpt).
4046 int NumUndefParts = NumParts - NumRealParts;
4047 if (NumUndefParts != 0)
4048 ResultRegs.
append(NumUndefParts,
4057 MergeDstReg = DstReg;
4059 buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);
// The original instruction is removed.
4065 MI.eraseFromParent();
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment splits SrcReg into pieces and re-merges Parts into DstReg; the
// per-piece work between those steps is elided here. Presumably the
// G_SEXT_INREG splitter -- confirm.
// Operand 2 of MI is read as an immediate.
4074 int64_t Imm =
MI.getOperand(2).getImm();
4079 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4080 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
4085 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
// The original instruction is removed.
4087 MI.eraseFromParent();
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment is a switch over MI's opcode; only a subset of its case labels
// survives here and the handler bodies are elided. Presumably the top-level
// dispatcher for splitting vectors into fewer elements -- confirm.
// The using-directive lets the case labels below omit the TargetOpcode::
// qualifier.
4094 using namespace TargetOpcode;
4096 switch (
MI.getOpcode()) {
4097 case G_IMPLICIT_DEF:
4114 case G_FCANONICALIZE:
4129 case G_INTRINSIC_ROUND:
4130 case G_INTRINSIC_ROUNDEVEN:
4131 case G_INTRINSIC_TRUNC:
4147 case G_FMINNUM_IEEE:
4148 case G_FMAXNUM_IEEE:
4168 case G_CTLZ_ZERO_UNDEF:
4170 case G_CTTZ_ZERO_UNDEF:
4185 case G_ADDRSPACE_CAST:
4194 case G_UNMERGE_VALUES:
// G_BUILD_VECTOR is only accepted for type index 0.
4196 case G_BUILD_VECTOR:
4197 assert(TypeIdx == 0 &&
"not a vector type index");
4199 case G_CONCAT_VECTORS:
// Extract and insert share one case group.
4203 case G_EXTRACT_VECTOR_ELT:
4204 case G_INSERT_VECTOR_ELT:
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment splits the source of a G_VECREDUCE_* instruction, reduces each
// piece, then combines the partial results with the matching scalar opcode.
4220 unsigned Opc =
MI.getOpcode();
// The two sequential (ordered) reduction opcodes must not reach this code.
4221 assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4222 Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4223 "Sequential reductions not expected");
// Split SrcReg into NumParts pieces of NarrowTy, then collect one partial
// reduction per piece.
4240 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4242 for (
unsigned Part = 0; Part < NumParts; ++Part) {
4243 PartialReductions.push_back(
// Map each vector-reduction opcode to the scalar opcode used to combine two
// values.
4249 case TargetOpcode::G_VECREDUCE_FADD:
4250 ScalarOpc = TargetOpcode::G_FADD;
4252 case TargetOpcode::G_VECREDUCE_FMUL:
4253 ScalarOpc = TargetOpcode::G_FMUL;
4255 case TargetOpcode::G_VECREDUCE_FMAX:
4256 ScalarOpc = TargetOpcode::G_FMAXNUM;
4258 case TargetOpcode::G_VECREDUCE_FMIN:
4259 ScalarOpc = TargetOpcode::G_FMINNUM;
4261 case TargetOpcode::G_VECREDUCE_ADD:
4262 ScalarOpc = TargetOpcode::G_ADD;
4264 case TargetOpcode::G_VECREDUCE_MUL:
4265 ScalarOpc = TargetOpcode::G_MUL;
4267 case TargetOpcode::G_VECREDUCE_AND:
4268 ScalarOpc = TargetOpcode::G_AND;
4270 case TargetOpcode::G_VECREDUCE_OR:
4271 ScalarOpc = TargetOpcode::G_OR;
4273 case TargetOpcode::G_VECREDUCE_XOR:
4274 ScalarOpc = TargetOpcode::G_XOR;
4276 case TargetOpcode::G_VECREDUCE_SMAX:
4277 ScalarOpc = TargetOpcode::G_SMAX;
4279 case TargetOpcode::G_VECREDUCE_SMIN:
4280 ScalarOpc = TargetOpcode::G_SMIN;
4282 case TargetOpcode::G_VECREDUCE_UMAX:
4283 ScalarOpc = TargetOpcode::G_UMAX;
4285 case TargetOpcode::G_VECREDUCE_UMIN:
4286 ScalarOpc = TargetOpcode::G_UMIN;
// Any other opcode is reported in the debug log as an unknown reduction kind.
4289 LLVM_DEBUG(
dbgs() <<
"Can't legalize: unknown reduction kind.\n");
// One path delegates to tryNarrowPow2Reduction; the condition selecting it is
// elided in this excerpt.
4297 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
// Otherwise the partial results are combined in order, starting from the
// first one. The final part (Part == NumParts - 1) is handled by a separate
// branch.
4300 Register Acc = PartialReductions[0];
4301 for (
unsigned Part = 1; Part < NumParts; ++Part) {
4302 if (Part == NumParts - 1) {
4304 {Acc, PartialReductions[Part]});
4307 .
buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
// The original instruction is removed.
4311 MI.eraseFromParent();
4318 unsigned ScalarOpc) {
// NOTE(review): only the tail of the signature is visible in this excerpt;
// the call site elsewhere in this file names the function
// tryNarrowPow2Reduction and passes (MI, SrcReg, SrcTy, NarrowTy, ScalarOpc).
// SrcReg is split into pieces; the while loop then runs for as long as more
// than one piece remains, walking the list two entries at a time and
// collecting each result in PartialRdxs.
4321 extractParts(SrcReg, NarrowTy,
4325 while (SplitSrcs.size() > 1) {
4327 for (
unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4333 PartialRdxs.push_back(Res);
// MI is kept; its operand 1 is redirected to the single remaining piece.
4339 MI.getOperand(1).setReg(SplitSrcs[0]);
4346 const LLT HalfTy,
const LLT AmtTy) {
// NOTE(review): only the tail of the signature is visible in this excerpt.
// The fragment selects among cases by comparing a constant shift amount Amt
// with the full width (VTBits) and the half width (NVTBits) -- presumably the
// shift-by-constant narrowing; confirm.
4354 MI.eraseFromParent();
// The full width is twice the half width.
4360 unsigned VTBits = 2 * NVTBits;
// Each of the three opcode branches below (G_SHL, G_LSHR, and a final branch
// whose opcode is not visible here) tests, in order: Amt > VTBits,
// Amt > NVTBits, Amt == NVTBits. The generated code for each case is elided.
4363 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
4364 if (Amt.
ugt(VTBits)) {
4366 }
else if (Amt.
ugt(NVTBits)) {
4370 }
else if (Amt == NVTBits) {
4381 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
4382 if (Amt.
ugt(VTBits)) {
4384 }
else if (Amt.
ugt(NVTBits)) {
4388 }
else if (Amt == NVTBits) {
4402 if (Amt.
ugt(VTBits)) {
4405 }
else if (Amt.
ugt(NVTBits)) {
4410 }
else if (Amt == NVTBits) {
// The original instruction is removed.
4427 MI.eraseFromParent();
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment produces a low and a high result register for G_SHL, G_LSHR
// and G_ASHR -- presumably the variable-amount shift narrowing; confirm.
// An odd DstEltSize is tested for; the action taken is elided here.
4451 if (DstEltSize % 2 != 0)
// The new piece width is half of DstEltSize.
4457 const unsigned NewBitSize = DstEltSize / 2;
// The constant value read from operand 1 of KShiftAmt is forwarded together
// with HalfTy and ShiftAmtTy; the callee name is elided in this excerpt.
4464 MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
4484 switch (
MI.getOpcode()) {
// Left shift: ResultRegs[0] receives Lo, ResultRegs[1] receives Hi.
4485 case TargetOpcode::G_SHL: {
4501 ResultRegs[0] =
Lo.getReg(0);
4502 ResultRegs[1] =
Hi.getReg(0);
// Both right shifts share one case; G_LSHR is singled out inside it.
4505 case TargetOpcode::G_LSHR:
4506 case TargetOpcode::G_ASHR: {
4516 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
4530 ResultRegs[0] =
Lo.getReg(0);
4531 ResultRegs[1] =
Hi.getReg(0);
// The original instruction is removed.
4539 MI.eraseFromParent();
// NOTE(review): the enclosing function header and the loop body are not
// visible in this excerpt. Presumably the G_PHI widening helper -- confirm.
// Only type index 0 is supported.
4546 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
// Walk MI's operands from index 1 in steps of two.
// NOTE(review): presumably the incoming-value operands of a PHI -- confirm.
4549 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
// NOTE(review): the enclosing function header and the switch statement line
// are not visible in this excerpt; only case labels and part of the
// G_UNMERGE_VALUES handler survive. Presumably the top-level dispatcher for
// widening vectors to more elements -- confirm.
4565 unsigned Opc =
MI.getOpcode();
4567 case TargetOpcode::G_IMPLICIT_DEF:
4568 case TargetOpcode::G_LOAD: {
4576 case TargetOpcode::G_STORE:
// Bitwise and min/max style opcodes share one case group.
4583 case TargetOpcode::G_AND:
4584 case TargetOpcode::G_OR:
4585 case TargetOpcode::G_XOR:
4586 case TargetOpcode::G_SMIN:
4587 case TargetOpcode::G_SMAX:
4588 case TargetOpcode::G_UMIN:
4589 case TargetOpcode::G_UMAX:
4590 case TargetOpcode::G_FMINNUM:
4591 case TargetOpcode::G_FMAXNUM:
4592 case TargetOpcode::G_FMINNUM_IEEE:
4593 case TargetOpcode::G_FMAXNUM_IEEE:
4594 case TargetOpcode::G_FMINIMUM:
4595 case TargetOpcode::G_FMAXIMUM: {
4603 case TargetOpcode::G_EXTRACT:
4610 case TargetOpcode::G_INSERT:
4611 case TargetOpcode::G_FREEZE:
4619 case TargetOpcode::G_SELECT:
4631 case TargetOpcode::G_UNMERGE_VALUES: {
// All operands except the last are results; the last operand (index NumDst)
// is the source and is added as the use of the replacement instruction.
4636 int NumDst =
MI.getNumOperands() - 1;
// The first loop covers the NumDst original results; the second covers the
// additional indices from NumDst up to NewNumDst (loop bodies elided here).
4640 for (
int I = 0;
I != NumDst; ++
I)
4644 for (
int I = NumDst;
I != NewNumDst; ++
I)
4647 MIB.addUse(
MI.getOperand(NumDst).getReg());
// The original instruction is removed.
4648 MI.eraseFromParent();
4651 case TargetOpcode::G_PHI:
// NOTE(review): the enclosing function header is not visible in this excerpt.
// The fragment computes a multi-piece product: every entry of DstRegs is
// filled from buildMul / buildUMulH products of pieces of Src1Regs and
// Src2Regs plus a carry value propagated from the previous entry.
4663 unsigned SrcParts = Src1Regs.
size();
4664 unsigned DstParts = DstRegs.size();
// Entry 0 is just the buildMul product of the two first pieces.
4666 unsigned DstIdx = 0;
4668 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
4669 DstRegs[DstIdx] = FactorSum;
4671 unsigned CarrySumPrevDstIdx;
// For each remaining entry, Factors collects: buildMul products of
// Src1Regs[DstIdx - i] and Src2Regs[i], buildUMulH products of
// Src1Regs[DstIdx - 1 - i] and Src2Regs[i], and the carry of the previous
// entry. The loop start values keep the Src1Regs index below SrcParts.
4674 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
4676 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
4679 B.buildMul(NarrowTy, Src1Regs[DstIdx -
i], Src2Regs[
i]);
4680 Factors.push_back(
Mul.getReg(0));
4683 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
4684 i <=
std::min(DstIdx - 1, SrcParts - 1); ++
i) {
4686 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 -
i], Src2Regs[
i]);
4687 Factors.push_back(Umulh.
getReg(0));
4691 Factors.push_back(CarrySumPrevDstIdx);
// Every entry except the last sums its factors with buildUAddo: the 1-bit
// carry result is zero-extended to NarrowTy and accumulated in CarrySum.
4696 if (DstIdx != DstParts - 1) {
4698 B.buildUAddo(NarrowTy,
LLT::scalar(1), Factors[0], Factors[1]);
4699 FactorSum = Uaddo.
getReg(0);
4700 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).
getReg(0);
4701 for (
unsigned i = 2;
i < Factors.size(); ++
i) {
4703 B.buildUAddo(NarrowTy,
LLT::scalar(1), FactorSum, Factors[
i]);
4704 FactorSum = Uaddo.
getReg(0);
4706 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
// The other path sums the factors with plain buildAdd.
// NOTE(review): presumably taken for the last entry, whose carry-out is never
// consumed -- confirm.
4710 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
4711 for (
unsigned i = 2;
i < Factors.size(); ++
i)
4712 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[
i]).getReg(0);
// The carry is handed to the next entry and the finished sum is stored.
4715 CarrySumPrevDstIdx = CarrySum;
4716 DstRegs[DstIdx] = FactorSum;
4738 if (SizeOp0 % NarrowSize != 0)
4744 unsigned Opcode =
MI.getOpcode();
4745 unsigned OpO, OpE, OpF;
4747 case TargetOpcode::G_SADDO:
4748 case TargetOpcode::G_SADDE:
4749 case TargetOpcode::G_UADDO:
4750 case TargetOpcode::G_UADDE:
4751 case TargetOpcode::G_ADD:
4752 OpO = TargetOpcode::G_UADDO;
4753 OpE = TargetOpcode::G_UADDE;
4754 OpF = TargetOpcode::G_UADDE;
4755 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
4756 OpF = TargetOpcode::G_SADDE;
4758 case TargetOpcode::G_SSUBO:
4759 case TargetOpcode::G_SSUBE:
4760 case TargetOpcode::G_USUBO:
4761 case TargetOpcode::G_USUBE:
4762 case TargetOpcode::G_SUB:
4763 OpO = TargetOpcode::G_USUBO;
4764 OpE = TargetOpcode::G_USUBE;
4765 OpF = TargetOpcode::G_USUBE;
4766 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
4767 OpF = TargetOpcode::G_SSUBE;
4774 unsigned NumDefs =
MI.getNumExplicitDefs();
4775 Register Src1 =
MI.getOperand(NumDefs).getReg();
4776 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
4779 CarryDst =
MI.getOperand(1).getReg();
4781 if (
MI.getNumOperands() == NumDefs + 3)
4782 CarryIn =
MI.getOperand(NumDefs + 2).getReg();