49#include "llvm/IR/IntrinsicsPowerPC.h"
72#define DEBUG_TYPE "ppc-isel"
73#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
76 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
78 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
80 "Number of sign extensions for compare inputs added.");
82 "Number of zero extensions for compare inputs added.");
84 "Number of logical ops on i1 values calculated in GPR.");
86 "Number of compares not eliminated as they have non-extending uses.");
88 "Number of compares lowered to setb.");
96 cl::desc(
"use aggressive ppc isel for bit permutations"),
99 "ppc-bit-perm-rewriter-stress-rotates",
100 cl::desc(
"stress rotate selection in aggressive ppc isel for "
105 "ppc-use-branch-hint",
cl::init(
true),
106 cl::desc(
"Enable static hinting of branches on ppc"),
111 cl::desc(
"Enable tls optimization peephole"),
120 cl::desc(
"Specify the types of comparisons to emit GPR-only code for."),
126 "Only comparisons where inputs don't need [sz]ext."),
129 "Only i32 comparisons with zext result."),
131 "Only i64 comparisons with zext result."),
134 "Only i32 comparisons with sext result."),
136 "Only i64 comparisons with sext result.")));
147 unsigned GlobalBaseReg = 0;
152 PPCDAGToDAGISel() =
delete;
162 if (Subtarget->hasROPProtect()) {
181 inline SDValue getI16Imm(
unsigned Imm,
const SDLoc &dl) {
182 return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
187 inline SDValue getI32Imm(
unsigned Imm,
const SDLoc &dl) {
188 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
194 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
199 return CurDAG->getTargetConstant(
200 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
205 static bool isRotateAndMask(
SDNode *
N,
unsigned Mask,
bool isShiftMask,
206 unsigned &SH,
unsigned &MB,
unsigned &ME);
210 SDNode *getGlobalBaseReg();
218 bool tryBitfieldInsert(
SDNode *
N);
219 bool tryBitPermutation(
SDNode *
N);
220 bool tryIntCompareInGPR(
SDNode *
N);
252 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
260 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
268 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
276 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
283 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
291 return PPCLowering->SelectOptimalAddrMode(Parent,
N, Disp,
Base, *CurDAG,
299 return PPCLowering->SelectForceXFormMode(
N, Disp,
Base, *CurDAG) ==
310 return PPCLowering->SelectAddressRegReg(
N,
Base,
Index, *CurDAG,
321 return PPCLowering->SelectAddressRegReg(
N,
Base,
Index, *CurDAG,
332 return PPCLowering->SelectAddressRegReg(
N,
Base,
Index, *CurDAG,
339 return PPCLowering->SelectAddressRegRegOnly(
N,
Base,
Index, *CurDAG);
348 return PPCLowering->SelectAddressRegImm(
N, Disp,
Base, *CurDAG,
356 return PPCLowering->SelectAddressRegImm(
N, Disp,
Base, *CurDAG,
Align(4));
363 return PPCLowering->SelectAddressRegImm(
N, Disp,
Base, *CurDAG,
371 return PPCLowering->SelectAddressRegImm34(
N, Disp,
Base, *CurDAG);
381 return PPCLowering->SelectAddressPCRel(
N,
Base);
391 std::vector<SDValue> &OutOps)
override {
392 switch(ConstraintID) {
394 errs() <<
"ConstraintID: "
408 SDValue RC = CurDAG->getTargetConstant(TRC->
getID(), dl, MVT::i32);
410 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
411 dl,
Op.getValueType(),
414 OutOps.push_back(NewOp);
421#include "PPCGenDAGISel.inc"
425 bool tryFoldSWTestBRCC(
SDNode *
N);
426 bool trySelectLoopCountIntrinsic(
SDNode *
N);
427 bool tryAsSingleRLDICL(
SDNode *
N);
428 bool tryAsSingleRLDCL(
SDNode *
N);
429 bool tryAsSingleRLDICR(
SDNode *
N);
430 bool tryAsSingleRLWINM(
SDNode *
N);
431 bool tryAsSingleRLWINM8(
SDNode *
N);
432 bool tryAsSingleRLWIMI(
SDNode *
N);
433 bool tryAsPairOfRLDICL(
SDNode *
N);
434 bool tryAsSingleRLDIMI(
SDNode *
N);
436 void PeepholePPC64();
437 void PeepholePPC64ZExt();
438 void PeepholeCROps();
443 bool AllUsersSelectZero(
SDNode *
N);
444 void SwapAllSelectUsers(
SDNode *
N);
446 bool isOffsetMultipleOf(
SDNode *
N,
unsigned Val)
const;
// Static pass-identification member: LLVM passes are identified by the
// address of this char, not its value — presumably consumed by the pass
// registration machinery elsewhere in the file (not visible in this chunk).
452char PPCDAGToDAGISel::ID = 0;
459SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
460 if (!GlobalBaseReg) {
468 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
471 if (!Subtarget->isSecurePlt() &&
481 TII.get(PPC::UpdateGBR), GlobalBaseReg)
487 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
507 return CurDAG->getRegister(GlobalBaseReg,
508 PPCLowering->getPointerTy(CurDAG->getDataLayout()))
530 "GlobalVariables with an alignment requirement stricter than TOC entry "
531 "size not supported by the toc data transformation.");
535 assert(GVType->
isSized() &&
"A GlobalVariable's size must be known to be "
536 "supported by the toc data transformation.");
540 "supported by the toc data transformation.");
544 "supported by the toc data transformation.");
548 "supported by the toc data transformation.");
551 "A GlobalVariable with size larger than a TOC entry is not currently "
552 "supported by the toc data transformation.");
556 "currently supported by the toc data transformation.");
559 "Tentative definitions cannot have the mapping class XMC_TD.");
567 if (
N->getOpcode() ==
ISD::Constant &&
N->getValueType(0) == MVT::i32) {
568 Imm = cast<ConstantSDNode>(
N)->getZExtValue();
577 if (
N->getOpcode() ==
ISD::Constant &&
N->getValueType(0) == MVT::i64) {
578 Imm = cast<ConstantSDNode>(
N)->getZExtValue();
599 assert(isa<BasicBlockSDNode>(DestMBB));
629 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
633 <<
"::" << BB->
getName() <<
"'\n"
634 <<
" -> " <<
TBB->
getName() <<
": " << TProb <<
"\n"
635 <<
" -> " << FBB->
getName() <<
": " << FProb <<
"\n");
651 return N->getOpcode() == Opc
657 int FI = cast<FrameIndexSDNode>(
N)->getIndex();
658 SDValue TFI = CurDAG->getTargetFrameIndex(FI,
N->getValueType(0));
659 unsigned Opc =
N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
661 CurDAG->SelectNodeTo(SN, Opc,
N->getValueType(0), TFI,
662 getSmallIPtrImm(
Offset, dl));
664 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl,
N->getValueType(0), TFI,
665 getSmallIPtrImm(
Offset, dl)));
668bool PPCDAGToDAGISel::isRotateAndMask(
SDNode *
N,
unsigned Mask,
669 bool isShiftMask,
unsigned &SH,
670 unsigned &MB,
unsigned &ME) {
673 if (
N->getValueType(0) != MVT::i32)
677 unsigned Indeterminant = ~0;
678 unsigned Opcode =
N->getOpcode();
679 if (
N->getNumOperands() != 2 ||
685 if (isShiftMask)
Mask =
Mask << Shift;
687 Indeterminant = ~(0xFFFFFFFFu << Shift);
690 if (isShiftMask)
Mask =
Mask >> Shift;
692 Indeterminant = ~(0xFFFFFFFFu >> Shift);
702 if (Mask && !(Mask & Indeterminant)) {
715 "Only expecting the ADD_TLS instruction to acquire the thread pointer!");
719 unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
732 LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSOp1);
744 dyn_cast_or_null<RegisterSDNode>(ADDTLSOp1.getNode());
766 for (
auto *ADDTLSUse :
Base.getNode()->uses()) {
770 if (
LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSUse)) {
771 if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
773 }
else if (
StoreSDNode *ST = dyn_cast<StoreSDNode>(ADDTLSUse)) {
774 if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
789bool PPCDAGToDAGISel::tryTLSXFormStore(
StoreSDNode *ST) {
795 EVT MemVT =
ST->getMemoryVT();
796 EVT RegVT =
ST->getValue().getValueType();
803 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
807 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
811 Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
815 Opcode = PPC::STDXTLS;
819 Opcode = PPC::STFSXTLS;
823 Opcode = PPC::STFDXTLS;
831 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
832 transferMemOperands(ST, MN);
837bool PPCDAGToDAGISel::tryTLSXFormLoad(
LoadSDNode *LD) {
843 EVT MemVT =
LD->getMemoryVT();
844 EVT RegVT =
LD->getValueType(0);
851 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
855 if (RegVT == MVT::i32)
856 Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
858 Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
862 if (RegVT == MVT::i32)
863 Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
865 Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
869 Opcode = PPC::LDXTLS;
873 Opcode = PPC::LFSXTLS;
877 Opcode = PPC::LFDXTLS;
884 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
885 transferMemOperands(LD, MN);
892bool PPCDAGToDAGISel::tryBitfieldInsert(
SDNode *
N) {
897 KnownBits LKnown = CurDAG->computeKnownBits(Op0);
898 KnownBits RKnown = CurDAG->computeKnownBits(Op1);
903 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
906 unsigned Value, SH = 0;
907 TargetMask = ~TargetMask;
908 InsertMask = ~InsertMask;
956 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
958 ReplaceNode(
N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
966 unsigned MaxTruncation = 0;
973 Use->isMachineOpcode() ?
Use->getMachineOpcode() :
Use->getOpcode();
977 if (
Use->isMachineOpcode())
980 std::max(MaxTruncation, (
unsigned)
Use->getValueType(0).getSizeInBits());
983 if (
Use->isMachineOpcode())
989 MaxTruncation = std::max(MaxTruncation, MemVTSize);
998 MaxTruncation = std::max(MaxTruncation, 32u);
1006 MaxTruncation = std::max(MaxTruncation, 16u);
1014 MaxTruncation = std::max(MaxTruncation, 8u);
1018 return MaxTruncation;
1024 unsigned HiTZ = llvm::countr_zero<uint32_t>(
Hi_32(Imm));
1025 unsigned LoLZ = llvm::countl_zero<uint32_t>(
Lo_32(Imm));
1026 if ((HiTZ + LoLZ) >= Num)
1034 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1035 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1036 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1037 unsigned LO = llvm::countl_one<uint64_t>(Imm);
1038 unsigned Hi32 =
Hi_32(Imm);
1039 unsigned Lo32 =
Lo_32(Imm);
1040 SDNode *Result =
nullptr;
1043 auto getI32Imm = [CurDAG, dl](
unsigned Imm) {
1051 if (isInt<16>(Imm)) {
1057 if (TZ > 15 && (LZ > 32 || LO > 32))
1059 getI32Imm((Imm >> 16) & 0xffff));
1063 assert(LZ < 64 &&
"Unexpected leading zeros here.");
1065 unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
1068 if (isInt<32>(Imm)) {
1069 uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
1070 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1071 Result = CurDAG->
getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1073 getI32Imm(Imm & 0xffff));
1081 if ((LZ + FO + TZ) > 48) {
1083 getI32Imm((Imm >> TZ) & 0xffff));
1085 getI32Imm(TZ), getI32Imm(LZ));
1102 if ((LZ + TO) > 48) {
1106 assert(LZ <= 32 &&
"Unexpected shift value.");
1108 getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1110 getI32Imm(48 - LZ), getI32Imm(LZ));
1128 if ((LZ + FO + TO) > 48) {
1130 getI32Imm((Imm >> TO) & 0xffff));
1132 getI32Imm(TO), getI32Imm(LZ));
1138 if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1140 getI32Imm(Lo32 & 0xffff));
1142 getI32Imm(Lo32 >> 16));
1166 getI32Imm(RotImm & 0xffff));
1168 getI32Imm(Shift), getI32Imm(0));
1175 uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1177 if (isInt<16>(Lo32))
1179 CurDAG->
getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
1182 CurDAG->
getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1186 CurDAG->
getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1188 SDValue(Result, 0), getI32Imm(ImmLo16));
1205 if ((LZ + FO + TZ) > 32) {
1206 uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1207 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1208 Result = CurDAG->
getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1210 getI32Imm((Imm >> TZ) & 0xffff));
1212 getI32Imm(TZ), getI32Imm(LZ));
1219 if ((LZ + TO) > 32) {
1223 assert(LZ <= 32 &&
"Unexpected shift value.");
1225 getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1227 getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1229 getI32Imm(32 - LZ), getI32Imm(LZ));
1237 if ((LZ + FO + TO) > 32) {
1239 getI32Imm((Imm >> (TO + 16)) & 0xffff));
1241 getI32Imm((Imm >> TO) & 0xffff));
1243 getI32Imm(TO), getI32Imm(LZ));
1255 uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1256 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1257 Result = CurDAG->
getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1259 getI32Imm(RotImm & 0xffff));
1261 getI32Imm(Shift), getI32Imm(0));
1275 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1276 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1277 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1278 unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1279 unsigned Hi32 =
Hi_32(Imm);
1280 unsigned Lo32 =
Lo_32(Imm);
1282 auto getI32Imm = [CurDAG, dl](
unsigned Imm) {
1286 auto getI64Imm = [CurDAG, dl](
uint64_t Imm) {
1301 SDNode *Result =
nullptr;
1308 if ((LZ + FO + TZ) > 30) {
1309 APInt SignedInt34 =
APInt(34, (Imm >> TZ) & 0x3ffffffff);
1312 getI64Imm(*Extended.getRawData()));
1314 getI32Imm(TZ), getI32Imm(LZ));
1330 if ((LZ + TO) > 30) {
1331 APInt SignedInt34 =
APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1334 getI64Imm(*Extended.getRawData()));
1336 getI32Imm(30 - LZ), getI32Imm(LZ));
1343 if ((LZ + FO + TO) > 30) {
1344 APInt SignedInt34 =
APInt(34, (Imm >> TO) & 0x3ffffffff);
1347 getI64Imm(*Extended.getRawData()));
1349 getI32Imm(TO), getI32Imm(LZ));
1361 for (
unsigned Shift = 0; Shift < 63; ++Shift) {
1363 if (isInt<34>(RotImm)) {
1365 CurDAG->
getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1367 SDValue(Result, 0), getI32Imm(Shift),
1375 Result = CurDAG->
getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1385 CurDAG->
getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1387 CurDAG->
getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1394 unsigned *InstCnt =
nullptr) {
1395 unsigned InstCntDirect = 0;
1406 if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1407 unsigned InstCntDirectP = 0;
1414 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1416 *InstCnt = InstCntDirectP;
1423 *InstCnt = InstCntDirect;
1426 auto getI32Imm = [CurDAG, dl](
unsigned Imm) {
1435 if (Hi16OfLo32 && Lo16OfLo32) {
1438 bool IsSelected =
false;
1442 CurDAG->
getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
1444 SDValue(Result, 0), getI32Imm(Lo16));
1450 if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1452 Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1457 }
else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1459 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1462 getI32Imm(16), getI32Imm(31)};
1463 Result = CurDAG->
getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1464 }
else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1466 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1469 getI32Imm(0), getI32Imm(15)};
1470 Result = CurDAG->
getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1472 if (IsSelected ==
true) {
1485 SDValue(Result, 0), getI32Imm(Hi16OfLo32));
1490 getI32Imm(Lo16OfLo32));
1494 *InstCnt = InstCntDirect;
1503 int64_t Imm = cast<ConstantSDNode>(
N)->getZExtValue();
1507 if (isInt<16>(SextImm))
1515class BitPermutationSelector {
1530 VariableKnownToBeZero
1533 ValueBit(
SDValue V,
unsigned I, Kind K = Variable)
1535 ValueBit(Kind K = Variable) :
Idx(UINT32_MAX),
K(
K) {}
1538 return K == ConstZero ||
K == VariableKnownToBeZero;
1541 bool hasValue()
const {
1542 return K == Variable ||
K == VariableKnownToBeZero;
1546 assert(hasValue() &&
"Cannot get the value of a constant bit");
1550 unsigned getValueBitIndex()
const {
1551 assert(hasValue() &&
"Cannot get the value bit index of a constant bit");
1560 unsigned StartIdx, EndIdx;
1570 bool Repl32Coalesced;
1572 BitGroup(
SDValue V,
unsigned R,
unsigned S,
unsigned E)
1573 :
V(
V), RLAmt(
R), StartIdx(S), EndIdx(
E), Repl32(
false), Repl32CR(
false),
1574 Repl32Coalesced(
false) {
1575 LLVM_DEBUG(
dbgs() <<
"\tbit group for " <<
V.getNode() <<
" RLAmt = " << R
1576 <<
" [" << S <<
", " <<
E <<
"]\n");
1582 struct ValueRotInfo {
1584 unsigned RLAmt = std::numeric_limits<unsigned>::max();
1585 unsigned NumGroups = 0;
1586 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1587 bool Repl32 =
false;
1589 ValueRotInfo() =
default;
1597 if (Repl32 <
Other.Repl32)
1599 else if (Repl32 >
Other.Repl32)
1601 else if (NumGroups >
Other.NumGroups)
1603 else if (NumGroups <
Other.NumGroups)
1605 else if (RLAmt == 0 &&
Other.RLAmt != 0)
1607 else if (RLAmt != 0 &&
Other.RLAmt == 0)
1609 else if (FirstGroupStartIdx <
Other.FirstGroupStartIdx)
1615 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1616 using ValueBitsMemoizer =
1618 ValueBitsMemoizer Memoizer;
1624 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(
SDValue V,
1626 auto &ValueEntry = Memoizer[
V];
1628 return std::make_pair(ValueEntry->first, &ValueEntry->second);
1629 ValueEntry.reset(
new ValueBitsMemoizedValue());
1630 bool &Interesting = ValueEntry->first;
1632 Bits.resize(NumBits);
1634 switch (
V.getOpcode()) {
1637 if (isa<ConstantSDNode>(
V.getOperand(1))) {
1638 unsigned RotAmt =
V.getConstantOperandVal(1);
1640 const auto &LHSBits = *getValueBits(
V.getOperand(0), NumBits).second;
1642 for (
unsigned i = 0; i < NumBits; ++i)
1643 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1645 return std::make_pair(Interesting =
true, &Bits);
1650 if (isa<ConstantSDNode>(
V.getOperand(1))) {
1651 unsigned ShiftAmt =
V.getConstantOperandVal(1);
1653 const auto &LHSBits = *getValueBits(
V.getOperand(0), NumBits).second;
1655 for (
unsigned i = ShiftAmt; i < NumBits; ++i)
1656 Bits[i] = LHSBits[i - ShiftAmt];
1658 for (
unsigned i = 0; i < ShiftAmt; ++i)
1659 Bits[i] = ValueBit(ValueBit::ConstZero);
1661 return std::make_pair(Interesting =
true, &Bits);
1666 if (isa<ConstantSDNode>(
V.getOperand(1))) {
1667 unsigned ShiftAmt =
V.getConstantOperandVal(1);
1669 const auto &LHSBits = *getValueBits(
V.getOperand(0), NumBits).second;
1671 for (
unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1672 Bits[i] = LHSBits[i + ShiftAmt];
1674 for (
unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1675 Bits[i] = ValueBit(ValueBit::ConstZero);
1677 return std::make_pair(Interesting =
true, &Bits);
1681 if (isa<ConstantSDNode>(
V.getOperand(1))) {
1689 std::tie(Interesting, LHSBits) = getValueBits(
V.getOperand(0), NumBits);
1691 for (
unsigned i = 0; i < NumBits; ++i)
1692 if (((Mask >> i) & 1) == 1)
1693 Bits[i] = (*LHSBits)[i];
1697 if ((*LHSBits)[i].
isZero())
1698 Bits[i] = (*LHSBits)[i];
1700 Bits[i] = ValueBit(ValueBit::ConstZero);
1703 return std::make_pair(Interesting, &Bits);
1707 const auto &LHSBits = *getValueBits(
V.getOperand(0), NumBits).second;
1708 const auto &RHSBits = *getValueBits(
V.getOperand(1), NumBits).second;
1710 bool AllDisjoint =
true;
1712 unsigned LastIdx = 0;
1713 for (
unsigned i = 0; i < NumBits; ++i) {
1721 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1722 LHSBits[i].getValueBitIndex() == LastIdx + 1)
1723 Bits[i] = LHSBits[i];
1724 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1725 RHSBits[i].getValueBitIndex() == LastIdx + 1)
1726 Bits[i] = RHSBits[i];
1728 Bits[i] = ValueBit(ValueBit::ConstZero);
1730 else if (LHSBits[i].
isZero())
1731 Bits[i] = RHSBits[i];
1732 else if (RHSBits[i].
isZero())
1733 Bits[i] = LHSBits[i];
1735 AllDisjoint =
false;
1739 if (Bits[i].hasValue()) {
1740 LastVal =
Bits[i].getValue();
1741 LastIdx =
Bits[i].getValueBitIndex();
1744 if (LastVal) LastVal =
SDValue();
1752 return std::make_pair(Interesting =
true, &Bits);
1756 if (
V.getValueType() != MVT::i64 ||
1757 V.getOperand(0).getValueType() != MVT::i32)
1761 const unsigned NumOperandBits = 32;
1762 std::tie(Interesting, LHSBits) = getValueBits(
V.getOperand(0),
1765 for (
unsigned i = 0; i < NumOperandBits; ++i)
1766 Bits[i] = (*LHSBits)[i];
1768 for (
unsigned i = NumOperandBits; i < NumBits; ++i)
1769 Bits[i] = ValueBit(ValueBit::ConstZero);
1771 return std::make_pair(Interesting, &Bits);
1775 EVT ToType =
V.getValueType();
1777 if (FromType != MVT::i64 || ToType != MVT::i32)
1779 const unsigned NumAllBits =
FromType.getSizeInBits();
1781 std::tie(Interesting, InBits) = getValueBits(
V.getOperand(0),
1787 bool UseUpper32bit =
false;
1788 for (
unsigned i = 0; i < NumValidBits; ++i)
1789 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1790 UseUpper32bit =
true;
1796 for (
unsigned i = 0; i < NumValidBits; ++i)
1797 Bits[i] = (*InBits)[i];
1799 return std::make_pair(Interesting, &Bits);
1805 std::tie(Interesting, LHSBits) = getValueBits(
V.getOperand(0),
1808 EVT FromType = cast<VTSDNode>(
V.getOperand(1))->getVT();
1809 const unsigned NumValidBits =
FromType.getSizeInBits();
1810 for (
unsigned i = 0; i < NumValidBits; ++i)
1811 Bits[i] = (*LHSBits)[i];
1815 for (
unsigned i = NumValidBits; i < NumBits; ++i)
1816 Bits[i] = (*LHSBits)[i].hasValue()
1817 ? ValueBit((*LHSBits)[i].getValue(),
1818 (*LHSBits)[i].getValueBitIndex(),
1819 ValueBit::VariableKnownToBeZero)
1820 : ValueBit(ValueBit::ConstZero);
1822 return std::make_pair(Interesting, &Bits);
1827 EVT VT =
LD->getMemoryVT();
1830 for (
unsigned i = 0; i < NumValidBits; ++i)
1831 Bits[i] = ValueBit(V, i);
1834 for (
unsigned i = NumValidBits; i < NumBits; ++i)
1835 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1839 return std::make_pair(Interesting =
false, &Bits);
1844 for (
unsigned i = 0; i < NumBits; ++i)
1845 Bits[i] = ValueBit(V, i);
1847 return std::make_pair(Interesting =
false, &Bits);
1852 void computeRotationAmounts() {
1854 RLAmt.resize(
Bits.size());
1855 for (
unsigned i = 0; i <
Bits.size(); ++i)
1856 if (Bits[i].hasValue()) {
1857 unsigned VBI =
Bits[i].getValueBitIndex();
1861 RLAmt[i] =
Bits.size() - (VBI - i);
1862 }
else if (Bits[i].
isZero()) {
1864 RLAmt[i] = UINT32_MAX;
1873 void collectBitGroups(
bool LateMask) {
1876 unsigned LastRLAmt = RLAmt[0];
1878 unsigned LastGroupStartIdx = 0;
1879 bool IsGroupOfZeros = !
Bits[LastGroupStartIdx].hasValue();
1880 for (
unsigned i = 1; i <
Bits.size(); ++i) {
1881 unsigned ThisRLAmt = RLAmt[i];
1883 if (LateMask && !ThisValue) {
1884 ThisValue = LastValue;
1885 ThisRLAmt = LastRLAmt;
1888 if (BitGroups.empty())
1889 LastGroupStartIdx = 0;
1896 if (IsGroupOfZeros && Bits[i].
isZero())
1901 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1904 if (!(IsGroupOfZeros && ThisValue && !Bits[i].
isZero()))
1908 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1910 LastRLAmt = ThisRLAmt;
1911 LastValue = ThisValue;
1912 LastGroupStartIdx = i;
1913 IsGroupOfZeros = !
Bits[LastGroupStartIdx].hasValue();
1916 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1919 if (BitGroups.empty())
1923 if (BitGroups.size() > 1) {
1927 if (BitGroups[0].StartIdx == 0 &&
1928 BitGroups[BitGroups.size()-1].EndIdx ==
Bits.size()-1 &&
1929 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1930 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1931 LLVM_DEBUG(
dbgs() <<
"\tcombining final bit group with initial one\n");
1932 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1933 BitGroups.erase(BitGroups.begin());
1943 void collectValueRotInfo() {
1946 for (
auto &BG : BitGroups) {
1947 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1948 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1950 VRI.RLAmt = BG.RLAmt;
1951 VRI.Repl32 = BG.Repl32;
1953 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1958 ValueRotsVec.clear();
1959 for (
auto &
I : ValueRots) {
1960 ValueRotsVec.push_back(
I.second);
1973 void assignRepl32BitGroups() {
1984 auto IsAllLow32 = [
this](BitGroup & BG) {
1985 if (BG.StartIdx <= BG.EndIdx) {
1986 for (
unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1987 if (!Bits[i].hasValue())
1989 if (Bits[i].getValueBitIndex() >= 32)
1993 for (
unsigned i = BG.StartIdx; i <
Bits.size(); ++i) {
1994 if (!Bits[i].hasValue())
1996 if (Bits[i].getValueBitIndex() >= 32)
1999 for (
unsigned i = 0; i <= BG.EndIdx; ++i) {
2000 if (!Bits[i].hasValue())
2002 if (Bits[i].getValueBitIndex() >= 32)
2010 for (
auto &BG : BitGroups) {
2014 if (BG.RLAmt == 0) {
2015 auto PotentiallyMerged = [
this](BitGroup & BG) {
2016 for (
auto &BG2 : BitGroups)
2017 if (&BG != &BG2 && BG.V == BG2.V &&
2018 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
2022 if (!PotentiallyMerged(BG))
2025 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
2026 if (IsAllLow32(BG)) {
2027 if (BG.RLAmt >= 32) {
2035 << BG.V.getNode() <<
" RLAmt = " << BG.RLAmt <<
" ["
2036 << BG.StartIdx <<
", " << BG.EndIdx <<
"]\n");
2042 for (
auto I = BitGroups.begin();
I != BitGroups.end();) {
2045 auto IP = (
I == BitGroups.begin()) ?
2046 std::prev(BitGroups.end()) : std::prev(
I);
2047 if (
I->Repl32 && IP->Repl32 &&
I->V == IP->V &&
I->RLAmt == IP->RLAmt &&
2048 I->StartIdx == (IP->EndIdx + 1) % 64 &&
I != IP) {
2050 LLVM_DEBUG(
dbgs() <<
"\tcombining 32-bit replicated bit group for "
2051 <<
I->V.getNode() <<
" RLAmt = " <<
I->RLAmt <<
" ["
2052 <<
I->StartIdx <<
", " <<
I->EndIdx
2053 <<
"] with group with range [" << IP->StartIdx <<
", "
2054 << IP->EndIdx <<
"]\n");
2056 IP->EndIdx =
I->EndIdx;
2057 IP->Repl32CR = IP->Repl32CR ||
I->Repl32CR;
2058 IP->Repl32Coalesced =
true;
2059 I = BitGroups.erase(
I);
2068 if (
I->StartIdx == 32 &&
I->EndIdx == 63) {
2069 assert(std::next(
I) == BitGroups.end() &&
2070 "bit group ends at index 63 but there is another?");
2071 auto IN = BitGroups.begin();
2073 if (IP->Repl32 && IN->Repl32 &&
I->V == IP->V &&
I->V == IN->V &&
2074 (
I->RLAmt % 32) == IP->RLAmt && (
I->RLAmt % 32) == IN->RLAmt &&
2075 IP->EndIdx == 31 && IN->StartIdx == 0 &&
I != IP &&
2079 <<
" RLAmt = " <<
I->RLAmt <<
" [" <<
I->StartIdx
2080 <<
", " <<
I->EndIdx
2081 <<
"] with 32-bit replicated groups with ranges ["
2082 << IP->StartIdx <<
", " << IP->EndIdx <<
"] and ["
2083 << IN->StartIdx <<
", " << IN->EndIdx <<
"]\n");
2091 IP->Repl32CR = IP->Repl32CR ||
I->RLAmt >= 32;
2092 IP->Repl32Coalesced =
true;
2093 I = BitGroups.erase(
I);
2098 IP->EndIdx = IN->EndIdx;
2099 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR ||
I->RLAmt >= 32;
2100 IP->Repl32Coalesced =
true;
2101 I = BitGroups.erase(
I);
2102 BitGroups.erase(BitGroups.begin());
2117 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
2122 for (
unsigned i = 0; i <
Bits.size(); ++i) {
2123 if (Bits[i].hasValue())
2125 Mask |= (UINT64_C(1) << i);
2136 if (
V.getValueSizeInBits() == 64)
2139 assert(
V.getValueSizeInBits() == 32);
2140 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2141 SDValue ImDef =
SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2143 SDValue ExtVal =
SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2150 if (
V.getValueSizeInBits() == 32)
2153 assert(
V.getValueSizeInBits() == 64);
2154 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2155 SDValue SubVal =
SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
2156 MVT::i32, V, SubRegIdx), 0);
2163 void SelectAndParts32(
const SDLoc &dl,
SDValue &Res,
unsigned *InstCnt) {
2167 for (ValueRotInfo &VRI : ValueRotsVec) {
2169 for (
unsigned i = 0; i <
Bits.size(); ++i) {
2170 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2172 if (RLAmt[i] != VRI.RLAmt)
2178 unsigned ANDIMask = (
Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2179 assert((ANDIMask != 0 || ANDISMask != 0) &&
2180 "No set bits in mask for value bit groups");
2181 bool NeedsRotate = VRI.RLAmt != 0;
2197 unsigned NumAndInsts = (
unsigned) NeedsRotate +
2198 (
unsigned) (ANDIMask != 0) +
2199 (
unsigned) (ANDISMask != 0) +
2200 (
unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2201 (
unsigned) (
bool) Res;
2203 LLVM_DEBUG(
dbgs() <<
"\t\trotation groups for " << VRI.V.getNode()
2204 <<
" RL: " << VRI.RLAmt <<
":"
2205 <<
"\n\t\t\tisel using masking: " << NumAndInsts
2206 <<
" using rotates: " << VRI.NumGroups <<
"\n");
2208 if (NumAndInsts >= VRI.NumGroups)
2213 if (InstCnt) *InstCnt += NumAndInsts;
2218 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2219 getI32Imm(0, dl), getI32Imm(31, dl) };
2220 VRot =
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2223 VRot = TruncateToInt32(VRI.V, dl);
2228 ANDIVal =
SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2229 VRot, getI32Imm(ANDIMask, dl)),
2233 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2234 getI32Imm(ANDISMask, dl)),
2239 TotalVal = ANDISVal;
2243 TotalVal =
SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2244 ANDIVal, ANDISVal), 0);
2249 Res =
SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2254 eraseMatchingBitGroups([VRI](
const BitGroup &BG) {
2255 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2261 SDNode *Select32(
SDNode *
N,
bool LateMask,
unsigned *InstCnt) {
2265 if (InstCnt) *InstCnt = 0;
2268 SelectAndParts32(dl, Res, InstCnt);
2273 if ((!NeedMask || LateMask) && !Res) {
2274 ValueRotInfo &VRI = ValueRotsVec[0];
2276 if (InstCnt) *InstCnt += 1;
2278 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2279 getI32Imm(0, dl), getI32Imm(31, dl) };
2280 Res =
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2283 Res = TruncateToInt32(VRI.V, dl);
2287 eraseMatchingBitGroups([VRI](
const BitGroup &BG) {
2288 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2292 if (InstCnt) *InstCnt += BitGroups.size();
2295 for (
auto &BG : BitGroups) {
2298 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2299 getI32Imm(
Bits.size() - BG.EndIdx - 1, dl),
2300 getI32Imm(
Bits.size() - BG.StartIdx - 1, dl) };
2301 Res =
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2304 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2305 getI32Imm(
Bits.size() - BG.EndIdx - 1, dl),
2306 getI32Imm(
Bits.size() - BG.StartIdx - 1, dl) };
2307 Res =
SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
2314 unsigned ANDIMask = (
Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2315 assert((ANDIMask != 0 || ANDISMask != 0) &&
2316 "No set bits in zeros mask?");
2318 if (InstCnt) *InstCnt += (
unsigned) (ANDIMask != 0) +
2320 (
unsigned) (ANDIMask != 0 && ANDISMask != 0);
2324 ANDIVal =
SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2325 Res, getI32Imm(ANDIMask, dl)),
2329 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2330 getI32Imm(ANDISMask, dl)),
2338 Res =
SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2339 ANDIVal, ANDISVal), 0);
2345 unsigned SelectRotMask64Count(
unsigned RLAmt,
bool Repl32,
2346 unsigned MaskStart,
unsigned MaskEnd,
2350 unsigned InstMaskStart = 64 - MaskEnd - 1,
2351 InstMaskEnd = 64 - MaskStart - 1;
2356 if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
2357 InstMaskEnd == 63 - RLAmt)
2366 bool Repl32,
unsigned MaskStart,
unsigned MaskEnd,
2367 unsigned *InstCnt =
nullptr) {
2370 unsigned InstMaskStart = 64 - MaskEnd - 1,
2371 InstMaskEnd = 64 - MaskStart - 1;
2373 if (InstCnt) *InstCnt += 1;
2379 assert(InstMaskStart >= 32 &&
"Mask cannot start out of range");
2380 assert(InstMaskEnd >= 32 &&
"Mask cannot end out of range");
2382 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2383 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2384 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2388 if (InstMaskEnd == 63) {
2390 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2391 getI32Imm(InstMaskStart, dl) };
2392 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
2395 if (InstMaskStart == 0) {
2397 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2398 getI32Imm(InstMaskEnd, dl) };
2399 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
2402 if (InstMaskEnd == 63 - RLAmt) {
2404 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2405 getI32Imm(InstMaskStart, dl) };
2406 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
2415 if (InstCnt) *InstCnt += 1;
2418 unsigned RLAmt2 = MaskStart;
2421 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2423 V = SelectRotMask64(V, dl, RLAmt1,
false, 0, 63);
2424 return SelectRotMask64(V, dl, RLAmt2,
false, MaskStart, MaskEnd);
2430 unsigned RLAmt,
bool Repl32,
unsigned MaskStart,
2431 unsigned MaskEnd,
unsigned *InstCnt =
nullptr) {
2434 unsigned InstMaskStart = 64 - MaskEnd - 1,
2435 InstMaskEnd = 64 - MaskStart - 1;
2437 if (InstCnt) *InstCnt += 1;
2443 assert(InstMaskStart >= 32 &&
"Mask cannot start out of range");
2444 assert(InstMaskEnd >= 32 &&
"Mask cannot end out of range");
2446 { ExtendToInt64(
Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2447 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2448 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2452 if (InstMaskEnd == 63 - RLAmt) {
2454 { ExtendToInt64(
Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2455 getI32Imm(InstMaskStart, dl) };
2456 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2465 if (InstCnt) *InstCnt += 1;
2468 unsigned RLAmt2 = MaskStart;
2471 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2473 V = SelectRotMask64(V, dl, RLAmt1,
false, 0, 63);
2474 return SelectRotMaskIns64(
Base, V, dl, RLAmt2,
false, MaskStart, MaskEnd);
2477 void SelectAndParts64(
const SDLoc &dl,
SDValue &Res,
unsigned *InstCnt) {
2490 for (ValueRotInfo &VRI : ValueRotsVec) {
2498 auto MatchingBG = [VRI](
const BitGroup &BG) {
2502 unsigned EffRLAmt = BG.RLAmt;
2503 if (!VRI.Repl32 && BG.Repl32) {
2504 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2505 !BG.Repl32Coalesced) {
2511 }
else if (VRI.Repl32 != BG.Repl32) {
2515 return VRI.RLAmt == EffRLAmt;
2518 for (
auto &BG : BitGroups) {
2519 if (!MatchingBG(BG))
2522 if (BG.StartIdx <= BG.EndIdx) {
2523 for (
unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2524 Mask |= (UINT64_C(1) << i);
2526 for (
unsigned i = BG.StartIdx; i <
Bits.size(); ++i)
2527 Mask |= (UINT64_C(1) << i);
2528 for (
unsigned i = 0; i <= BG.EndIdx; ++i)
2529 Mask |= (UINT64_C(1) << i);
2536 bool Use32BitInsts = isUInt<32>(Mask);
2538 unsigned ANDIMask = (
Mask & UINT16_MAX),
2539 ANDISMask = (Mask >> 16) & UINT16_MAX;
2541 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2543 unsigned NumAndInsts = (
unsigned) NeedsRotate +
2544 (
unsigned) (
bool) Res;
2545 unsigned NumOfSelectInsts = 0;
2547 assert(NumOfSelectInsts > 0 &&
"Failed to select an i64 constant.");
2550 (
unsigned) (ANDIMask != 0 && ANDISMask != 0);
2552 NumAndInsts += NumOfSelectInsts + 1;
2554 unsigned NumRLInsts = 0;
2555 bool FirstBG =
true;
2556 bool MoreBG =
false;
2557 for (
auto &BG : BitGroups) {
2558 if (!MatchingBG(BG)) {
2563 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2568 LLVM_DEBUG(
dbgs() <<
"\t\trotation groups for " << VRI.V.getNode()
2569 <<
" RL: " << VRI.RLAmt << (VRI.Repl32 ?
" (32):" :
":")
2570 <<
"\n\t\t\tisel using masking: " << NumAndInsts
2571 <<
" using rotates: " << NumRLInsts <<
"\n");
2577 if (NumAndInsts > NumRLInsts)
2582 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2587 if (InstCnt) *InstCnt += NumAndInsts;
2594 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2595 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2596 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2601 if (Use32BitInsts) {
2602 assert((ANDIMask != 0 || ANDISMask != 0) &&
2603 "No set bits in mask when using 32-bit ands for 64-bit value");
2607 ANDIVal =
SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2608 ExtendToInt64(VRot, dl),
2609 getI32Imm(ANDIMask, dl)),
2613 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2614 ExtendToInt64(VRot, dl),
2615 getI32Imm(ANDISMask, dl)),
2619 TotalVal = ANDISVal;
2623 TotalVal =
SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2624 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2628 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2629 ExtendToInt64(VRot, dl), TotalVal),
2636 Res =
SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2637 ExtendToInt64(Res, dl), TotalVal),
2642 eraseMatchingBitGroups(MatchingBG);
2647 SDNode *Select64(
SDNode *
N,
bool LateMask,
unsigned *InstCnt) {
2651 if (InstCnt) *InstCnt = 0;
2654 SelectAndParts64(dl, Res, InstCnt);
2659 if ((!NeedMask || LateMask) && !Res) {
2663 unsigned MaxGroupsIdx = 0;
2664 if (!ValueRotsVec[0].Repl32) {
2665 for (
unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2666 if (ValueRotsVec[i].Repl32) {
2667 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2673 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2674 bool NeedsRotate =
false;
2677 }
else if (VRI.Repl32) {
2678 for (
auto &BG : BitGroups) {
2679 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2680 BG.Repl32 != VRI.Repl32)
2685 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2694 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2695 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2702 eraseMatchingBitGroups([VRI](
const BitGroup &BG) {
2703 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2704 BG.Repl32 == VRI.Repl32;
2711 for (
auto I = BitGroups.begin(), IE = BitGroups.end();
I != IE; ++
I) {
2712 if (SelectRotMask64Count(
I->RLAmt,
I->Repl32,
I->StartIdx,
I->EndIdx,
2714 SelectRotMask64Count(
I->RLAmt,
I->Repl32,
I->StartIdx,
I->EndIdx,
2716 if (
I != BitGroups.begin()) {
2719 BitGroups.insert(BitGroups.begin(), BG);
2727 for (
auto &BG : BitGroups) {
2729 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2730 BG.EndIdx, InstCnt);
2732 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2733 BG.StartIdx, BG.EndIdx, InstCnt);
2742 bool Use32BitInsts = isUInt<32>(Mask);
2744 unsigned ANDIMask = (
Mask & UINT16_MAX),
2745 ANDISMask = (Mask >> 16) & UINT16_MAX;
2747 if (Use32BitInsts) {
2748 assert((ANDIMask != 0 || ANDISMask != 0) &&
2749 "No set bits in mask when using 32-bit ands for 64-bit value");
2751 if (InstCnt) *InstCnt += (
unsigned) (ANDIMask != 0) +
2753 (
unsigned) (ANDIMask != 0 && ANDISMask != 0);
2757 ANDIVal =
SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2758 ExtendToInt64(Res, dl),
2759 getI32Imm(ANDIMask, dl)),
2763 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2764 ExtendToInt64(Res, dl),
2765 getI32Imm(ANDISMask, dl)),
2773 Res =
SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2774 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2776 unsigned NumOfSelectInsts = 0;
2779 Res =
SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2780 ExtendToInt64(Res, dl), MaskVal),
2783 *InstCnt += NumOfSelectInsts + 1;
2792 collectBitGroups(LateMask);
2793 if (BitGroups.empty())
2797 if (
Bits.size() == 64)
2798 assignRepl32BitGroups();
2801 collectValueRotInfo();
2803 if (
Bits.size() == 32) {
2804 return Select32(
N, LateMask, InstCnt);
2806 assert(
Bits.size() == 64 &&
"Not 64 bits here?");
2807 return Select64(
N, LateMask, InstCnt);
2813 void eraseMatchingBitGroups(
function_ref<
bool(
const BitGroup &)>
F) {
2819 bool NeedMask =
false;
2839 getValueBits(
SDValue(
N, 0),
N->getValueType(0).getSizeInBits());
2844 LLVM_DEBUG(
dbgs() <<
"Considering bit-permutation-based instruction"
2845 " selection for: ");
2849 computeRotationAmounts();
2862 unsigned InstCnt = 0, InstCntLateMask = 0;
2865 LLVM_DEBUG(
dbgs() <<
"\t\tisel would use " << InstCnt <<
" instructions\n");
2870 <<
" instructions\n");
2872 if (InstCnt <= InstCntLateMask) {
2882class IntegerCompareEliminator {
2887 enum ExtOrTruncConversion {
Ext, Trunc };
2895 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2905 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2912 SDValue addExtOrTrunc(
SDValue NatWidthRes, ExtOrTruncConversion Conv);
2916 int64_t RHSValue,
SDLoc dl);
2918 int64_t RHSValue,
SDLoc dl);
2920 int64_t RHSValue,
SDLoc dl);
2922 int64_t RHSValue,
SDLoc dl);
2927 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2930 "Only expecting to use this on 64 bit targets.");
2935 switch (
N->getOpcode()) {
2946 return tryEXTEND(
N);
2950 return tryLogicOpOfCompares(
N);
2961 "Expecting a zero/sign extend node!");
2966 N->getOperand(0).getValueType() == MVT::i1 &&
2968 WideRes = computeLogicOpInGPR(
N->getOperand(0));
2969 else if (
N->getOperand(0).getOpcode() !=
ISD::SETCC)
2973 getSETCCInGPR(
N->getOperand(0),
2975 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2982 bool Output32Bit =
N->getValueType(0) == MVT::i32;
2988 if (Input32Bit != Output32Bit)
2989 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2990 ExtOrTruncConversion::Trunc);
2998SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(
SDNode *
N) {
2999 if (
N->getValueType(0) != MVT::i1)
3002 "Expected a logic operation on setcc results.");
3004 if (!LoweredLogical)
3009 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3018 if (IsBitwiseNegate &&
3021 else if (IsBitwiseNegate)
3023 OpToConvToRecForm = LoweredLogical.
getOperand(0);
3027 OpToConvToRecForm = LoweredLogical;
3037 if (NewOpc != -1 && IsBitwiseNegate) {
3040 "Expected a PPC::XORI8 only for bitwise negation.");
3042 std::vector<SDValue> Ops;
3043 for (
int i = 0, e = OpToConvToRecForm.
getNumOperands(); i < e; i++)
3044 Ops.push_back(OpToConvToRecForm.
getOperand(i));
3049 MVT::Glue, Ops), 0);
3051 assert((NewOpc != -1 || !IsBitwiseNegate) &&
3052 "No record form available for AND8/OR8/XOR8?");
3055 dl, MVT::i64, MVT::Glue, LHS, RHS),
3067 MVT::i1, CR0Reg, SRIdxVal,
3080SDValue IntegerCompareEliminator::computeLogicOpInGPR(
SDValue LogicOp) {
3082 "Can only handle logic operations here.");
3084 "Can only handle logic operations on i1 values here.");
3096 unsigned OperandOpcode = Operand.
getOpcode();
3098 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
3103 PPC::RLDICL, dl, InVT, InputOp,
3104 S->getI64Imm(0, dl),
3105 S->getI64Imm(63, dl)), 0);
3107 return computeLogicOpInGPR(Operand);
3116 if (!LHS || (!RHS && !IsBitwiseNegation))
3119 NumLogicOpsOnComparison++;
3122 if (
LHS.getValueType() == MVT::i32)
3123 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
3124 if (!IsBitwiseNegation &&
RHS.getValueType() == MVT::i32)
3125 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
3130 case ISD::AND: NewOpc = PPC::AND8;
break;
3131 case ISD::OR: NewOpc = PPC::OR8;
break;
3132 case ISD::XOR: NewOpc = PPC::XOR8;
break;
3135 if (IsBitwiseNegation) {
3136 RHS = S->getI64Imm(1, dl);
3137 NewOpc = PPC::XORI8;
3148SDValue IntegerCompareEliminator::signExtendInputIfNeeded(
SDValue Input) {
3150 "Can only sign-extend 32-bit values here.");
3158 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3160 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3164 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3169 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3172 SignExtensionsAdded++;
3174 MVT::i64, Input), 0);
3181SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(
SDValue Input) {
3183 "Can only zero-extend 32-bit values here.");
3193 if (IsTruncateOfZExt)
3194 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3198 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3200 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3203 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3207 ZeroExtensionsAdded++;
3209 S->getI64Imm(0, dl),
3210 S->getI64Imm(32, dl)), 0);
3217SDValue IntegerCompareEliminator::addExtOrTrunc(
SDValue NatWidthRes,
3218 ExtOrTruncConversion Conv) {
3219 SDLoc dl(NatWidthRes);
3223 if (Conv == ExtOrTruncConversion::Ext) {
3228 ImDef, NatWidthRes, SubRegIdx), 0);
3231 assert(Conv == ExtOrTruncConversion::Trunc &&
3232 "Unknown convertion between 32 and 64 bit values.");
3238 NatWidthRes, SubRegIdx), 0);
3244IntegerCompareEliminator::getCompoundZeroComparisonInGPR(
SDValue LHS,
SDLoc dl,
3245 ZeroCompare CmpTy) {
3246 EVT InVT =
LHS.getValueType();
3247 bool Is32Bit = InVT == MVT::i32;
3252 case ZeroCompare::GEZExt:
3253 case ZeroCompare::GESExt:
3255 dl, InVT, LHS, LHS), 0);
3257 case ZeroCompare::LEZExt:
3258 case ZeroCompare::LESExt: {
3261 LHS = signExtendInputIfNeeded(LHS);
3266 Neg, S->getI64Imm(1, dl),
3267 S->getI64Imm(63, dl)), 0);
3271 S->getI64Imm(~0ULL, dl)), 0);
3281 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3283 ToExtend, S->getI64Imm(1, dl),
3284 S->getI64Imm(63, dl)), 0);
3286 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3288 S->getI64Imm(63, dl)), 0);
3290 assert(Is32Bit &&
"Should have handled the 32-bit sequences above.");
3293 case ZeroCompare::GEZExt: {
3294 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3295 S->getI32Imm(31, dl) };
3299 case ZeroCompare::GESExt:
3301 S->getI32Imm(31, dl)), 0);
3302 case ZeroCompare::LEZExt:
3304 S->getI32Imm(1, dl)), 0);
3305 case ZeroCompare::LESExt:
3307 S->getI32Imm(-1, dl)), 0);
3318IntegerCompareEliminator::get32BitZExtCompare(
SDValue LHS,
SDValue RHS,
3320 int64_t RHSValue,
SDLoc dl) {
3324 bool IsRHSZero = RHSValue == 0;
3325 bool IsRHSOne = RHSValue == 1;
3326 bool IsRHSNegOne = RHSValue == -1LL;
3336 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3337 S->getI32Imm(31, dl) };
3348 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3349 S->getI32Imm(31, dl) };
3353 S->getI32Imm(1, dl)), 0);
3359 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3365 IsRHSZero = RHSConst && RHSConst->
isZero();
3376 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3380 LHS = signExtendInputIfNeeded(LHS);
3381 RHS = signExtendInputIfNeeded(RHS);
3386 S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3390 MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3398 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3404 LHS = signExtendInputIfNeeded(LHS);
3405 RHS = signExtendInputIfNeeded(RHS);
3409 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3415 IsRHSZero = RHSConst && RHSConst->
isZero();
3427 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3431 SDValue ShiftOps[] = {
LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3432 S->getI32Imm(31, dl) };
3440 LHS = signExtendInputIfNeeded(LHS);
3441 RHS = signExtendInputIfNeeded(RHS);
3445 SUBFNode, S->getI64Imm(1, dl),
3446 S->getI64Imm(63, dl)), 0);
3457 LHS = zeroExtendInputIfNeeded(LHS);
3458 RHS = zeroExtendInputIfNeeded(RHS);
3463 Subtract, S->getI64Imm(1, dl),
3464 S->getI64Imm(63, dl)), 0);
3466 S->getI32Imm(1, dl)), 0);
3477 LHS = zeroExtendInputIfNeeded(LHS);
3478 RHS = zeroExtendInputIfNeeded(RHS);
3482 Subtract, S->getI64Imm(1, dl),
3483 S->getI64Imm(63, dl)), 0);
3491IntegerCompareEliminator::get32BitSExtCompare(
SDValue LHS,
SDValue RHS,
3493 int64_t RHSValue,
SDLoc dl) {
3497 bool IsRHSZero = RHSValue == 0;
3498 bool IsRHSOne = RHSValue == 1;
3499 bool IsRHSNegOne = RHSValue == -1LL;
3512 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3513 S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3531 { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3536 S->getI32Imm(1, dl)), 0);
3543 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3549 IsRHSZero = RHSConst && RHSConst->
isZero();
3558 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3561 LHS = signExtendInputIfNeeded(LHS);
3562 RHS = signExtendInputIfNeeded(RHS);
3568 SUBFNode, S->getI64Imm(1, dl),
3569 S->getI64Imm(63, dl)), 0);
3571 S->getI32Imm(-1, dl)), 0);
3578 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3583 LHS = signExtendInputIfNeeded(LHS);
3584 RHS = signExtendInputIfNeeded(RHS);
3588 S->getI64Imm(63, dl)), 0);
3594 IsRHSZero = RHSConst && RHSConst->
isZero();
3605 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3609 S->getI32Imm(31, dl)), 0);
3614 LHS = signExtendInputIfNeeded(LHS);
3615 RHS = signExtendInputIfNeeded(RHS);
3619 SUBFNode, S->getI64Imm(63, dl)), 0);
3630 LHS = zeroExtendInputIfNeeded(LHS);
3631 RHS = zeroExtendInputIfNeeded(RHS);
3636 S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3639 S->getI32Imm(-1, dl)), 0);
3650 LHS = zeroExtendInputIfNeeded(LHS);
3651 RHS = zeroExtendInputIfNeeded(RHS);
3655 Subtract, S->getI64Imm(63, dl)), 0);
3663IntegerCompareEliminator::get64BitZExtCompare(
SDValue LHS,
SDValue RHS,
3665 int64_t RHSValue,
SDLoc dl) {
3669 bool IsRHSZero = RHSValue == 0;
3670 bool IsRHSOne = RHSValue == 1;
3671 bool IsRHSNegOne = RHSValue == -1LL;
3682 S->getI64Imm(58, dl),
3683 S->getI64Imm(63, dl)), 0);
3694 Xor, S->getI32Imm(~0U, dl)), 0);
3704 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3707 IsRHSZero = RHSConst && RHSConst->
isZero();
3716 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3719 S->getI64Imm(1, dl),
3720 S->getI64Imm(63, dl)), 0);
3723 S->getI64Imm(63, dl)), 0);
3728 ShiftR, ShiftL, SubtractCarry), 0);
3736 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3740 S->getI64Imm(~0ULL, dl)), 0);
3744 S->getI64Imm(1, dl),
3745 S->getI64Imm(63, dl)), 0);
3749 IsRHSZero = RHSConst && RHSConst->
isZero();
3759 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3762 S->getI64Imm(1, dl),
3763 S->getI64Imm(63, dl)), 0);
3766 LHS, S->getI64Imm(63, dl)), 0);
3769 RHS, S->getI64Imm(1, dl),
3770 S->getI64Imm(63, dl)), 0);
3776 SRDINode, SRADINode, SUBFC8Carry), 0);
3778 ADDE8Node, S->getI64Imm(1, dl)), 0);
3793 LHS, LHS, SUBFC8Carry), 0);
3795 SUBFE8Node, S->getI64Imm(1, dl)), 0);
3810 LHS, LHS, SubtractCarry), 0);
3820IntegerCompareEliminator::get64BitSExtCompare(
SDValue LHS,
SDValue RHS,
3822 int64_t RHSValue,
SDLoc dl) {
3826 bool IsRHSZero = RHSValue == 0;
3827 bool IsRHSOne = RHSValue == 1;
3828 bool IsRHSNegOne = RHSValue == -1LL;
3840 AddInput, S->getI32Imm(~0U, dl)), 0);
3853 Xor, S->getI32Imm(0, dl)), 0);
3855 SC,
SC.getValue(1)), 0);
3863 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3866 IsRHSZero = RHSConst && RHSConst->
isZero();
3875 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3878 S->getI64Imm(63, dl)), 0);
3881 S->getI64Imm(1, dl),
3882 S->getI64Imm(63, dl)), 0);
3888 ShiftR, ShiftL, SubtractCarry), 0);
3897 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3901 S->getI64Imm(-1, dl)), 0);
3905 S->getI64Imm(63, dl)), 0);
3909 IsRHSZero = RHSConst && RHSConst->
isZero();
3919 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3922 S->getI64Imm(63, dl)), 0);
3926 LHS, S->getI64Imm(63, dl)), 0);
3929 RHS, S->getI64Imm(1, dl),
3930 S->getI64Imm(63, dl)), 0);
3936 SRDINode, SRADINode, SUBFC8Carry), 0);
3939 ADDE8Node, S->getI64Imm(1, dl)), 0);
3956 LHS, SubtractCarry), 0);
3958 ExtSub, ExtSub), 0);
3972 LHS, LHS, SubCarry), 0);
3983 "An ISD::SETCC node required here.");
3991 for (
auto *CompareUse :
Compare.getNode()->uses())
3996 OmittedForNonExtendUses++;
4006 SetccInGPROpts ConvOpts) {
4009 "An ISD::SETCC node required here.");
4022 cast<CondCodeSDNode>(
Compare.getOperand(CCOpNum))->get();
4023 EVT InputVT =
LHS.getValueType();
4024 if (InputVT != MVT::i32 && InputVT != MVT::i64)
4027 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
4028 ConvOpts == SetccInGPROpts::SExtInvert)
4031 bool Inputs32Bit = InputVT == MVT::i32;
4036 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
4037 ConvOpts == SetccInGPROpts::SExtInvert;
4039 if (IsSext && Inputs32Bit)
4040 return get32BitSExtCompare(LHS, RHS,
CC, RHSValue, dl);
4041 else if (Inputs32Bit)
4042 return get32BitZExtCompare(LHS, RHS,
CC, RHSValue, dl);
4044 return get64BitSExtCompare(LHS, RHS,
CC, RHSValue, dl);
4045 return get64BitZExtCompare(LHS, RHS,
CC, RHSValue, dl);
4050bool PPCDAGToDAGISel::tryIntCompareInGPR(
SDNode *
N) {
4051 if (
N->getValueType(0) != MVT::i32 &&
4052 N->getValueType(0) != MVT::i64)
4058 if (
TM.getOptLevel() == CodeGenOptLevel::None || !
TM.isPPC64())
4064 if (!(
CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
4067 switch (
N->getOpcode()) {
4074 IntegerCompareEliminator ICmpElim(CurDAG,
this);
4075 if (
SDNode *New = ICmpElim.Select(
N)) {
4076 ReplaceNode(
N, New);
4084bool PPCDAGToDAGISel::tryBitPermutation(
SDNode *
N) {
4085 if (
N->getValueType(0) != MVT::i32 &&
4086 N->getValueType(0) != MVT::i64)
4092 switch (
N->getOpcode()) {
4097 if (Subtarget->isISA3_1() &&
N->getValueType(0) == MVT::i32 &&
4099 auto &OpRight =
N->getOperand(1);
4109 BitPermutationSelector BPS(CurDAG);
4110 if (
SDNode *New = BPS.Select(
N)) {
4111 ReplaceNode(
N, New);
4128 if (
LHS.getValueType() == MVT::i32) {
4133 if (isUInt<16>(Imm))
4135 getI32Imm(Imm & 0xFFFF, dl)),
4138 if (isInt<16>((
int)Imm))
4140 getI32Imm(Imm & 0xFFFF, dl)),
4153 getI32Imm(Imm >> 16, dl)), 0);
4155 getI32Imm(Imm & 0xFFFF, dl)), 0);
4161 getI32Imm(Imm & 0xFFFF, dl)), 0);
4167 getI32Imm((
int)SImm & 0xFFFF,
4172 }
else if (
LHS.getValueType() == MVT::i64) {
4177 if (isUInt<16>(Imm))
4179 getI32Imm(Imm & 0xFFFF, dl)),
4184 getI32Imm(Imm & 0xFFFF, dl)),
4196 if (isUInt<32>(Imm)) {
4198 getI64Imm(Imm >> 16, dl)), 0);
4200 getI64Imm(Imm & 0xFFFF, dl)),
4208 getI64Imm(Imm & 0xFFFF, dl)), 0);
4214 getI64Imm(SImm & 0xFFFF, dl)),
4218 }
else if (
LHS.getValueType() == MVT::f32) {
4219 if (Subtarget->hasSPE()) {
4224 Opc = PPC::EFSCMPEQ;
4232 Opc = PPC::EFSCMPLT;
4240 Opc = PPC::EFSCMPGT;
4245 }
else if (
LHS.getValueType() == MVT::f64) {
4246 if (Subtarget->hasSPE()) {
4251 Opc = PPC::EFDCMPEQ;
4259 Opc = PPC::EFDCMPLT;
4267 Opc = PPC::EFDCMPGT;
4271 Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4273 assert(
LHS.getValueType() == MVT::f128 &&
"Unknown vt!");
4274 assert(Subtarget->hasP9Vector() &&
"XSCMPUQP requires Power9 Vector");
4275 Opc = PPC::XSCMPUQP;
4279 CurDAG->
getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4343 case ISD::SETO: Invert =
true;
return 3;
4360 bool HasVSX,
bool &Swap,
bool &Negate) {
4387 if (VecVT == MVT::v4f32)
4388 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4389 else if (VecVT == MVT::v2f64)
4390 return PPC::XVCMPEQDP;
4394 if (VecVT == MVT::v4f32)
4395 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4396 else if (VecVT == MVT::v2f64)
4397 return PPC::XVCMPGTDP;
4401 if (VecVT == MVT::v4f32)
4402 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4403 else if (VecVT == MVT::v2f64)
4404 return PPC::XVCMPGEDP;
4431 if (VecVT == MVT::v16i8)
4432 return PPC::VCMPEQUB;
4433 else if (VecVT == MVT::v8i16)
4434 return PPC::VCMPEQUH;
4435 else if (VecVT == MVT::v4i32)
4436 return PPC::VCMPEQUW;
4437 else if (VecVT == MVT::v2i64)
4438 return PPC::VCMPEQUD;
4439 else if (VecVT == MVT::v1i128)
4440 return PPC::VCMPEQUQ;
4443 if (VecVT == MVT::v16i8)
4444 return PPC::VCMPGTSB;
4445 else if (VecVT == MVT::v8i16)
4446 return PPC::VCMPGTSH;
4447 else if (VecVT == MVT::v4i32)
4448 return PPC::VCMPGTSW;
4449 else if (VecVT == MVT::v2i64)
4450 return PPC::VCMPGTSD;
4451 else if (VecVT == MVT::v1i128)
4452 return PPC::VCMPGTSQ;
4455 if (VecVT == MVT::v16i8)
4456 return PPC::VCMPGTUB;
4457 else if (VecVT == MVT::v8i16)
4458 return PPC::VCMPGTUH;
4459 else if (VecVT == MVT::v4i32)
4460 return PPC::VCMPGTUW;
4461 else if (VecVT == MVT::v2i64)
4462 return PPC::VCMPGTUD;
4463 else if (VecVT == MVT::v1i128)
4464 return PPC::VCMPGTUQ;
4473bool PPCDAGToDAGISel::trySETCC(
SDNode *
N) {
4476 bool IsStrict =
N->isStrictFPOpcode();
4478 cast<CondCodeSDNode>(
N->getOperand(IsStrict ? 3 : 2))->get();
4481 bool isPPC64 = (PtrVT == MVT::i64);
4497 SDValue Ops[] = {
Op, getI32Imm(27, dl), getI32Imm(5, dl),
4498 getI32Imm(31, dl) };
4506 Op, getI32Imm(~0U, dl)), 0);
4511 SDValue Ops[] = {
Op, getI32Imm(1, dl), getI32Imm(31, dl),
4512 getI32Imm(31, dl) };
4520 SDValue Ops[] = {
T, getI32Imm(1, dl), getI32Imm(31, dl),
4521 getI32Imm(31, dl) };
4526 }
else if (Imm == ~0U) {
4533 Op, getI32Imm(1, dl)), 0);
4538 0),
Op.getValue(1));
4544 Op, getI32Imm(~0U, dl));
4551 getI32Imm(1, dl)), 0);
4554 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4555 getI32Imm(31, dl) };
4560 SDValue Ops[] = {
Op, getI32Imm(1, dl), getI32Imm(31, dl),
4561 getI32Imm(31, dl) };
4572 if (!IsStrict &&
LHS.getValueType().isVector()) {
4573 if (Subtarget->hasSPE())
4576 EVT VecVT =
LHS.getValueType();
4578 unsigned int VCmpInst =
4586 CurDAG->
SelectNodeTo(
N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4595 if (Subtarget->useCRBits())
4600 SDValue CCReg = SelectCC(LHS, RHS,
CC, dl, Chain);
4607 if (Subtarget->hasSPE() &&
LHS.getValueType().isFloatingPoint()) {
4621 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 -
Idx)) & 31, dl),
4622 getI32Imm(31, dl), getI32Imm(31, dl) };
4631 CurDAG->
SelectNodeTo(
N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
4637bool PPCDAGToDAGISel::isOffsetMultipleOf(
SDNode *
N,
unsigned Val)
const {
4643 AddrOp =
N->getOperand(1);
4658 if ((SlotAlign % Val) != 0)
4673void PPCDAGToDAGISel::transferMemOperands(
SDNode *
N,
SDNode *Result) {
4680 bool &NeedSwapOps,
bool &IsUnCmp) {
4686 SDValue TrueRes =
N->getOperand(2);
4687 SDValue FalseRes =
N->getOperand(3);
4689 if (!TrueConst || (
N->getSimpleValueType(0) != MVT::i64 &&
4690 N->getSimpleValueType(0) != MVT::i32))
4699 if ((TrueResVal < -1 || TrueResVal > 1) ||
4728 cast<CondCodeSDNode>(SetOrSelCC.
getOperand(InnerIsSel ? 4 : 2))->get();
4733 dyn_cast<ConstantSDNode>(SetOrSelCC.
getOperand(2));
4735 dyn_cast<ConstantSDNode>(SetOrSelCC.
getOperand(3));
4736 if (!SelCCTrueConst || !SelCCFalseConst)
4741 if (SelCCTVal == -1 && SelCCFVal == 1) {
4743 }
else if (SelCCTVal != 1 || SelCCFVal != -1)
4753 bool InnerSwapped =
false;
4754 if (
LHS == InnerRHS &&
RHS == InnerLHS)
4755 InnerSwapped =
true;
4756 else if (
LHS != InnerLHS ||
RHS != InnerRHS)
4767 NeedSwapOps = (InnerCC ==
ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4784 NeedSwapOps = (TrueResVal == 1);
4803 NeedSwapOps = (TrueResVal == -1);
4812 LLVM_DEBUG(
dbgs() <<
"Found a node that can be lowered to a SETB: ");
4822 if (
N.getNumOperands() < 1 || !isa<ConstantSDNode>(
N.getOperand(0)) ||
4825 switch (
N.getConstantOperandVal(0)) {
4826 case Intrinsic::ppc_vsx_xvtdivdp:
4827 case Intrinsic::ppc_vsx_xvtdivsp:
4828 case Intrinsic::ppc_vsx_xvtsqrtdp:
4829 case Intrinsic::ppc_vsx_xvtsqrtsp:
4835bool PPCDAGToDAGISel::tryFoldSWTestBRCC(
SDNode *
N) {
4893bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(
SDNode *
N) {
4900 if (
LHS.getOpcode() !=
ISD::AND || !isa<ConstantSDNode>(
LHS.getOperand(1)) ||
4905 cast<ConstantSDNode>(
LHS.getOperand(0).getOperand(1))->getZExtValue() !=
4906 Intrinsic::loop_decrement)
4909 if (!isa<ConstantSDNode>(RHS))
4913 "Counter decrement comparison is not EQ or NE");
4916 assert(OldDecrement.
hasOneUse() &&
"loop decrement has more than one use!");
4918 SDLoc DecrementLoc(OldDecrement);
4920 SDValue DecrementOps[] = {Subtarget->
isPPC64() ? getI64Imm(1, DecrementLoc)
4921 : getI32Imm(1, DecrementLoc)};
4922 unsigned DecrementOpcode =
4923 Subtarget->
isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4925 MVT::i1, DecrementOps);
4927 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
4929 unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4931 ReplaceUses(
LHS.getValue(0),
LHS.getOperand(1));
4935 ReplaceUses(OldDecrement.
getValue(1), ChainInput);
4939 ChainInput,
N->getOperand(0));
4942 N->getOperand(4), Chain);
4946bool PPCDAGToDAGISel::tryAsSingleRLWINM(
SDNode *
N) {
4954 unsigned SH, MB, ME;
4957 if (isRotateAndMask(Val.
getNode(), Imm,
false, SH, MB, ME)) {
4959 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4968 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4976 ReplaceUses(
SDValue(
N, 0),
N->getOperand(1));