#include "llvm/Config/llvm-config.h"
#include "llvm/IR/IntrinsicsX86.h"

#define DEBUG_TYPE "x86-isel"
#define PASS_NAME "X86 DAG->DAG Instruction Selection"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

    cl::desc("Enable setting constant bits to reduce size of mask immediates"),

    "x86-promote-anyext-load", cl::init(true),
  struct X86ISelAddressMode {

    int Base_FrameIndex = 0;

    const char *ES = nullptr;

    bool NegateIndex = false;

    X86ISelAddressMode() = default;
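    // Together these fields describe the canonical x86 memory operand
    //   segment:[Base + Scale * Index + Disp]
    // Illustrative mapping (not from the surrounding source): for
    // `movl 8(%rdi,%rcx,4), %eax` the matcher would record Base = %rdi,
    // Scale = 4, Index = %rcx, Disp = 8.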
    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    bool hasBaseOrIndexReg() const {

      if (BaseType != RegBase)
        return false;
      if (auto *RegNode =
              dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";

      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
      dbgs() << " Scale " << Scale << '\n'

      dbgs() << " Disp " << Disp << '\n'

      dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n';
    bool IndirectTlsSegRefs;

    X86DAGToDAGISel() = delete;

          OptForMinSize(false), IndirectTlsSegRefs(false) {}

                               "indirect-tls-seg-refs");

             "OptForMinSize implies OptForSize");

#include "X86GenDAGISel.inc"
    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
                            bool AllowSegmentRegForX32 = false);
    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                       unsigned Depth);
    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
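    // Convenience form of tryFoldLoad: the caller's node P is passed as both
    // the match root and the load's parent.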
      return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);

    bool isProfitableToFormMaskedOp(SDNode *N) const;

                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override;
    void emitSpecialCodeForMain();

    inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,

      if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
        Base = CurDAG->getTargetFrameIndex(
            AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
      else if (AM.Base_Reg.getNode())

        Base = CurDAG->getRegister(0, VT);

      Scale = getI8Imm(AM.Scale, DL);

      if (AM.NegateIndex) {
        unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;

      if (AM.IndexReg.getNode())

        Index = CurDAG->getRegister(0, VT);

        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),

        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment,
                                             AM.Disp, AM.SymbolFlags);

        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.MCSym) {
        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
        assert(AM.SymbolFlags == 0 && "Unexpected symbol flags with MCSym.");
        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i16);
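      // At this point Base, Scale, Index, Disp and Segment line up
      // one-to-one with the five-operand x86 memory form the generated
      // matcher expects.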
    bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {

      if (!CurDAG->shouldOptForSize())

        if (User->isMachineOpcode()) {

            auto *C = dyn_cast<ConstantSDNode>(N);
            if (C && isInt<8>(C->getSExtValue()))

                (RegNode = dyn_cast_or_null<RegisterSDNode>(

              if ((RegNode->getReg() == X86::ESP) ||
                  (RegNode->getReg() == X86::RSP))

      return (UseCount > 1);
      return CurDAG->getTargetConstant(Imm, DL, MVT::i8);

      return CurDAG->getTargetConstant(Imm, DL, MVT::i32);

      return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");

      MVT VecVT = N->getOperand(0).getSimpleValueType();

      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");

      MVT VecVT = N->getSimpleValueType(0);

    SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth,
                                               const SDLoc &DL) {
      assert(VecWidth == 128 && "Unexpected vector width");

      MVT VecVT = N->getSimpleValueType(0);

      assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index");

      return getI8Imm(InsertIdx ? 0x02 : 0x30, DL);
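      // As I read the VPERM2x128 encoding: immediate bits [1:0] select the
      // source of the low 128-bit half and bits [5:4] the high half, with
      // values 0-1 naming halves of the first source and 2-3 halves of the
      // second. Hence 0x02 = {lo: src2.lo, hi: src1.lo} and
      // 0x30 = {lo: src1.lo, hi: src2.hi}, the two commuted-insert cases.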
      MVT VT = N->getSimpleValueType(0);

          CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0);

            CurDAG->getMachineNode(
                TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
                CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
                CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),

      unsigned Opcode = N->getOpcode();

             "Unexpected opcode for SBB materialization");
      unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1;

          CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
                               N->getOperand(FlagOpIndex), SDValue());

      unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;

        VTs = CurDAG->getVTList(SBBVT, MVT::i32);

          CurDAG->getMachineNode(Opc, dl, VTs,
                                 {Zero, Zero, EFLAGS, EFLAGS.getValue(1)}),
    bool isUnneededShiftMask(SDNode *N, unsigned Width) const {

      const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();

      APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
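      // A shift only consumes log2(Width) bits of its amount, so a mask such
      // as (amt & 31) on a 32-bit shift changes nothing: `x << (amt & 31)`
      // and `x << amt` select identically. ORing the amount's known-zero
      // bits into Val lets narrower masks pass the same width test.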
    SDNode *getGlobalBaseReg();

      return Subtarget->getInstrInfo();

    bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;

      if (!N->isNonTemporal())

      unsigned StoreSize = N->getMemoryVT().getStoreSize();

      if (N->getAlign().value() < StoreSize)

        return Subtarget->hasSSE41();
        return Subtarget->hasAVX2();
        return Subtarget->hasAVX512();

    bool foldLoadStoreIntoMemOperand(SDNode *Node);

    bool shrinkAndImmediate(SDNode *N);
    bool isMaskZeroExtended(SDNode *N) const;
    bool tryShiftAmountMod(SDNode *N);
    bool tryShrinkShlLogicImm(SDNode *N);

    bool tryMatchBitSelect(SDNode *N);

    MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
    MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,

    bool tryOptimizeRem8Extend(SDNode *N);

    bool onlyUsesZeroFlag(SDValue Flags) const;
    bool hasNoSignFlagUses(SDValue Flags) const;
    bool hasNoCarryFlagUses(SDValue Flags) const;
char X86DAGToDAGISel::ID = 0;

  unsigned Opcode = N->getOpcode();

  EVT OpVT = N->getOperand(0).getValueType();

    OpVT = N->getOperand(1).getValueType();

  return Subtarget->hasVLX();
bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {

  if (useNonTemporalLoad(cast<LoadSDNode>(N)))

  switch (U->getOpcode()) {

      if (auto *Imm = dyn_cast<ConstantSDNode>(Op1)) {
        if (Imm->getAPIntValue().isSignedIntN(8))

            Imm->getAPIntValue().getBitWidth() == 64 &&
            Imm->getAPIntValue().isIntN(32))

            (Imm->getAPIntValue() == UINT8_MAX ||
             Imm->getAPIntValue() == UINT16_MAX ||
             Imm->getAPIntValue() == UINT32_MAX))

            (-Imm->getAPIntValue()).isSignedIntN(8))

            (-Imm->getAPIntValue()).isSignedIntN(8) &&
            hasNoCarryFlagUses(SDValue(U, 1)))

      if (U->getOperand(0).getOpcode() == ISD::SHL &&

      if (U->getOperand(1).getOpcode() == ISD::SHL &&

        auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
        if (C && C->getSExtValue() == -2)

        auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
        if (C && C->getSExtValue() == -2)

      if (isa<ConstantSDNode>(U->getOperand(1)))
bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const {

         "Unexpected opcode!");

  return N->getOperand(1).hasOneUse();
  if (Chain.getNode() == Load.getNode())

         "Unexpected chain operand");

                     Load.getOperand(1), Load.getOperand(2));

  Ops.append(Call->op_begin() + 1, Call->op_end());

  auto *LD = dyn_cast<LoadSDNode>(Callee.getNode());

  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())

      Callee.getValue(1).hasOneUse())

  if ((Imm & 0x00FFFFFF) != 0x0F1EFA)

  uint8_t OptionalPrefixBytes[] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
                                   0x65, 0x66, 0x67, 0xf0, 0xf2};

    uint8_t Byte = (Imm >> i) & 0xFF;
void X86DAGToDAGISel::PreprocessISelDAG() {

  bool MadeChange = false;
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
                                       E = CurDAG->allnodes_end();
       I != E;) {

      MVT VT = N->getSimpleValueType(0);
      int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
      int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;

          SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true);
          Complement = CurDAG->getNOT(dl, Complement, VT);

          CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement);
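          // As I understand the intent: if a constant's byte pattern would
          // embed an ENDBR opcode in the instruction stream (a spurious
          // branch target under CET/IBT), materialize ~Imm instead and
          // recover the value with a NOT at runtime.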
    if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {

                                    N->getOperand(0), N->getOperand(1));

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
    auto mayPreventLoadFold = [&]() {

             N->getOpcode() == ISD::ADD && Subtarget->hasAVX() &&
             !N->getOperand(1).hasOneUse();

        N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {

      MVT VT = N->getSimpleValueType(0);

      SDValue Res =
          CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes);

      CurDAG->ReplaceAllUsesWith(N, Res.getNode());
    switch (N->getOpcode()) {

      MVT VT = N->getSimpleValueType(0);

                                NarrowBCast, CurDAG->getIntPtrConstant(0, dl));

                          CurDAG->getIntPtrConstant(Index, dl));

      CurDAG->ReplaceAllUsesWith(N, Res.getNode());

      MVT VT = N->getSimpleValueType(0);

      auto *MemNode = cast<MemSDNode>(N);

      SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()};
      SDValue NarrowBCast = CurDAG->getMemIntrinsicNode(
          X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(),
          MemNode->getMemOperand());

                                NarrowBCast, CurDAG->getIntPtrConstant(0, dl));

                          CurDAG->getIntPtrConstant(Index, dl));

      CurDAG->ReplaceAllUsesWith(N, To);
      if (N->getOperand(0).getValueType().getVectorElementType() == MVT::i1)

      assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");

             "We can't replace VSELECT with BLENDV in vXi16!");

                                    N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2));

      CurDAG->ReplaceAllUsesWith(N, Blendv.getNode());
      if (!N->getSimpleValueType(0).isVector())

      switch (N->getOpcode()) {

      if (N->isStrictFPOpcode())

                        {N->getOperand(0), N->getOperand(1)});

            CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),

      CurDAG->ReplaceAllUsesWith(N, Res.getNode());
      if (!N->getValueType(0).isVector())

      switch (N->getOpcode()) {

      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
                                    N->getOperand(0), N->getOperand(1));

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);

      if (!N->getValueType(0).isVector())

      if (N->getOperand(0).getScalarValueSizeInBits() == 1) {

               "Unexpected opcode for mask vector!");

      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
    switch (N->getOpcode()) {

      bool IsStrict = N->isStrictFPOpcode();

                              {N->getOperand(0), N->getOperand(1),
                               CurDAG->getTargetConstant(Imm, dl, MVT::i32)});

                              CurDAG->getTargetConstant(Imm, dl, MVT::i32));

      CurDAG->ReplaceAllUsesWith(N, Res.getNode());

      MVT VT = N->getSimpleValueType(0);

      if (Subtarget->hasSSE2()) {

        switch (N->getOpcode()) {

        Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);

        Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);

                            CurDAG->getIntPtrConstant(0, dl));

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
        !Subtarget->useIndirectThunkCalls() &&
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          (Subtarget->is64Bit() ||
           !getTargetMachine().isPositionIndependent())))) {
    switch (N->getOpcode()) {

      MVT SrcVT = N->getOperand(0).getSimpleValueType();
      MVT DstVT = N->getSimpleValueType(0);

      if (SrcIsSSE && DstIsSSE)

      if (!SrcIsSSE && !DstIsSSE) {

        if (N->getConstantOperandVal(1))

      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
      int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();

          CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT);

                                          MemTmp, MPI, MemVT);

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      MVT SrcVT = N->getOperand(1).getSimpleValueType();
      MVT DstVT = N->getSimpleValueType(0);

      if (SrcIsSSE && DstIsSSE)

      if (!SrcIsSSE && !DstIsSSE) {

        if (N->getConstantOperandVal(2))

      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
      int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();

        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp};

        if (N->getFlags().hasNoFPExcept()) {

          Flags.setNoFPExcept(true);
          Store->setFlags(Flags);

        assert(SrcVT == MemVT && "Unexpected VT!");
        Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp,
                                 MPI);

        Result = CurDAG->getMemIntrinsicNode(

        if (N->getFlags().hasNoFPExcept()) {

          Flags.setNoFPExcept(true);

        assert(DstVT == MemVT && "Unexpected VT!");
        Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI);

      CurDAG->ReplaceAllUsesWith(N, Result.getNode());

  CurDAG->RemoveDeadNodes();
bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
  unsigned Opc = N->getMachineOpcode();
  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
      Opc != X86::MOVSX64rr8)

  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
                                                : X86::MOVSX32rr8_NOREX;
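  // The _NOREX forms matter because the extended value may live in AH (the
  // 8-bit remainder of a divide), and AH is only encodable in instructions
  // carrying no REX prefix.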
  if (Opc == X86::MOVSX64rr8) {

  ReplaceUses(N, Extend);
void X86DAGToDAGISel::PostprocessISelDAG() {

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {

    if (N->use_empty() || !N->isMachineOpcode())

    if (tryOptimizeRem8Extend(N)) {

    unsigned Opc = N->getMachineOpcode();
    if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
         Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
        N->getOperand(0) == N->getOperand(1) &&
        N->getOperand(0)->hasNUsesOfValue(2, N->getOperand(0).getResNo()) &&
        N->getOperand(0).isMachineOpcode()) {

      unsigned N0Opc = And.getMachineOpcode();
      if ((N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
           N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) &&
          !And->hasAnyUseOfValue(1)) {

        ReplaceUses(N, Test);

      if ((N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
           N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) &&
          !And->hasAnyUseOfValue(1)) {

        case X86::AND8rm:  NewOpc = X86::TEST8mr;  break;
        case X86::AND16rm: NewOpc = X86::TEST16mr; break;
        case X86::AND32rm: NewOpc = X86::TEST32mr; break;
        case X86::AND64rm: NewOpc = X86::TEST64mr; break;

                         And.getOperand(6) };

        CurDAG->setNodeMemRefs(
            Test, cast<MachineSDNode>(And.getNode())->memoperands());
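        // TEST x, y sets EFLAGS from x & y, so `test %a, %a` where
        // %a = and x, y duplicates the AND's flag computation; rewriting the
        // TEST to read the AND's inputs (or its memory operand, via TESTmr)
        // drops the dependency on the AND result.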
    if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
         Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
        N->getOperand(0) == N->getOperand(1) &&
        N->isOnlyUserOf(N->getOperand(0).getNode()) &&
        N->getOperand(0).isMachineOpcode() &&

      unsigned N0Opc = And.getMachineOpcode();

      if (N0Opc == X86::KANDBrr ||
          (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
          N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {

        case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
        case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
        case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
        case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;

        ReplaceUses(N, KTest);
    if (Opc != TargetOpcode::SUBREG_TO_REG)

    unsigned SubRegIdx = N->getConstantOperandVal(2);
    if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)

    case X86::VMOVAPDrr:       case X86::VMOVUPDrr:
    case X86::VMOVAPSrr:       case X86::VMOVUPSrr:
    case X86::VMOVDQArr:       case X86::VMOVDQUrr:
    case X86::VMOVAPDYrr:      case X86::VMOVUPDYrr:
    case X86::VMOVAPSYrr:      case X86::VMOVUPSYrr:
    case X86::VMOVDQAYrr:      case X86::VMOVDQUYrr:
    case X86::VMOVAPDZ128rr:   case X86::VMOVUPDZ128rr:
    case X86::VMOVAPSZ128rr:   case X86::VMOVUPSZ128rr:
    case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
    case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
    case X86::VMOVAPDZ256rr:   case X86::VMOVUPDZ256rr:
    case X86::VMOVAPSZ256rr:   case X86::VMOVUPSZ256rr:
    case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
    case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:

    if (!In.isMachineOpcode() ||
        In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)

    CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));

  CurDAG->RemoveDeadNodes();
void X86DAGToDAGISel::emitSpecialCodeForMain() {
  if (Subtarget->isTargetCygMing()) {

    auto &DL = CurDAG->getDataLayout();

    CLI.setChain(CurDAG->getRoot())

            CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),

    CurDAG->setRoot(Result.second);

void X86DAGToDAGISel::emitFunctionEntryCode() {

  if (F.hasExternalLinkage() && F.getName() == "main")
    emitSpecialCodeForMain();
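// The isInt<31> check below deliberately leaves one bit of headroom: a
// frame-index displacement is later combined with the explicit displacement,
// and the slack keeps the sum inside the 32-bit displacement field (my
// reading of the guard).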
  return isInt<31>(Val);

bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {

  int64_t Val = AM.Disp + Offset;

  if (Val != 0 && (AM.ES || AM.MCSym))

  if (Subtarget->is64Bit()) {

                                 AM.hasSymbolicDisplacement()))

  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
                                         bool AllowSegmentRegForX32) {

  if (auto *C = dyn_cast<ConstantSDNode>(Address)) {
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        !IndirectTlsSegRefs &&
        (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
         Subtarget->isTargetFuchsia())) {
      if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)

      switch (N->getPointerInfo().getAddrSpace()) {

        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);

        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
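      // Address spaces 256 and 257 map to %gs and %fs on x86; a zero-address
      // load in those spaces is how glibc-style targets reference the thread
      // pointer, which is why it can fold into the segment operand here.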
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {

  if (AM.hasSymbolicDisplacement())

  bool IsRIPRelTLS = false;

  if (Subtarget->is64Bit() &&

  if (IsRIPRel && AM.hasBaseOrIndexReg())

  X86ISelAddressMode Backup = AM;

  if (auto *G = dyn_cast<GlobalAddressSDNode>(N0)) {
    AM.GV = G->getGlobal();
    AM.SymbolFlags = G->getTargetFlags();
  } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
    AM.CP = CP->getConstVal();
    AM.Alignment = CP->getAlign();
    AM.SymbolFlags = CP->getTargetFlags();
  } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
    AM.ES = S->getSymbol();
    AM.SymbolFlags = S->getTargetFlags();
  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
    AM.MCSym = S->getMCSymbol();
  } else if (auto *J = dyn_cast<JumpTableSDNode>(N0)) {
    AM.JT = J->getIndex();
    AM.SymbolFlags = J->getTargetFlags();
  } else if (auto *BA = dyn_cast<BlockAddressSDNode>(N0)) {
    AM.BlockAddr = BA->getBlockAddress();
    AM.SymbolFlags = BA->getTargetFlags();
    Offset = BA->getOffset();
  }

  if (foldOffsetIntoAddress(Offset, AM)) {

  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (matchAddressRecursively(N, AM, 0))

  if (Subtarget->isTarget64BitILP32() &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) {
    SDValue Save_Base_Reg = AM.Base_Reg;
    if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {

      if (matchLoadInAddress(LoadN, AM, true))
        AM.Base_Reg = Save_Base_Reg;
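  // lea (,%reg,2) can be re-encoded as the shorter lea (%reg,%reg): with no
  // base register present, the scale-2 index can double as the base.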
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;

  switch (TM.getCodeModel()) {

    if (Subtarget->is64Bit() &&
        AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr &&
        AM.hasSymbolicDisplacement())
      AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
                               unsigned Depth) {

  X86ISelAddressMode Backup = AM;
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))

  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
                               Depth + 1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))

  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);

  N = Handle.getValue();

  if (N->getNodeId() == -1 ||
                                      X86ISelAddressMode &AM) {

  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))

  MVT VT = N.getSimpleValueType();

  AM.Scale = (1 << ScaleLog);

                                          X86ISelAddressMode &AM) {

  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();

  bool FoundAnyExtend = false;

      FoundAnyExtend = true;

  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)

  MVT VT = N.getSimpleValueType();

  if (FoundAnyExtend) {

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
                                     X86ISelAddressMode &AM) {

  unsigned AMShiftAmt = MaskTZ;

  if (AMShiftAmt == 0 || AMShiftAmt > 3)
    return true;

  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
  if (MaskLZ < ScaleDown)

  MaskLZ -= ScaleDown;

  bool ReplacingAnyExtend = false;

    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();

    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;

  APInt MaskedHighBits =

  if (MaskedHighBits != Known.Zero)
    return true;

  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
                                      X86ISelAddressMode &AM,

  if (!Subtarget.hasTBM() &&
      !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))

  if (AMShiftAmt == 0 || AMShiftAmt > 3)
    return true;

  MVT VT = N.getSimpleValueType();

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewAnd;
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {

    dbgs() << "MatchAddress: ";

    return matchAddressBase(N, AM);

  if (AM.isRIPRelative()) {

    if (!(AM.ES || AM.MCSym) && AM.JT != -1)

    if (auto *Cst = dyn_cast<ConstantSDNode>(N))
      if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
  switch (N.getOpcode()) {

    if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
      if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {

        AM.MCSym = ESNode->getMCSymbol();

    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!foldOffsetIntoAddress(Val, AM))

    if (!matchWrapper(N, AM))

    if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))

    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&

      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();

    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)

    if (auto *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      unsigned Val = CN->getZExtValue();

      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;

        if (CurDAG->isBaseWithConstantOffset(ShVal)) {

          auto *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));

          if (!foldOffsetIntoAddress(Disp, AM))

        AM.IndexReg = ShVal;
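        // For example (shl (add %x, 12), 2) becomes Scale = 4, Index = %x,
        // Disp += 48 -- i.e. lea 48(,%x,4) -- once the add's constant is
        // absorbed into the displacement.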
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
           "Unexpected value size!");

    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))

    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    if (N.getResNo() != 0)
      break;

    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (auto *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue()) - 1;

            auto *AddVal = cast<ConstantSDNode>(MulVal.getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            if (foldOffsetIntoAddress(Disp, AM))
              Reg = N.getOperand(0);

            Reg = N.getOperand(0);

          AM.IndexReg = AM.Base_Reg = Reg;
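          // Multiplies by 3, 5 or 9 map onto base + scaled-index
          // addressing: Scale becomes 2, 4 or 8 and the same register is
          // used as both base and index, e.g. x*9 -> lea (%rax,%rax,8).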
    X86ISelAddressMode Backup = AM;
    if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
      N = Handle.getValue();

    N = Handle.getValue();

    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {

    if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)

    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)

    AM.NegateIndex = true;

    if (!matchAdd(N, AM, Depth))
    if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&

    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
           "Unexpected value size!");

    if (!isa<ConstantSDNode>(N.getOperand(1)))

    if (N.getOperand(0).getOpcode() == ISD::SRL) {

    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)

    if (Src.getOpcode() == ISD::AND && Src.hasOneUse())
      if (auto *MaskC = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
        Mask = MaskC->getAPIntValue();
        Src = Src.getOperand(0);

    if (Src.getOpcode() != ISD::SHL || !Src.hasOneUse())

    SDValue ShlSrc = Src.getOperand(0);
    SDValue ShlAmt = Src.getOperand(1);
    auto *ShAmtC = dyn_cast<ConstantSDNode>(ShlAmt);

    unsigned ShAmtV = ShAmtC->getZExtValue();

    if (!CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))

    MVT VT = N.getSimpleValueType();

    if (!Mask.isAllOnes()) {
      Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT);

      Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res);

    AM.Scale = 1 << ShAmtV;

    CurDAG->ReplaceAllUsesWith(N, NewShl);
    CurDAG->RemoveDeadNode(N.getNode());

  return matchAddressBase(N, AM);
bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {

  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {

    if (!AM.IndexReg.getNode()) {

  AM.BaseType = X86ISelAddressMode::RegBase;
bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
                                                    X86ISelAddressMode &AM,
                                                    unsigned Depth) {

    dbgs() << "MatchVectorAddress: ";

    return matchAddressBase(N, AM);

  switch (N.getOpcode()) {

    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!foldOffsetIntoAddress(Val, AM))

    if (!matchWrapper(N, AM))

    X86ISelAddressMode Backup = AM;
    if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) &&
        !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,

    if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
        !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,

    N = Handle.getValue();

    return matchAddressBase(N, AM);

bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
  return matchVectorAddressRecursively(N, AM, 0);
  X86ISelAddressMode AM;
  AM.IndexReg = IndexOp;
  AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();

    AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);

    AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);

    AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);

  if (matchVectorAddress(BasePtr, AM))

  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
  X86ISelAddressMode AM;

  unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();

    AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);

    AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);

    AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);

  MVT VT = N.getSimpleValueType();

  if (matchAddress(N, AM))

  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);

    N = N.getOperand(0);

  std::optional<ConstantRange> CR =
      cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();

  return CR->getUnsignedMax().ult(1ull << 32);
  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))

  auto *RN = dyn_cast<RegisterSDNode>(Base);
  if (RN && RN->getReg() == 0)

  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {

    Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,

  RN = dyn_cast<RegisterSDNode>(Index);
  if (RN && RN->getReg() == 0)

         "Expect to be extending 32-bit registers for use in LEA");

  Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
bool X86DAGToDAGISel::selectLEAAddr(SDValue N,

  X86ISelAddressMode AM;

  MVT VT = N.getSimpleValueType();

  if (matchAddress(N, AM))

  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())

  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)

  if (AM.IndexReg.getNode())

  if (AM.hasSymbolicDisplacement()) {

    if (Subtarget->is64Bit())

  auto isMathWithFlags = [](SDValue V) {
    switch (V.getOpcode()) {

  if (isMathWithFlags(N.getOperand(0)) || isMathWithFlags(N.getOperand(1)))

  if (Complexity <= 2)

  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
  auto *GA = cast<GlobalAddressSDNode>(N);

  X86ISelAddressMode AM;
  AM.GV = GA->getGlobal();
  AM.Disp += GA->getOffset();
  AM.SymbolFlags = GA->getTargetFlags();

  if (Subtarget->is32Bit()) {

    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);

  MVT VT = N.getSimpleValueType();
  getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
  EVT VT = N.getValueType();
  bool WasTruncated = false;

    WasTruncated = true;
    N = N.getOperand(0);

  unsigned Opc = N.getOperand(0)->getOpcode();

    Op = N.getOperand(0);

    return !WasTruncated;

  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
  std::optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))

  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
                                      GA->getOffset(), GA->getTargetFlags());
  assert(Root && P && "Unknown root/parent nodes");

      !IsProfitableToFold(N, P, Root) ||
      !IsLegalToFold(N, P, Root, OptLevel))

  return selectAddr(N.getNode(),
                    N.getOperand(1), Base, Scale, Index, Disp, Segment);

  assert(Root && P && "Unknown root/parent nodes");

      !IsProfitableToFold(N, P, Root) ||
      !IsLegalToFold(N, P, Root, OptLevel))

  return selectAddr(N.getNode(),
                    N.getOperand(1), Base, Scale, Index, Disp, Segment);
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);

  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {

    N = N->getOperand(0).getNode();

  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));

  std::optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();

  return CR->getSignedMin().sge(-1ull << Width) &&
         CR->getSignedMax().slt(1ull << Width);

  assert(N->isMachineOpcode() && "Unexpected node");
  unsigned Opc = N->getMachineOpcode();
  const MCInstrDesc &MCID = getInstrInfo()->get(Opc);

  return static_cast<X86::CondCode>(N->getConstantOperandVal(CondNo));
bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {

    if (UI.getUse().getResNo() != Flags.getResNo())

        cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)

         FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {

      if (FlagUI.getUse().getResNo() != 1)
        continue;

      if (!FlagUI->isMachineOpcode())
        return false;
bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {

    if (UI.getUse().getResNo() != Flags.getResNo())

        cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)

         FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {

      if (FlagUI.getUse().getResNo() != 1)
        continue;

      if (!FlagUI->isMachineOpcode())
        return false;
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {

    if (UI.getUse().getResNo() != Flags.getResNo())

    unsigned UIOpc = UI->getOpcode();

      if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)

           FlagUI != FlagUE; ++FlagUI) {

        if (FlagUI.getUse().getResNo() != 1)

        if (!FlagUI->isMachineOpcode())
  if (StoredVal.getResNo() != 0)
    return false;

  LoadNode = cast<LoadSDNode>(Load);

  if (!Load.hasOneUse())

  bool FoundLoad = false;

  const unsigned int Max = 1024;

  if (Chain == Load.getValue(1)) {

    if (Op == Load.getValue(1)) {

    if (Op.getNode() != LoadNode)
bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
  auto *StoreNode = cast<StoreSDNode>(Node);

  EVT MemVT = StoreNode->getMemoryVT();

  bool IsCommutable = false;
  bool IsNegate = false;

    IsCommutable = true;

  unsigned LoadOpNo = IsNegate ? 1 : 0;

                                     LoadNode, InputChain)) {

                                       LoadNode, InputChain))
  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,

    unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,

    if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {

      if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {

            ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
            : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
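      // add/sub of +/-1 on memory only becomes INC/DEC when nothing reads
      // CF afterwards: unlike ADD/SUB, INC and DEC leave CF unchanged.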
  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {

      return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,

      return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,

      return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,

      return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,

      return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,

      return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);

      return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,

  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {

      return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
      return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
      return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
      return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
      return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
      return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
      return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);

  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {

      return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,

      return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,

      return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,

      return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,

      return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,

      return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,

      return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
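  // These tables pick the read-modify-write form: a matched
  // `x = load p; x op= c; store x, p` collapses into one memory-operand
  // instruction such as `addl $5, (%rdi)`, with the mi8 variants chosen when
  // the (possibly negated) immediate fits a sign-extended byte.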
  unsigned NewOpc = SelectRegOpcode(Opc);

    if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
      int64_t OperandV = OperandC->getSExtValue();

          ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
           (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
            isInt<32>(-OperandV))) &&
          hasNoCarryFlagUses(StoredVal.getValue(1))) {
        OperandV = -OperandV;

      if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
        Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
        NewOpc = SelectImm8Opcode(Opc);
      } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
        Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
        NewOpc = SelectImmOpcode(Opc);

        CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,

                       Segment, Operand, CopyTo, CopyTo.getValue(1)};

                       Segment, Operand, InputChain};

  CurDAG->setNodeMemRefs(Result, MemOps);

  CurDAG->RemoveDeadNode(Node);
bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {

      "Should be either an and-mask, or right-shift after clearing high bits.");

  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())

  MVT NVT = Node->getSimpleValueType(0);

  const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
  auto checkUses = [AllowExtraUsesByDefault](SDValue Op, unsigned NUses,
                                             std::optional<bool> AllowExtraUses) {
    return AllowExtraUses.value_or(AllowExtraUsesByDefault) ||
           Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
  };
  auto checkOneUse = [checkUses](SDValue Op,
                                 std::optional<bool> AllowExtraUses =
                                     std::nullopt) {
    return checkUses(Op, 1, AllowExtraUses);
  };
  auto checkTwoUse = [checkUses](SDValue Op,
                                 std::optional<bool> AllowExtraUses =
                                     std::nullopt) {
    return checkUses(Op, 2, AllowExtraUses);
  };
  auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {

             V.getOperand(0).getSimpleValueType() == MVT::i64 &&
             "Expected i64 -> i32 truncation");
      V = V.getOperand(0);

  auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits,

    if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))

    SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));

    NBits = M0->getOperand(1);
    NegateNBits = false;
  auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
    V = peekThroughOneUseTruncation(V);
    return CurDAG->MaskedValueIsAllOnes(

  auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,

    if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))

    if (!isAllOnes(Mask->getOperand(1)))

    SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));

    if (!isAllOnes(M0->getOperand(0)))

    NBits = M0->getOperand(1);
    NegateNBits = false;
  auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt,
                                                     unsigned Bitwidth) {

      NBits = NBits.getOperand(0);

      auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0));
      if (!V0 || V0->getZExtValue() != Bitwidth)

      NBits = NBits.getOperand(1);
      NegateNBits = false;
  auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits,

    Mask = peekThroughOneUseTruncation(Mask);
    unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();

    if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))

    if (!checkOneUse(M1))

    canonicalizeShiftAmt(M1, Bitwidth);

    return !NegateNBits;

  auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt,
                        AllowExtraUsesByDefault, &NegateNBits,

    canonicalizeShiftAmt(N1, Bitwidth);

    const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits;
    if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses))
  auto matchLowBitMask = [matchPatternA, matchPatternB,

    return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);

    X = Node->getOperand(0);

    if (matchLowBitMask(Mask)) {

      if (!matchLowBitMask(Mask))

  } else if (!matchPatternD(Node))

  if (NegateNBits && !Subtarget->hasBMI2())

        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);

    NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                           MVT::i32, ImplDef, NBits, SRIdxVal),
  if (Subtarget->hasBMI2()) {

    SelectCode(Extract.getNode());
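    // BZHI dst, x, n zeroes every bit of x at position >= n, i.e. it
    // computes x & ((1 << n) - 1) directly from the variable bit count.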
  SDValue RealX = peekThroughOneUseTruncation(X);

  MVT XVT = X.getSimpleValueType();

  SDValue ShiftAmt = X.getOperand(1);
  X = X.getOperand(0);

         "Expected shift amount to be i8");

  SDValue OrigShiftAmt = ShiftAmt;

  SelectCode(Extract.getNode());
  MVT NVT = Node->getSimpleValueType(0);

      Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
  if (!PreferBEXTR && !Subtarget->hasBMI2())

  auto *MaskCst = dyn_cast<ConstantSDNode>(N1);
  auto *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  if (!MaskCst || !ShiftCst)

  uint64_t Shift = ShiftCst->getZExtValue();

  if (Shift == 8 && MaskSize == 8)

  if (!PreferBEXTR && MaskSize <= 32)

  unsigned ROpc, MOpc;

    assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");

    Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
    ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
    MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
    unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
    Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);

    Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
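    // BEXTR's control operand packs the bit field: bits [7:0] hold the
    // start position (Shift) and bits [15:8] the length (MaskSize).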
    if (Subtarget->hasTBM()) {
      ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
      MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;

      assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");

      ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
      MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
      unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
      Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {

                     Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};

    NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);

    CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});

    NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);

    SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
    unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri;

        CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
                                             bool MayFoldLoad, const SDLoc &dl,

  auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());

  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
    SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,

    MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);

    CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});

  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
                                             bool MayFoldLoad, const SDLoc &dl,

  auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());

  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
    SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,

    MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);

    CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});

  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  SDValue OrigShiftAmt = N->getOperand(1);
  SDValue ShiftAmt = OrigShiftAmt;

    auto *Add0C = dyn_cast<ConstantSDNode>(Add0);
    auto *Add1C = dyn_cast<ConstantSDNode>(Add1);

    if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {

        ((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) ||
         (Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) {

      assert(Add0C == nullptr || Add1C == nullptr);

      NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT,
                                    Add0C == nullptr ? Add0 : Add1, AllOnes);
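      // Shifts only use the amount modulo the bit width, and
      // (Size - 1) - x is congruent to ~x in that arithmetic, so the XOR
      // with all-ones built here replaces the constant SUB/ADD forms
      // matched above with a single NOT.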
               Add0C->getZExtValue() != 0) {

      if (Add0C->getZExtValue() % Size == 0)

                 Add0C->getZExtValue() % 32 == 0) {

        Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT);

        X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0);

  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),

  if (UpdatedNode != N) {

    ReplaceNode(N, UpdatedNode);

  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
  MVT NVT = N->getSimpleValueType(0);
  unsigned Opcode = N->getOpcode();

  auto *Cst = dyn_cast<ConstantSDNode>(N1);

  int64_t Val = Cst->getSExtValue();

  bool FoundAnyExtend = false;

    FoundAnyExtend = true;

  auto *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));

  uint64_t ShAmt = ShlCst->getZExtValue();

  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
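  // Shape of the transform: e.g. (and (shl x, 8), 0xff00) is rewritten as
  // (shl (and x, 0xff), 8), shrinking the logic-op immediate from imm32 to
  // a sign-extended imm8; for non-AND ops the check above guarantees the
  // shift would not discard any set bits of the constant.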
  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {

      ShiftedVal = (uint64_t)Val >> ShAmt;
      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))

      if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)

      ShiftedVal = Val >> ShAmt;
      if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
          (!isInt<32>(Val) && isInt<32>(ShiftedVal)))

      ShiftedVal = (uint64_t)Val >> ShAmt;
      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))

  if (!CanShrinkImmediate(ShiftedVal))

  unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();

  NeededMask &= ~Cst->getAPIntValue();

  if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))

  if (FoundAnyExtend) {

  SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);

  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,

  assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) &&
         C.isOperandOf(ParentC) && "Incorrect parent node");

  auto tryFoldLoadOrBCast =

        if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))

          L = L.getOperand(0);

        auto *MemIntr = cast<MemIntrinsicSDNode>(L);
        unsigned Size = MemIntr->getMemoryVT().getSizeInBits();

        return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);

  bool FoldedLoad = false;
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
  } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,

    uint8_t OldImm = Imm;
    Imm = OldImm & 0xa5;
    if (OldImm & 0x02) Imm |= 0x10;
    if (OldImm & 0x10) Imm |= 0x02;
    if (OldImm & 0x08) Imm |= 0x40;
    if (OldImm & 0x40) Imm |= 0x08;
else if (tryFoldLoadOrBCast(Root, ParentB,
B, Tmp0, Tmp1, Tmp2, Tmp3,
4296 uint8_t OldImm =
Imm;
4297 Imm = OldImm & 0x99;
4298 if (OldImm & 0x02)
Imm |= 0x04;
4299 if (OldImm & 0x04)
Imm |= 0x02;
4300 if (OldImm & 0x20)
Imm |= 0x40;
4301 if (OldImm & 0x40)
Imm |= 0x20;
4316 auto *MemIntr = cast<MemIntrinsicSDNode>(
C);
4317 unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
4318 assert((EltSize == 32 || EltSize == 64) &&
"Unexpected broadcast size!");
4320 bool UseD = EltSize == 32;
4322 Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi;
4324 Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi;
4326 Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi;
4332 Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi;
4334 Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi;
4336 Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi;
4341 SDValue Ops[] = {
A,
B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm,
C.getOperand(0)};
4342 MNode = CurDAG->getMachineNode(Opc,
DL, VTs, Ops);
4345 ReplaceUses(
C.getValue(1),
SDValue(MNode, 1));
4347 CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(
C)->getMemOperand()});
4352 Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri;
4354 Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri;
4356 Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri;
4360 MNode = CurDAG->getMachineNode(Opc,
DL, NVT, {
A,
B,
C, TImm});
4364 CurDAG->RemoveDeadNode(Root);
bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
  MVT NVT = N->getSimpleValueType(0);

  if (!NVT.isVector() || !Subtarget->hasAVX512() ||

  auto getFoldableLogicOp = [](SDValue Op) {

      Op = Op.getOperand(0);

    if (!Op.hasOneUse())

    unsigned Opc = Op.getOpcode();

  if ((FoldableOp = getFoldableLogicOp(N1))) {
  } else if ((FoldableOp = getFoldableLogicOp(N0))) {

  uint8_t TernlogMagicA = 0xf0;
  uint8_t TernlogMagicB = 0xcc;
  uint8_t TernlogMagicC = 0xaa;
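  // 0xf0, 0xcc and 0xaa are the 8-entry truth tables of the projections
  // A, B and C; composing them with &, | and ^ yields the VPTERNLOG
  // immediate for any expression over the three inputs, e.g.
  // A & (B ^ C) = 0xf0 & (0xcc ^ 0xaa) = 0x60.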
    Parent = Op.getNode();
    Op = Op.getOperand(0);

  PeekThroughNot(A, ParentA, TernlogMagicA);
  PeekThroughNot(B, ParentB, TernlogMagicB);
  PeekThroughNot(C, ParentC, TernlogMagicC);

  case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break;
  case ISD::OR:  Imm = TernlogMagicB | TernlogMagicC; break;
  case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break;

  switch (N->getOpcode()) {

    Imm &= ~TernlogMagicA;

    Imm = ~(Imm) & TernlogMagicA;

  return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm);
bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {

  MVT VT = And->getSimpleValueType(0);

  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));

  APInt MaskVal = And1C->getAPIntValue();

  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))

  if (VT == MVT::i64 && MaskLZ >= 32) {

    MaskVal = MaskVal.trunc(32);

  APInt NegMaskVal = MaskVal | HighZeros;

    NegMaskVal = NegMaskVal.zext(64);
    HighZeros = HighZeros.zext(64);

  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
                           bool FoldedBCast, bool Masked) {
#define VPTESTM_CASE(VT, SUFFIX) \
  case MVT::VT: \
    if (Masked) \
      return IsTestN ? X86::VPTESTNM##SUFFIX##k : X86::VPTESTM##SUFFIX##k; \
    return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX;

#define VPTESTM_BROADCAST_CASES(SUFFIX) \
default: llvm_unreachable("Unexpected VT!"); \
VPTESTM_CASE(v4i32, DZ128##SUFFIX) \
VPTESTM_CASE(v2i64, QZ128##SUFFIX) \
VPTESTM_CASE(v8i32, DZ256##SUFFIX) \
VPTESTM_CASE(v4i64, QZ256##SUFFIX) \
VPTESTM_CASE(v16i32, DZ##SUFFIX) \
VPTESTM_CASE(v8i64, QZ##SUFFIX)

#define VPTESTM_FULL_CASES(SUFFIX) \
VPTESTM_BROADCAST_CASES(SUFFIX) \
VPTESTM_CASE(v16i8, BZ128##SUFFIX) \
VPTESTM_CASE(v8i16, WZ128##SUFFIX) \
VPTESTM_CASE(v32i8, BZ256##SUFFIX) \
VPTESTM_CASE(v16i16, WZ256##SUFFIX) \
VPTESTM_CASE(v64i8, BZ##SUFFIX) \
VPTESTM_CASE(v32i16, WZ##SUFFIX)

#undef VPTESTM_FULL_CASES
#undef VPTESTM_BROADCAST_CASES
bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,

  assert(Subtarget->hasAVX512() && "Expected AVX512!");

        if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))

          L = L.getOperand(0);

        auto *MemIntr = cast<MemIntrinsicSDNode>(L);
        if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits())

        return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);

  bool CanFoldLoads = Src0 != Src1;

  bool FoldedLoad = false;
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;

    FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2,

      FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1,

  bool IsMasked = InMask.getNode() != nullptr;

    SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,

    Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);

    Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);

    InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                            dl, MaskVT, InMask, RC), 0);

  unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,

    SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
    CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);

    SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
    CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);

    CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()});

    CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);

    CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);

    CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                   dl, ResVT, SDValue(CNode, 0), RC);

  CurDAG->RemoveDeadNode(Root);
bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {

  MVT NVT = N->getSimpleValueType(0);

  if (!NVT.isVector() || !Subtarget->hasAVX512())