22#include "llvm/Config/llvm-config.h"
27#include "llvm/IR/IntrinsicsX86.h"
37#define DEBUG_TYPE "x86-isel"
38#define PASS_NAME "X86 DAG->DAG Instruction Selection"
40STATISTIC(NumLoadMoved,
"Number of loads moved below TokenFactor");
43 cl::desc(
"Enable setting constant bits to reduce size of mask immediates"),
47 "x86-promote-anyext-load",
cl::init(
true),
59 struct X86ISelAddressMode {
67 int Base_FrameIndex = 0;
76 const char *ES =
nullptr;
81 bool NegateIndex =
false;
83 X86ISelAddressMode() =
default;
85 bool hasSymbolicDisplacement()
const {
86 return GV !=
nullptr ||
CP !=
nullptr || ES !=
nullptr ||
87 MCSym !=
nullptr ||
JT != -1 || BlockAddr !=
nullptr;
90 bool hasBaseOrIndexReg()
const {
97 if (
BaseType != RegBase)
return false;
99 dyn_cast_or_null<RegisterSDNode>(Base_Reg.
getNode()))
100 return RegNode->getReg() == X86::RIP;
109#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
111 dbgs() <<
"X86ISelAddressMode " <<
this <<
'\n';
112 dbgs() <<
"Base_Reg ";
118 dbgs() <<
" Base.FrameIndex " << Base_FrameIndex <<
'\n';
119 dbgs() <<
" Scale " << Scale <<
'\n'
127 dbgs() <<
" Disp " << Disp <<
'\n'
149 dbgs() <<
" JT" <<
JT <<
" Align" << Alignment.
value() <<
'\n';
169 bool IndirectTlsSegRefs;
174 X86DAGToDAGISel() =
delete;
178 OptForMinSize(
false), IndirectTlsSegRefs(
false) {}
184 "indirect-tls-seg-refs");
189 "OptForMinSize implies OptForSize");
203#include "X86GenDAGISel.inc"
208 bool foldOffsetIntoAddress(
uint64_t Offset, X86ISelAddressMode &AM);
209 bool matchLoadInAddress(
LoadSDNode *
N, X86ISelAddressMode &AM,
210 bool AllowSegmentRegForX32 =
false);
211 bool matchWrapper(
SDValue N, X86ISelAddressMode &AM);
212 bool matchAddress(
SDValue N, X86ISelAddressMode &AM);
213 bool matchVectorAddress(
SDValue N, X86ISelAddressMode &AM);
214 bool matchAdd(
SDValue &
N, X86ISelAddressMode &AM,
unsigned Depth);
217 bool matchAddressRecursively(
SDValue N, X86ISelAddressMode &AM,
219 bool matchVectorAddressRecursively(
SDValue N, X86ISelAddressMode &AM,
221 bool matchAddressBase(
SDValue N, X86ISelAddressMode &AM);
250 return tryFoldLoad(
P,
P,
N,
Base, Scale,
Index, Disp, Segment);
258 bool isProfitableToFormMaskedOp(
SDNode *
N)
const;
263 std::vector<SDValue> &OutOps)
override;
265 void emitSpecialCodeForMain();
267 inline void getAddressOperands(X86ISelAddressMode &AM,
const SDLoc &
DL,
271 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
272 Base = CurDAG->getTargetFrameIndex(
273 AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
274 else if (AM.Base_Reg.getNode())
277 Base = CurDAG->getRegister(0, VT);
279 Scale = getI8Imm(AM.Scale,
DL);
282 if (AM.NegateIndex) {
283 unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
289 if (AM.IndexReg.getNode())
292 Index = CurDAG->getRegister(0, VT);
297 Disp = CurDAG->getTargetGlobalAddress(AM.GV,
SDLoc(),
301 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment,
302 AM.Disp, AM.SymbolFlags);
304 assert(!AM.Disp &&
"Non-zero displacement is ignored with ES.");
305 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
306 }
else if (AM.MCSym) {
307 assert(!AM.Disp &&
"Non-zero displacement is ignored with MCSym.");
308 assert(AM.SymbolFlags == 0 &&
"oo");
309 Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
310 }
else if (AM.JT != -1) {
311 assert(!AM.Disp &&
"Non-zero displacement is ignored with JT.");
312 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
313 }
else if (AM.BlockAddr)
314 Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
317 Disp = CurDAG->getTargetConstant(AM.Disp,
DL, MVT::i32);
319 if (AM.Segment.getNode())
320 Segment = AM.Segment;
322 Segment = CurDAG->getRegister(0, MVT::i16);
331 bool shouldAvoidImmediateInstFormsForSize(
SDNode *
N)
const {
337 if (!CurDAG->shouldOptForSize())
347 if (
User->isMachineOpcode()) {
370 auto *
C = dyn_cast<ConstantSDNode>(
N);
371 if (
C && isInt<8>(
C->getSExtValue()))
391 (RegNode = dyn_cast_or_null<RegisterSDNode>(
393 if ((RegNode->
getReg() == X86::ESP) ||
394 (RegNode->
getReg() == X86::RSP))
403 return (UseCount > 1);
408 return CurDAG->getTargetConstant(Imm,
DL, MVT::i8);
413 return CurDAG->getTargetConstant(Imm,
DL, MVT::i32);
418 return CurDAG->getTargetConstant(Imm,
DL, MVT::i64);
423 assert((VecWidth == 128 || VecWidth == 256) &&
"Unexpected vector width");
425 MVT VecVT =
N->getOperand(0).getSimpleValueType();
431 assert((VecWidth == 128 || VecWidth == 256) &&
"Unexpected vector width");
433 MVT VecVT =
N->getSimpleValueType(0);
437 SDValue getPermuteVINSERTCommutedImmediate(
SDNode *
N,
unsigned VecWidth,
439 assert(VecWidth == 128 &&
"Unexpected vector width");
441 MVT VecVT =
N->getSimpleValueType(0);
443 assert((InsertIdx == 0 || InsertIdx == 1) &&
"Bad insertf128 index");
446 return getI8Imm(InsertIdx ? 0x02 : 0x30,
DL);
451 MVT VT =
N->getSimpleValueType(0);
454 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
456 CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0);
457 if (VT == MVT::i64) {
459 CurDAG->getMachineNode(
460 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
461 CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
462 CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
467 unsigned Opcode =
N->getOpcode();
469 "Unexpected opcode for SBB materialization");
470 unsigned FlagOpIndex = Opcode ==
X86ISD::SBB ? 2 : 1;
472 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
473 N->getOperand(FlagOpIndex),
SDValue());
477 unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
478 MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
479 VTs = CurDAG->getVTList(SBBVT, MVT::i32);
481 CurDAG->getMachineNode(Opc, dl, VTs,
482 {Zero, Zero, EFLAGS, EFLAGS.getValue(1)}),
488 bool isUnneededShiftMask(
SDNode *
N,
unsigned Width)
const {
490 const APInt &Val = cast<ConstantSDNode>(
N->getOperand(1))->getAPIntValue();
495 APInt Mask = Val | CurDAG->computeKnownBits(
N->getOperand(0)).Zero;
502 SDNode *getGlobalBaseReg();
513 return Subtarget->getInstrInfo();
526 bool isSExtAbsoluteSymbolRef(
unsigned Width,
SDNode *
N)
const;
530 if (!
N->isNonTemporal())
533 unsigned StoreSize =
N->getMemoryVT().getStoreSize();
535 if (
N->getAlign().value() < StoreSize)
544 return Subtarget->hasSSE41();
546 return Subtarget->hasAVX2();
548 return Subtarget->hasAVX512();
552 bool foldLoadStoreIntoMemOperand(
SDNode *
Node);
555 bool shrinkAndImmediate(
SDNode *
N);
556 bool isMaskZeroExtended(
SDNode *
N)
const;
557 bool tryShiftAmountMod(
SDNode *
N);
558 bool tryShrinkShlLogicImm(
SDNode *
N);
564 bool tryMatchBitSelect(
SDNode *
N);
566 MachineSDNode *emitPCMPISTR(
unsigned ROpc,
unsigned MOpc,
bool MayFoldLoad,
568 MachineSDNode *emitPCMPESTR(
unsigned ROpc,
unsigned MOpc,
bool MayFoldLoad,
572 bool tryOptimizeRem8Extend(
SDNode *
N);
574 bool onlyUsesZeroFlag(
SDValue Flags)
const;
575 bool hasNoSignFlagUses(
SDValue Flags)
const;
576 bool hasNoCarryFlagUses(
SDValue Flags)
const;
580char X86DAGToDAGISel::ID = 0;
587 unsigned Opcode =
N->getOpcode();
594 EVT OpVT =
N->getOperand(0).getValueType();
598 OpVT =
N->getOperand(1).getValueType();
600 return Subtarget->hasVLX();
614bool X86DAGToDAGISel::isMaskZeroExtended(
SDNode *
N)
const {
627 if (OptLevel == CodeGenOptLevel::None)
637 if (useNonTemporalLoad(cast<LoadSDNode>(
N)))
642 switch (
U->getOpcode()) {
668 if (
auto *Imm = dyn_cast<ConstantSDNode>(Op1)) {
669 if (
Imm->getAPIntValue().isSignedIntN(8))
678 Imm->getAPIntValue().getBitWidth() == 64 &&
679 Imm->getAPIntValue().isIntN(32))
686 (
Imm->getAPIntValue() == UINT8_MAX ||
687 Imm->getAPIntValue() == UINT16_MAX ||
688 Imm->getAPIntValue() == UINT32_MAX))
694 (-
Imm->getAPIntValue()).isSignedIntN(8))
698 (-
Imm->getAPIntValue()).isSignedIntN(8) &&
699 hasNoCarryFlagUses(
SDValue(U, 1)))
724 if (
U->getOperand(0).getOpcode() ==
ISD::SHL &&
728 if (
U->getOperand(1).getOpcode() ==
ISD::SHL &&
736 auto *
C = dyn_cast<ConstantSDNode>(U0.
getOperand(0));
737 if (
C &&
C->getSExtValue() == -2)
742 auto *
C = dyn_cast<ConstantSDNode>(U1.
getOperand(0));
743 if (
C &&
C->getSExtValue() == -2)
757 if (isa<ConstantSDNode>(
U->getOperand(1)))
778bool X86DAGToDAGISel::isProfitableToFormMaskedOp(
SDNode *
N)
const {
781 "Unexpected opcode!");
786 return N->getOperand(1).hasOneUse();
795 if (Chain.
getNode() == Load.getNode())
799 "Unexpected chain operand");
813 Load.getOperand(1), Load.getOperand(2));
817 Ops.
append(Call->op_begin() + 1, Call->op_end());
831 if (Callee.getNode() == Chain.
getNode() || !Callee.hasOneUse())
833 auto *LD = dyn_cast<LoadSDNode>(Callee.getNode());
851 if (isa<MemSDNode>(Chain.
getNode()) &&
852 cast<MemSDNode>(Chain.
getNode())->writeMem())
858 Callee.getValue(1).hasOneUse())
866 if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
869 uint8_t OptionalPrefixBytes [] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
870 0x65, 0x66, 0x67, 0xf0, 0xf2};
873 uint8_t Byte = (Imm >> i) & 0xFF;
884void X86DAGToDAGISel::PreprocessISelDAG() {
885 bool MadeChange =
false;
887 E = CurDAG->allnodes_end();
I !=
E; ) {
906 MVT VT =
N->getSimpleValueType(0);
907 int64_t
Imm = cast<ConstantSDNode>(
N)->getSExtValue();
908 int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;
915 SDValue Complement = CurDAG->getConstant(~Imm, dl, VT,
false,
true);
916 Complement = CurDAG->getNOT(dl, Complement, VT);
918 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Complement);
928 if (
N->getOpcode() ==
X86ISD::AND && !
N->hasAnyUseOfValue(1)) {
930 N->getOperand(0),
N->getOperand(1));
932 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Res);
956 auto mayPreventLoadFold = [&]() {
958 N->getOpcode() ==
ISD::ADD && Subtarget->hasAVX() &&
959 !
N->getOperand(1).hasOneUse();
962 N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
968 MVT VT =
N->getSimpleValueType(0);
976 CurDAG->getNode(NewOpcode,
DL, VT,
N->getOperand(0),
AllOnes);
978 CurDAG->ReplaceAllUsesWith(
N, Res.
getNode());
985 switch (
N->getOpcode()) {
987 MVT VT =
N->getSimpleValueType(0);
989 if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) {
990 MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8;
996 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
997 unsigned Index = VT == MVT::v32i16 ? 16 : 32;
999 CurDAG->getIntPtrConstant(
Index, dl));
1002 CurDAG->ReplaceAllUsesWith(
N, Res.
getNode());
1011 MVT VT =
N->getSimpleValueType(0);
1013 if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) {
1014 MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8;
1015 auto *MemNode = cast<MemSDNode>(
N);
1017 SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other);
1018 SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()};
1019 SDValue NarrowBCast = CurDAG->getMemIntrinsicNode(
1021 MemNode->getMemOperand());
1024 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1025 unsigned Index = VT == MVT::v32i16 ? 16 : 32;
1027 CurDAG->getIntPtrConstant(
Index, dl));
1031 CurDAG->ReplaceAllUsesWith(
N, To);
1041 EVT EleVT =
N->getOperand(0).getValueType().getVectorElementType();
1042 if (EleVT == MVT::i1)
1045 assert(Subtarget->hasSSE41() &&
"Expected SSE4.1 support!");
1046 assert(
N->getValueType(0).getVectorElementType() != MVT::i16 &&
1047 "We can't replace VSELECT with BLENDV in vXi16!");
1049 if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(
N->getOperand(0)) ==
1052 N->getOperand(0),
N->getOperand(1),
N->getOperand(2),
1053 CurDAG->getTargetConstant(0xCA,
SDLoc(
N), MVT::i8));
1056 N->getOperand(0),
N->getOperand(1),
1060 CurDAG->ReplaceAllUsesWith(
N,
R.getNode());
1073 if (!
N->getSimpleValueType(0).isVector())
1077 switch (
N->getOpcode()) {
1087 if (
N->isStrictFPOpcode())
1089 CurDAG->getNode(NewOpc,
SDLoc(
N), {
N->getValueType(0), MVT::Other},
1090 {
N->getOperand(0),
N->getOperand(1)});
1093 CurDAG->getNode(NewOpc,
SDLoc(
N),
N->getValueType(0),
1096 CurDAG->ReplaceAllUsesWith(
N, Res.
getNode());
1106 if (!
N->getValueType(0).isVector())
1110 switch (
N->getOpcode()) {
1116 SDValue Res = CurDAG->getNode(NewOpc,
SDLoc(
N),
N->getValueType(0),
1117 N->getOperand(0),
N->getOperand(1));
1119 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Res);
1128 if (!
N->getValueType(0).isVector())
1132 if (
N->getOperand(0).getScalarValueSizeInBits() == 1) {
1134 "Unexpected opcode for mask vector!");
1142 SDValue Res = CurDAG->getNode(NewOpc,
SDLoc(
N),
N->getValueType(0),
1145 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Res);
1165 switch (
N->getOpcode()) {
1181 bool IsStrict =
N->isStrictFPOpcode();
1185 {
N->getValueType(0), MVT::Other},
1186 {
N->getOperand(0),
N->getOperand(1),
1187 CurDAG->getTargetConstant(Imm, dl, MVT::i32)});
1191 CurDAG->getTargetConstant(Imm, dl, MVT::i32));
1193 CurDAG->ReplaceAllUsesWith(
N, Res.
getNode());
1204 MVT VT =
N->getSimpleValueType(0);
1205 if (VT.
isVector() || VT == MVT::f128)
1208 MVT VecVT = VT == MVT::f64 ? MVT::v2f64
1209 : VT == MVT::f32 ? MVT::v4f32
1219 if (Subtarget->hasSSE2()) {
1224 switch (
N->getOpcode()) {
1231 Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
1234 Res = CurDAG->getNode(
N->getOpcode(), dl, VecVT, Op0, Op1);
1237 CurDAG->getIntPtrConstant(0, dl));
1239 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Res);
1246 if (OptLevel != CodeGenOptLevel::None &&
1249 !Subtarget->useIndirectThunkCalls() &&
1250 ((
N->getOpcode() ==
X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
1252 (Subtarget->is64Bit() ||
1253 !getTargetMachine().isPositionIndependent())))) {
1292 switch (
N->getOpcode()) {
1297 MVT SrcVT =
N->getOperand(0).getSimpleValueType();
1298 MVT DstVT =
N->getSimpleValueType(0);
1310 if (SrcIsSSE && DstIsSSE)
1313 if (!SrcIsSSE && !DstIsSSE) {
1318 if (
N->getConstantOperandVal(1))
1326 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1327 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1335 CurDAG->getEntryNode(), dl,
N->getOperand(0), MemTmp, MPI, MemVT);
1337 MemTmp, MPI, MemVT);
1344 CurDAG->ReplaceAllUsesOfValueWith(
SDValue(
N, 0), Result);
1353 MVT SrcVT =
N->getOperand(1).getSimpleValueType();
1354 MVT DstVT =
N->getSimpleValueType(0);
1366 if (SrcIsSSE && DstIsSSE)
1369 if (!SrcIsSSE && !DstIsSSE) {
1374 if (
N->getConstantOperandVal(2))
1382 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1383 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1393 SDVTList VTs = CurDAG->getVTList(MVT::Other);
1394 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1), MemTmp};
1398 if (
N->getFlags().hasNoFPExcept()) {
1400 Flags.setNoFPExcept(
true);
1401 Store->setFlags(Flags);
1404 assert(SrcVT == MemVT &&
"Unexpected VT!");
1405 Store = CurDAG->getStore(
N->getOperand(0), dl,
N->getOperand(1), MemTmp,
1410 SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other);
1412 Result = CurDAG->getMemIntrinsicNode(
1415 if (
N->getFlags().hasNoFPExcept()) {
1417 Flags.setNoFPExcept(
true);
1421 assert(DstVT == MemVT &&
"Unexpected VT!");
1422 Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI);
1430 CurDAG->ReplaceAllUsesWith(
N,
Result.getNode());
1444 CurDAG->RemoveDeadNodes();
1448bool X86DAGToDAGISel::tryOptimizeRem8Extend(
SDNode *
N) {
1449 unsigned Opc =
N->getMachineOpcode();
1450 if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
1451 Opc != X86::MOVSX64rr8)
1463 unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
1464 : X86::MOVSX32rr8_NOREX;
1469 if (Opc == X86::MOVSX64rr8) {
1474 ReplaceUses(
N, Extend);
1483void X86DAGToDAGISel::PostprocessISelDAG() {
1485 if (
TM.getOptLevel() == CodeGenOptLevel::None)
1490 bool MadeChange =
false;
1491 while (Position != CurDAG->allnodes_begin()) {
1494 if (
N->use_empty() || !
N->isMachineOpcode())
1497 if (tryOptimizeRem8Extend(
N)) {
1504 unsigned Opc =
N->getMachineOpcode();
1505 if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
1506 Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
1507 N->getOperand(0) ==
N->getOperand(1) &&
1508 N->getOperand(0)->hasNUsesOfValue(2,
N->getOperand(0).getResNo()) &&
1509 N->getOperand(0).isMachineOpcode()) {
1511 unsigned N0Opc =
And.getMachineOpcode();
1512 if ((N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
1513 N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) &&
1514 !
And->hasAnyUseOfValue(1)) {
1519 ReplaceUses(
N,
Test);
1523 if ((N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
1524 N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) &&
1525 !
And->hasAnyUseOfValue(1)) {
1528 case X86::AND8rm: NewOpc = X86::TEST8mr;
break;
1529 case X86::AND16rm: NewOpc = X86::TEST16mr;
break;
1530 case X86::AND32rm: NewOpc = X86::TEST32mr;
break;
1531 case X86::AND64rm: NewOpc = X86::TEST64mr;
break;
1541 And.getOperand(6) };
1543 MVT::i32, MVT::Other, Ops);
1544 CurDAG->setNodeMemRefs(
1545 Test, cast<MachineSDNode>(
And.getNode())->memoperands());
1557 if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
1558 Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
1559 N->getOperand(0) ==
N->getOperand(1) &&
1560 N->isOnlyUserOf(
N->getOperand(0).getNode()) &&
1561 N->getOperand(0).isMachineOpcode() &&
1564 unsigned N0Opc =
And.getMachineOpcode();
1567 if (N0Opc == X86::KANDBrr ||
1568 (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
1569 N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
1573 case X86::KORTESTBrr: NewOpc = X86::KTESTBrr;
break;
1574 case X86::KORTESTWrr: NewOpc = X86::KTESTWrr;
break;
1575 case X86::KORTESTDrr: NewOpc = X86::KTESTDrr;
break;
1576 case X86::KORTESTQrr: NewOpc = X86::KTESTQrr;
break;
1582 ReplaceUses(
N, KTest);
1589 if (Opc != TargetOpcode::SUBREG_TO_REG)
1592 unsigned SubRegIdx =
N->getConstantOperandVal(2);
1593 if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1604 case X86::VMOVAPDrr:
case X86::VMOVUPDrr:
1605 case X86::VMOVAPSrr:
case X86::VMOVUPSrr:
1606 case X86::VMOVDQArr:
case X86::VMOVDQUrr:
1607 case X86::VMOVAPDYrr:
case X86::VMOVUPDYrr:
1608 case X86::VMOVAPSYrr:
case X86::VMOVUPSYrr:
1609 case X86::VMOVDQAYrr:
case X86::VMOVDQUYrr:
1610 case X86::VMOVAPDZ128rr:
case X86::VMOVUPDZ128rr:
1611 case X86::VMOVAPSZ128rr:
case X86::VMOVUPSZ128rr:
1612 case X86::VMOVDQA32Z128rr:
case X86::VMOVDQU32Z128rr:
1613 case X86::VMOVDQA64Z128rr:
case X86::VMOVDQU64Z128rr:
1614 case X86::VMOVAPDZ256rr:
case X86::VMOVUPDZ256rr:
1615 case X86::VMOVAPSZ256rr:
case X86::VMOVUPSZ256rr:
1616 case X86::VMOVDQA32Z256rr:
case X86::VMOVDQU32Z256rr:
1617 case X86::VMOVDQA64Z256rr:
case X86::VMOVDQU64Z256rr:
1622 if (!
In.isMachineOpcode() ||
1623 In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1636 CurDAG->UpdateNodeOperands(
N,
N->getOperand(0), In,
N->getOperand(2));
1641 CurDAG->RemoveDeadNodes();
1646void X86DAGToDAGISel::emitSpecialCodeForMain() {
1647 if (Subtarget->isTargetCygMing()) {
1649 auto &
DL = CurDAG->getDataLayout();
1652 CLI.setChain(CurDAG->getRoot())
1654 CurDAG->getExternalSymbol(
"__main", TLI->getPointerTy(
DL)),
1658 CurDAG->setRoot(
Result.second);
1662void X86DAGToDAGISel::emitFunctionEntryCode() {
1665 if (
F.hasExternalLinkage() &&
F.getName() ==
"main")
1666 emitSpecialCodeForMain();
1676 return isInt<31>(Val);
1680 X86ISelAddressMode &AM) {
1685 int64_t Val = AM.Disp +
Offset;
1688 if (Val != 0 && (AM.ES || AM.MCSym))
1692 if (Subtarget->is64Bit()) {
1695 AM.hasSymbolicDisplacement()))
1699 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1708bool X86DAGToDAGISel::matchLoadInAddress(
LoadSDNode *
N, X86ISelAddressMode &AM,
1709 bool AllowSegmentRegForX32) {
1721 if (
isNullConstant(Address) && AM.Segment.getNode() ==
nullptr &&
1722 !IndirectTlsSegRefs &&
1723 (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1724 Subtarget->isTargetFuchsia())) {
1725 if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
1727 switch (
N->getPointerInfo().getAddrSpace()) {
1729 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1732 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1745bool X86DAGToDAGISel::matchWrapper(
SDValue N, X86ISelAddressMode &AM) {
1748 if (AM.hasSymbolicDisplacement())
1751 bool IsRIPRelTLS =
false;
1765 if (Subtarget->is64Bit() &&
1771 if (IsRIPRel && AM.hasBaseOrIndexReg())
1775 X86ISelAddressMode Backup = AM;
1779 if (
auto *
G = dyn_cast<GlobalAddressSDNode>(N0)) {
1780 AM.GV =
G->getGlobal();
1781 AM.SymbolFlags =
G->getTargetFlags();
1783 }
else if (
auto *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1784 AM.CP =
CP->getConstVal();
1785 AM.Alignment =
CP->getAlign();
1786 AM.SymbolFlags =
CP->getTargetFlags();
1788 }
else if (
auto *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1789 AM.ES = S->getSymbol();
1790 AM.SymbolFlags = S->getTargetFlags();
1791 }
else if (
auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1792 AM.MCSym = S->getMCSymbol();
1793 }
else if (
auto *J = dyn_cast<JumpTableSDNode>(N0)) {
1794 AM.JT = J->getIndex();
1795 AM.SymbolFlags = J->getTargetFlags();
1796 }
else if (
auto *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1797 AM.BlockAddr = BA->getBlockAddress();
1798 AM.SymbolFlags = BA->getTargetFlags();
1799 Offset = BA->getOffset();
1803 if (foldOffsetIntoAddress(
Offset, AM)) {
1809 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1817bool X86DAGToDAGISel::matchAddress(
SDValue N, X86ISelAddressMode &AM) {
1818 if (matchAddressRecursively(
N, AM, 0))
1825 if (Subtarget->isTarget64BitILP32() &&
1826 AM.BaseType == X86ISelAddressMode::RegBase &&
1827 AM.Base_Reg.getNode() !=
nullptr && AM.IndexReg.getNode() ==
nullptr) {
1828 SDValue Save_Base_Reg = AM.Base_Reg;
1829 if (
auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
1831 if (matchLoadInAddress(LoadN, AM,
true))
1832 AM.Base_Reg = Save_Base_Reg;
1838 if (AM.Scale == 2 &&
1839 AM.BaseType == X86ISelAddressMode::RegBase &&
1840 AM.Base_Reg.getNode() ==
nullptr) {
1841 AM.Base_Reg = AM.IndexReg;
1848 switch (
TM.getCodeModel()) {
1852 if (Subtarget->is64Bit() &&
1854 AM.BaseType == X86ISelAddressMode::RegBase &&
1855 AM.Base_Reg.getNode() ==
nullptr &&
1856 AM.IndexReg.getNode() ==
nullptr &&
1858 AM.hasSymbolicDisplacement())
1859 AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1866bool X86DAGToDAGISel::matchAdd(
SDValue &
N, X86ISelAddressMode &AM,
1872 X86ISelAddressMode Backup = AM;
1873 if (!matchAddressRecursively(
N.getOperand(0), AM,
Depth+1) &&
1874 !matchAddressRecursively(Handle.getValue().getOperand(1), AM,
Depth+1))
1879 if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
1881 !matchAddressRecursively(Handle.getValue().getOperand(0), AM,
Depth + 1))
1888 if (AM.BaseType == X86ISelAddressMode::RegBase &&
1889 !AM.Base_Reg.getNode() &&
1890 !AM.IndexReg.getNode()) {
1891 N = Handle.getValue();
1892 AM.Base_Reg =
N.getOperand(0);
1893 AM.IndexReg =
N.getOperand(1);
1897 N = Handle.getValue();
1907 if (
N->getNodeId() == -1 ||
1927 X86ISelAddressMode &AM) {
1934 if (ScaleLog <= 0 || ScaleLog >= 4 ||
1935 Mask != (0xffu << ScaleLog))
1938 MVT XVT =
X.getSimpleValueType();
1939 MVT VT =
N.getSimpleValueType();
1964 AM.Scale = (1 << ScaleLog);
1972 X86ISelAddressMode &AM) {
1978 int64_t Mask = cast<ConstantSDNode>(
N->getOperand(1))->getSExtValue();
1983 bool FoundAnyExtend =
false;
1987 FoundAnyExtend =
true;
2005 if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
2008 MVT VT =
N.getSimpleValueType();
2010 if (FoundAnyExtend) {
2031 AM.Scale = 1 << ShiftAmt;
2032 AM.IndexReg = NewAnd;
2066 X86ISelAddressMode &AM) {
2072 unsigned MaskIdx, MaskLen;
2075 unsigned MaskLZ = 64 - (MaskIdx + MaskLen);
2081 unsigned AMShiftAmt = MaskIdx;
2085 if (AMShiftAmt == 0 || AMShiftAmt > 3)
return true;
2089 unsigned ScaleDown = (64 -
X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
2090 if (MaskLZ < ScaleDown)
2092 MaskLZ -= ScaleDown;
2100 bool ReplacingAnyExtend =
false;
2102 unsigned ExtendBits =
X.getSimpleValueType().getSizeInBits() -
2103 X.getOperand(0).getSimpleValueType().getSizeInBits();
2106 X =
X.getOperand(0);
2107 MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
2108 ReplacingAnyExtend =
true;
2110 APInt MaskedHighBits =
2117 MVT VT =
N.getSimpleValueType();
2118 if (ReplacingAnyExtend) {
2119 assert(
X.getValueType() != VT);
2126 MVT XVT =
X.getSimpleValueType();
2147 AM.Scale = 1 << AMShiftAmt;
2148 AM.IndexReg = NewExt;
2158 X86ISelAddressMode &AM,
2166 if (!Subtarget.hasTBM() &&
2167 !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
2171 unsigned MaskIdx, MaskLen;
2179 unsigned AMShiftAmt = MaskIdx;
2183 if (AMShiftAmt == 0 || AMShiftAmt > 3)
return true;
2185 MVT XVT =
X.getSimpleValueType();
2186 MVT VT =
N.getSimpleValueType();
2211 AM.Scale = 1 << AMShiftAmt;
2212 AM.IndexReg = NewExt;
2219 X86ISelAddressMode &AM,
2221 assert(AM.IndexReg.getNode() ==
nullptr &&
"IndexReg already matched");
2222 assert((AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8) &&
2223 "Illegal index scale");
2229 EVT VT =
N.getValueType();
2230 unsigned Opc =
N.getOpcode();
2233 if (CurDAG->isBaseWithConstantOffset(
N)) {
2234 auto *AddVal = cast<ConstantSDNode>(
N.getOperand(1));
2236 if (!foldOffsetIntoAddress(
Offset, AM))
2237 return matchIndexRecursively(
N.getOperand(0), AM,
Depth + 1);
2241 if (Opc ==
ISD::ADD &&
N.getOperand(0) ==
N.getOperand(1)) {
2242 if (AM.Scale <= 4) {
2244 return matchIndexRecursively(
N.getOperand(0), AM,
Depth + 1);
2250 uint64_t ShiftAmt =
N.getConstantOperandVal(1);
2251 uint64_t ScaleAmt = 1ULL << ShiftAmt;
2252 if ((AM.Scale * ScaleAmt) <= 8) {
2253 AM.Scale *= ScaleAmt;
2254 return matchIndexRecursively(
N.getOperand(0), AM,
Depth + 1);
2262 if (Src.getOpcode() ==
ISD::ADD && Src->getFlags().hasNoSignedWrap() &&
2264 if (CurDAG->isBaseWithConstantOffset(Src)) {
2265 SDValue AddSrc = Src.getOperand(0);
2266 auto *AddVal = cast<ConstantSDNode>(Src.getOperand(1));
2268 if (!foldOffsetIntoAddress(
Offset * AM.Scale, AM)) {
2270 SDValue ExtSrc = CurDAG->getNode(Opc,
DL, VT, AddSrc);
2276 CurDAG->ReplaceAllUsesWith(
N, ExtAdd);
2277 CurDAG->RemoveDeadNode(
N.getNode());
2289 unsigned SrcOpc = Src.getOpcode();
2290 if (((SrcOpc ==
ISD::ADD && Src->getFlags().hasNoUnsignedWrap()) ||
2291 CurDAG->isADDLike(Src)) &&
2293 if (CurDAG->isBaseWithConstantOffset(Src)) {
2294 SDValue AddSrc = Src.getOperand(0);
2295 auto *AddVal = cast<ConstantSDNode>(Src.getOperand(1));
2297 if (!foldOffsetIntoAddress(
Offset * AM.Scale, AM)) {
2299 SDValue ExtSrc = CurDAG->getNode(Opc,
DL, VT, AddSrc);
2301 SDValue ExtAdd = CurDAG->getNode(SrcOpc,
DL, VT, ExtSrc, ExtVal);
2305 CurDAG->ReplaceAllUsesWith(
N, ExtAdd);
2306 CurDAG->RemoveDeadNode(
N.getNode());
2317bool X86DAGToDAGISel::matchAddressRecursively(
SDValue N, X86ISelAddressMode &AM,
2321 dbgs() <<
"MatchAddress: ";
2326 return matchAddressBase(
N, AM);
2331 if (AM.isRIPRelative()) {
2335 if (!(AM.ES || AM.MCSym) && AM.JT != -1)
2338 if (
auto *Cst = dyn_cast<ConstantSDNode>(
N))
2339 if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
2344 switch (
N.getOpcode()) {
2347 if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
2348 if (
const auto *ESNode = dyn_cast<MCSymbolSDNode>(
N.getOperand(0))) {
2350 AM.MCSym = ESNode->getMCSymbol();
2356 uint64_t Val = cast<ConstantSDNode>(
N)->getSExtValue();
2357 if (!foldOffsetIntoAddress(Val, AM))
2364 if (!matchWrapper(
N, AM))
2369 if (!matchLoadInAddress(cast<LoadSDNode>(
N), AM))
2374 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2375 AM.Base_Reg.getNode() ==
nullptr &&
2377 AM.BaseType = X86ISelAddressMode::FrameIndexBase;
2378 AM.Base_FrameIndex = cast<FrameIndexSDNode>(
N)->getIndex();
2384 if (AM.IndexReg.getNode() !=
nullptr || AM.Scale != 1)
2387 if (
auto *CN = dyn_cast<ConstantSDNode>(
N.getOperand(1))) {
2388 unsigned Val = CN->getZExtValue();
2393 if (Val == 1 || Val == 2 || Val == 3) {
2395 AM.Scale = 1 << Val;
2396 AM.IndexReg = matchIndexRecursively(ShVal, AM,
Depth + 1);
2404 if (AM.IndexReg.getNode() !=
nullptr || AM.Scale != 1)
break;
2408 assert(
N.getSimpleValueType().getSizeInBits() <= 64 &&
2409 "Unexpected value size!");
2418 if (!isa<ConstantSDNode>(
N.getOperand(1)) ||
2419 !isa<ConstantSDNode>(
And.getOperand(1)))
2421 uint64_t Mask =
And.getConstantOperandVal(1) >>
N.getConstantOperandVal(1);
2433 if (
N.getResNo() != 0)
break;
2438 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2439 AM.Base_Reg.getNode() ==
nullptr &&
2440 AM.IndexReg.getNode() ==
nullptr) {
2441 if (
auto *CN = dyn_cast<ConstantSDNode>(
N.getOperand(1)))
2442 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
2443 CN->getZExtValue() == 9) {
2444 AM.Scale =
unsigned(CN->getZExtValue())-1;
2455 auto *AddVal = cast<ConstantSDNode>(MulVal.
getOperand(1));
2456 uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
2457 if (foldOffsetIntoAddress(Disp, AM))
2458 Reg =
N.getOperand(0);
2460 Reg =
N.getOperand(0);
2463 AM.IndexReg = AM.Base_Reg =
Reg;
2482 X86ISelAddressMode Backup = AM;
2483 if (matchAddressRecursively(
N.getOperand(0), AM,
Depth+1)) {
2484 N = Handle.getValue();
2488 N = Handle.getValue();
2490 if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2505 RHS.getOperand(0).getValueType() == MVT::i32))
2509 if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2510 !AM.Base_Reg.getNode()->hasOneUse()) ||
2511 AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2515 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2516 ((AM.Disp != 0) && (Backup.Disp == 0)) +
2517 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2529 AM.NegateIndex =
true;
2537 if (!CurDAG->isADDLike(
N))
2541 if (!matchAdd(
N, AM,
Depth))
2550 if (AM.IndexReg.getNode() !=
nullptr || AM.Scale != 1)
break;
2554 assert(
N.getSimpleValueType().getSizeInBits() <= 64 &&
2555 "Unexpected value size!");
2557 if (!isa<ConstantSDNode>(
N.getOperand(1)))
2560 if (
N.getOperand(0).getOpcode() ==
ISD::SRL) {
2589 if (AM.IndexReg.getNode() !=
nullptr || AM.Scale != 1)
2595 if (Src.getOpcode() ==
ISD::AND && Src.hasOneUse())
2596 if (
auto *MaskC = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
2597 Mask = MaskC->getAPIntValue();
2598 Src = Src.getOperand(0);
2601 if (Src.getOpcode() ==
ISD::SHL && Src.hasOneUse()) {
2603 SDValue ShlSrc = Src.getOperand(0);
2604 SDValue ShlAmt = Src.getOperand(1);
2605 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShlAmt);
2608 unsigned ShAmtV = ShAmtC->getZExtValue();
2616 if (!CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
2624 MVT VT =
N.getSimpleValueType();
2628 if (!
Mask.isAllOnes()) {
2629 Res = CurDAG->getConstant(
Mask.lshr(ShAmtV),
DL, SrcVT);
2631 Res = CurDAG->getNode(
ISD::AND,
DL, SrcVT, ShlSrc, Res);
2640 AM.Scale = 1 << ShAmtV;
2643 CurDAG->ReplaceAllUsesWith(
N, NewShl);
2644 CurDAG->RemoveDeadNode(
N.getNode());
2648 if (Src.getOpcode() ==
ISD::SRL && !
Mask.isAllOnes()) {
2651 Src.getOperand(0), AM))
2656 Src.getOperand(0), AM))
2661 Src.getOperand(0), AM, *Subtarget))
2669 return matchAddressBase(
N, AM);
// Fallback address matcher: record N directly as the base register of AM,
// or — if a base is already present — try the (still free) index slot.
// NOTE(review): this extraction has gaps (original lines 2675, 2677,
// 2679-2688, 2690-2693 are missing), so the visible control flow is partial.
2674bool X86DAGToDAGISel::matchAddressBase(
SDValue N, X86ISelAddressMode &AM) {
// A base register (or frame-index base) already exists; fall through to the
// index slot instead of clobbering it.
2676 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2678 if (!AM.IndexReg.getNode()) {
// No base yet: N becomes the plain register base.
2689 AM.BaseType = X86ISelAddressMode::RegBase;
// Restricted address matcher used for vector (gather/scatter) addresses.
// Handles only a small set of node kinds — constant offsets, X86 wrapper
// nodes, and ADD (tried with both operand orders, with AM backed up and the
// other operand protected by a HandleSDNode) — before falling back to
// matchAddressBase. NOTE(review): extraction gaps hide parts of the body.
2694bool X86DAGToDAGISel::matchVectorAddressRecursively(
SDValue N,
2695 X86ISelAddressMode &AM,
2699 dbgs() <<
"MatchVectorAddress: ";
// Depth limit (guard not fully visible): give up and use N as a plain base.
2704 return matchAddressBase(
N, AM);
2707 switch (
N.getOpcode()) {
// Constant: try to fold the signed value into the displacement.
2709 uint64_t Val = cast<ConstantSDNode>(
N)->getSExtValue();
2710 if (!foldOffsetIntoAddress(Val, AM))
2715 if (!matchWrapper(
N, AM))
// ADD: back up AM so a failed first-order match can be retried with the
// operands swapped; Handle keeps N alive across recursive CSE/RAUW.
2723 X86ISelAddressMode Backup = AM;
2724 if (!matchVectorAddressRecursively(
N.getOperand(0), AM,
Depth + 1) &&
2725 !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
// Retry with operands in the opposite order.
2731 if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
2733 !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,
2738 N = Handle.getValue();
// Default: record N itself as the base.
2743 return matchAddressBase(
N, AM);
// Entry point for vector-address matching: kick off the recursion at depth 0.
2749bool X86DAGToDAGISel::matchVectorAddress(
SDValue N, X86ISelAddressMode &AM) {
2750 return matchVectorAddressRecursively(
N, AM, 0);
2758 X86ISelAddressMode AM;
2759 AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
2764 AM.IndexReg = matchIndexRecursively(IndexOp, AM, 0);
2766 AM.IndexReg = IndexOp;
2770 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2772 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2774 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2780 if (matchVectorAddress(BasePtr, AM))
2783 getAddressOperands(AM,
DL, VT,
Base, Scale,
Index, Disp, Segment);
2797 X86ISelAddressMode AM;
2809 unsigned AddrSpace =
2810 cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2812 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2814 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2816 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2821 MVT VT =
N.getSimpleValueType();
2823 if (matchAddress(
N, AM))
2826 getAddressOperands(AM,
DL, VT,
Base, Scale,
Index, Disp, Segment);
2836 N =
N.getOperand(0);
2847 std::optional<ConstantRange> CR =
2848 cast<GlobalAddressSDNode>(
N)->getGlobal()->getAbsoluteSymbolRange();
2852 return CR->getUnsignedMax().ult(1ull << 32);
2861 if (!selectLEAAddr(
N,
Base, Scale,
Index, Disp, Segment))
2864 auto *
RN = dyn_cast<RegisterSDNode>(
Base);
2865 if (RN &&
RN->getReg() == 0)
2866 Base = CurDAG->getRegister(0, MVT::i64);
2867 else if (
Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(
Base)) {
2871 Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit,
DL, MVT::i64, ImplDef,
2875 RN = dyn_cast<RegisterSDNode>(
Index);
2876 if (RN &&
RN->getReg() == 0)
2877 Index = CurDAG->getRegister(0, MVT::i64);
2880 "Expect to be extending 32-bit registers for use in LEA");
2883 Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit,
DL, MVT::i64, ImplDef,
// Decide whether N is worth selecting as an LEA, and if so produce its
// address operands. A "Complexity" score counts address features (base,
// frame index, index register, symbolic displacement...); only sufficiently
// complex addresses (> 2) justify an LEA over plain ADD-style code.
// NOTE(review): extraction gaps hide several scoring branches.
2892bool X86DAGToDAGISel::selectLEAAddr(
SDValue N,
2896 X86ISelAddressMode AM;
2900 MVT VT =
N.getSimpleValueType();
// Placeholder segment register (LEA has no segment operand).
2905 SDValue T = CurDAG->getRegister(0, MVT::i32);
2907 if (matchAddress(
N, AM))
// Score the matched address form.
2912 unsigned Complexity = 0;
2913 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
2915 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2918 if (AM.IndexReg.getNode())
2931 if (AM.hasSymbolicDisplacement()) {
2933 if (Subtarget->is64Bit())
// Predicate: does V compute arithmetic whose EFLAGS result is (possibly)
// used? Such nodes favor LEA, which leaves the flags untouched.
2943 auto isMathWithFlags = [](
SDValue V) {
2944 switch (
V.getOpcode()) {
2965 if (isMathWithFlags(
N.getOperand(0)) || isMathWithFlags(
N.getOperand(1)))
// Too simple an address: not worth an LEA.
2973 if (Complexity <= 2)
2976 getAddressOperands(AM,
DL, VT,
Base, Scale,
Index, Disp, Segment);
2985 auto *GA = cast<GlobalAddressSDNode>(
N);
2987 X86ISelAddressMode AM;
2988 AM.GV = GA->getGlobal();
2989 AM.Disp += GA->getOffset();
2990 AM.SymbolFlags = GA->getTargetFlags();
2992 if (Subtarget->is32Bit()) {
2994 AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2997 MVT VT =
N.getSimpleValueType();
2998 getAddressOperands(AM,
SDLoc(
N), VT,
Base, Scale,
Index, Disp, Segment);
3006 EVT VT =
N.getValueType();
3007 bool WasTruncated =
false;
3009 WasTruncated =
true;
3010 N =
N.getOperand(0);
3019 unsigned Opc =
N.getOperand(0)->getOpcode();
3021 Op =
N.getOperand(0);
3024 return !WasTruncated;
3028 auto *GA = cast<GlobalAddressSDNode>(
N.getOperand(0));
3029 std::optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
3030 if (!CR || CR->getUnsignedMax().uge(1ull << VT.
getSizeInBits()))
3034 Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(),
SDLoc(
N), VT,
3035 GA->getOffset(), GA->getTargetFlags());
3043 assert(Root &&
P &&
"Unknown root/parent nodes");
3045 !IsProfitableToFold(
N,
P, Root) ||
3046 !IsLegalToFold(
N,
P, Root, OptLevel))
3049 return selectAddr(
N.getNode(),
3050 N.getOperand(1),
Base, Scale,
Index, Disp, Segment);
3057 assert(Root &&
P &&
"Unknown root/parent nodes");
3059 !IsProfitableToFold(
N,
P, Root) ||
3060 !IsLegalToFold(
N,
P, Root, OptLevel))
3063 return selectAddr(
N.getNode(),
3064 N.getOperand(1),
Base, Scale,
Index, Disp, Segment);
// Return (as an SDNode) the register node for this function's PIC global
// base register, materialized lazily by X86InstrInfo.
3070SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
3071 unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
// Typed with the target pointer type so it can be used directly as a base.
3073 return CurDAG->getRegister(GlobalBaseReg, TLI->
getPointerTy(
DL)).getNode();
// Return true if N references an absolute symbol whose declared value range
// fits entirely in a Width-bit sign-extended immediate, i.e. the symbol's
// range lies within [-2^Width, 2^Width). NOTE(review): the opcode checks
// guarding the operand walks are hidden by extraction gaps.
3076bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(
unsigned Width,
SDNode *
N)
const {
// Peel one level of wrapping to reach the global-address operand.
3078 N =
N->getOperand(0).getNode();
3082 auto *GA = dyn_cast<GlobalAddressSDNode>(
N->getOperand(0));
// Absolute symbols carry an explicit value range on the GlobalValue.
3086 std::optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
3090 return CR->getSignedMin().sge(-1ull << Width) &&
3091 CR->getSignedMax().slt(1ull << Width);
3095 assert(
N->isMachineOpcode() &&
"Unexpected node");
3096 unsigned Opc =
N->getMachineOpcode();
3097 const MCInstrDesc &MCID = getInstrInfo()->get(Opc);
3102 return static_cast<X86::CondCode>(
N->getConstantOperandVal(CondNo));
// Return true if every user of the EFLAGS result `Flags` only reads the
// zero flag (ZF). Walks each direct user; for CopyToReg-of-EFLAGS users it
// then inspects the copy's own flag users. NOTE(review): the per-opcode
// condition-code checks are hidden by extraction gaps.
3107bool X86DAGToDAGISel::onlyUsesZeroFlag(
SDValue Flags)
const {
// Skip uses of other results of the defining node.
3112 if (UI.getUse().getResNo() !=
Flags.getResNo())
// Only copies into EFLAGS are interesting; anything else is conservative.
3116 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3120 FlagUE = UI->
use_end(); FlagUI != FlagUE; ++FlagUI) {
// Result 1 of the copy is the glued flag value.
3122 if (FlagUI.getUse().getResNo() != 1)
continue;
// Unknown (non-machine) consumer: assume it may read other flags.
3124 if (!FlagUI->isMachineOpcode())
return false;
// Return true if no user of the EFLAGS result `Flags` reads the sign flag
// (SF). Same user-walk structure as onlyUsesZeroFlag above; the per-opcode
// condition-code filtering is hidden by extraction gaps.
3143bool X86DAGToDAGISel::hasNoSignFlagUses(
SDValue Flags)
const {
// Skip uses of other results of the defining node.
3148 if (UI.getUse().getResNo() !=
Flags.getResNo())
// Only CopyToReg into EFLAGS is analyzable.
3152 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3156 FlagUE = UI->
use_end(); FlagUI != FlagUE; ++FlagUI) {
// Result 1 of the copy is the glued flag value.
3158 if (FlagUI.getUse().getResNo() != 1)
continue;
// Conservatively bail on non-machine consumers.
3160 if (!FlagUI->isMachineOpcode())
return false;
// Return true if no user of the EFLAGS result `Flags` reads the carry flag
// (CF). This allows rewrites (e.g. ADD->INC, negated immediates) that
// change CF but preserve the other flags. Structure mirrors the two flag
// walkers above; opcode-specific checks are hidden by extraction gaps.
3199 bool X86DAGToDAGISel::hasNoCarryFlagUses(
SDValue Flags)
const {
// Skip uses of other results of the defining node.
3204 if (UI.getUse().getResNo() !=
Flags.getResNo())
3207 unsigned UIOpc = UI->getOpcode();
// For CopyToReg users, only copies into EFLAGS are analyzable.
3211 if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3215 FlagUI != FlagUE; ++FlagUI) {
// Result 1 of the copy is the glued flag value.
3217 if (FlagUI.getUse().getResNo() != 1)
3220 if (!FlagUI->isMachineOpcode())
3261 if (StoredVal.
getResNo() != 0)
return false;
3275 LoadNode = cast<LoadSDNode>(Load);
3278 if (!Load.hasOneUse())
3286 bool FoundLoad =
false;
3290 const unsigned int Max = 1024;
3332 if (Chain == Load.getValue(1)) {
3338 if (
Op == Load.getValue(1)) {
3354 if (
Op.getNode() != LoadNode)
// Fold a (store (op (load p), x), p) pattern into a single read-modify-write
// memory instruction (ADDmr/SUBmr/INCm/NEGm/... families), removing the
// separate load and ALU nodes. NOTE(review): the extraction has large gaps
// (opcode dispatch, operand setup, chain plumbing), so comments below cover
// only what is visible.
3387bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(
SDNode *
Node) {
3388 auto *StoreNode = cast<StoreSDNode>(
Node);
// Only integer widths with matching m-form instructions are handled.
3395 EVT MemVT = StoreNode->getMemoryVT();
3396 if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
3400 bool IsCommutable =
false;
3401 bool IsNegate =
false;
3415 IsCommutable =
true;
// For NEG the loaded value is operand 1 of the SUB (0 - load).
3419 unsigned LoadOpNo = IsNegate ? 1 : 0;
3423 LoadNode, InputChain)) {
3430 LoadNode, InputChain))
// Pick the opcode variant matching MemVT's width.
3439 auto SelectOpcode = [&](
unsigned Opc64,
unsigned Opc32,
unsigned Opc16,
3460 unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
// Prefer INC/DEC for +/-1 when the CPU doesn't penalize them (or when
// optimizing for size) and nothing reads the carry flag INC/DEC preserves.
3470 if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {
3474 if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.
getValue(1))) {
3477 ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
3478 : SelectOpcode(
X86::DEC64m,
X86::DEC32m,
X86::DEC16m,
X86::DEC8m);
// Map the ALU opcode to its register-operand memory form.
3491 auto SelectRegOpcode = [SelectOpcode](
unsigned Opc) {
3494 return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
3497 return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
3500 return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
3503 return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
3506 return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3509 return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3511 return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
// Map the ALU opcode to its immediate-operand memory form.
3517 auto SelectImmOpcode = [SelectOpcode](
unsigned Opc) {
3520 return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3523 return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3526 return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3529 return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3532 return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3535 return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3538 return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3545 unsigned NewOpc = SelectRegOpcode(Opc);
// Constant operand: try the immediate form, possibly negating the value
// (ADD<->SUB) to fit a smaller encoding when CF is unused.
3550 if (
auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3551 int64_t OperandV = OperandC->getSExtValue();
3557 ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3558 (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
3559 isInt<32>(-OperandV))) &&
3560 hasNoCarryFlagUses(StoredVal.
getValue(1))) {
3561 OperandV = -OperandV;
// 64-bit immediates must still fit in the 32-bit imm field.
3565 if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
3566 Operand = CurDAG->getTargetConstant(OperandV,
SDLoc(
Node), MemVT);
3567 NewOpc = SelectImmOpcode(Opc);
// ADC/SBB need the incoming carry: copy EFLAGS in first and glue it.
3573 CurDAG->getCopyToReg(InputChain,
SDLoc(
Node), X86::EFLAGS,
3577 Segment, Operand, CopyTo, CopyTo.
getValue(1)};
3578 Result = CurDAG->getMachineNode(NewOpc,
SDLoc(
Node), MVT::i32, MVT::Other,
// Non-carry ops take the plain input chain.
3582 Segment, Operand, InputChain};
3583 Result = CurDAG->getMachineNode(NewOpc,
SDLoc(
Node), MVT::i32, MVT::Other,
// Transfer memory operands, then delete the now-dead store.
3594 CurDAG->setNodeMemRefs(Result, MemOps);
3600 CurDAG->RemoveDeadNode(
Node);
// Try to select a low-bit-extract pattern (x & ((1 << nbits) - 1), and
// friends) as a BMI1 BEXTR / BMI2 BZHI instruction. Several lambda-based
// sub-matchers recognize the different mask shapes (patterns A-D below).
// NOTE(review): the extraction omits many lines; the pattern bodies are only
// partially visible.
3611bool X86DAGToDAGISel::matchBitExtract(
SDNode *
Node) {
3615 "Should be either an and-mask, or right-shift after clearing high bits.");
// BMI1/BMI2 provide BEXTR/BZHI; without either there is nothing to select.
3618 if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3621 MVT NVT =
Node->getSimpleValueType(0);
// Only 32/64-bit scalars have these instructions.
3624 if (NVT != MVT::i32 && NVT != MVT::i64)
// With BMI2 (reg-reg BZHI) extra uses of intermediate values are tolerable.
3632 const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
3633 auto checkUses = [AllowExtraUsesByDefault](
3635 std::optional<bool> AllowExtraUses) {
3636 return AllowExtraUses.value_or(AllowExtraUsesByDefault) ||
3637 Op.getNode()->hasNUsesOfValue(NUses,
Op.getResNo());
3639 auto checkOneUse = [checkUses](
SDValue Op,
3640 std::optional<bool> AllowExtraUses =
3642 return checkUses(
Op, 1, AllowExtraUses);
3644 auto checkTwoUse = [checkUses](
SDValue Op,
3645 std::optional<bool> AllowExtraUses =
3647 return checkUses(
Op, 2, AllowExtraUses);
// Look through a one-use i64->i32 truncate so 64-bit masks also match.
3650 auto peekThroughOneUseTruncation = [checkOneUse](
SDValue V) {
3652 assert(
V.getSimpleValueType() == MVT::i32 &&
3653 V.getOperand(0).getSimpleValueType() == MVT::i64 &&
3654 "Expected i64 -> i32 truncation");
3655 V =
V.getOperand(0);
// Pattern A: mask == (1 << nbits) + (-1), i.e. an ADD-formed low-bit mask.
3661 auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits,
3664 if (
Mask->getOpcode() !=
ISD::ADD || !checkOneUse(Mask))
3670 SDValue M0 = peekThroughOneUseTruncation(
Mask->getOperand(0));
3675 NBits =
M0->getOperand(1);
3676 NegateNBits =
false;
// Helper: is V an all-ones value (possibly behind a truncate)?
3680 auto isAllOnes = [
this, peekThroughOneUseTruncation, NVT](
SDValue V) {
3681 V = peekThroughOneUseTruncation(V);
3682 return CurDAG->MaskedValueIsAllOnes(
// Pattern B: mask == ~(-1 << nbits), the XOR-with-all-ones form.
3688 auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3691 if (
Mask.getOpcode() !=
ISD::XOR || !checkOneUse(Mask))
3694 if (!isAllOnes(
Mask->getOperand(1)))
3697 SDValue M0 = peekThroughOneUseTruncation(
Mask->getOperand(0));
3701 if (!isAllOnes(
M0->getOperand(0)))
3703 NBits =
M0->getOperand(1);
3704 NegateNBits =
false;
// Normalize a shift amount of the form (bitwidth - y) to just y, recording
// in NegateNBits when the negation still has to be materialized.
3710 auto canonicalizeShiftAmt = [&NBits, &NegateNBits](
SDValue ShiftAmt,
3711 unsigned Bitwidth) {
3716 NBits = NBits.getOperand(0);
3721 auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0));
3722 if (!V0 || V0->getZExtValue() != Bitwidth)
3724 NBits = NBits.getOperand(1);
3725 NegateNBits =
false;
// Pattern C: mask == (-1 >> (bitwidth - nbits)).
3731 auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits,
3734 Mask = peekThroughOneUseTruncation(Mask);
3735 unsigned Bitwidth =
Mask.getSimpleValueType().getSizeInBits();
3737 if (
Mask.getOpcode() !=
ISD::SRL || !checkOneUse(Mask))
3744 if (!checkOneUse(
M1))
3746 canonicalizeShiftAmt(
M1, Bitwidth);
// A still-negated nbits would need extra code here; only accept when the
// canonicalization removed the negation.
3751 return !NegateNBits;
// Pattern D: ((x << (bitwidth - nbits)) >> (bitwidth - nbits)).
3759 auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt,
3760 AllowExtraUsesByDefault, &NegateNBits,
3773 canonicalizeShiftAmt(N1, Bitwidth);
3777 const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits;
3778 if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses))
// An AND-style root can match any of the low-bit-mask patterns A/B/C.
3784 auto matchLowBitMask = [matchPatternA, matchPatternB,
3786 return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3790 X =
Node->getOperand(0);
3793 if (matchLowBitMask(Mask)) {
3797 if (!matchLowBitMask(Mask))
// Mask-only form: extract from an all-ones source.
3801 X = CurDAG->getAllOnesConstant(
SDLoc(
Node), NVT);
3802 }
else if (!matchPatternD(
Node))
// Materializing the negated count requires BMI2's flexible BZHI.
3807 if (NegateNBits && !Subtarget->hasBMI2())
// Widen the 8-bit count into the low byte of a 32-bit register via
// INSERT_SUBREG on an IMPLICIT_DEF (upper bits are undefined but unused).
3819 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, MVT::i32), 0);
3822 SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit,
DL, MVT::i32);
3824 NBits =
SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG,
DL,
3825 MVT::i32, ImplDef, NBits, SRIdxVal),
// If the count is still negated, emit the SUB (bitwidth - y) now.
3835 NBits = CurDAG->getNode(
ISD::SUB,
DL, MVT::i32, BitWidthC, NBits);
3839 if (Subtarget->hasBMI2()) {
// BZHI path; 64-bit needs the count widened to i64 (gap-hidden).
3841 if (NVT != MVT::i32) {
3849 SelectCode(Extract.
getNode());
// BEXTR path: control word = start byte | (length << 8).
3858 SDValue RealX = peekThroughOneUseTruncation(
X);
3864 MVT XVT =
X.getSimpleValueType();
3874 SDValue C8 = CurDAG->getConstant(8,
DL, MVT::i8);
// Fold an existing right-shift of X into the BEXTR start field.
3882 SDValue ShiftAmt =
X.getOperand(1);
3883 X =
X.getOperand(0);
3886 "Expected shift amount to be i8");
3890 SDValue OrigShiftAmt = ShiftAmt;
3895 Control = CurDAG->getNode(
ISD::OR,
DL, MVT::i32, Control, ShiftAmt);
3900 if (XVT != MVT::i32) {
3915 SelectCode(Extract.
getNode());
3922 MVT NVT =
Node->getSimpleValueType(0);
3935 Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
3936 if (!PreferBEXTR && !Subtarget->hasBMI2())
3948 if (NVT != MVT::i32 && NVT != MVT::i64)
3952 auto *MaskCst = dyn_cast<ConstantSDNode>(N1);
3953 auto *ShiftCst = dyn_cast<ConstantSDNode>(N0->
getOperand(1));
3954 if (!MaskCst || !ShiftCst)
3962 uint64_t Shift = ShiftCst->getZExtValue();
3967 if (Shift == 8 && MaskSize == 8)
3978 if (!PreferBEXTR && MaskSize <= 32)
3982 unsigned ROpc, MOpc;
3985 assert(Subtarget->hasBMI2() &&
"We must have BMI2's BZHI then.");
3989 Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
3990 ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
3991 MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
3992 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3993 Control =
SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
3999 Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
4000 if (Subtarget->hasTBM()) {
4001 ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
4002 MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
4004 assert(Subtarget->hasBMI() &&
"We must have BMI1's BEXTR then.");
4006 ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
4007 MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
4008 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
4009 Control =
SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
4015 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4016 if (tryFoldLoad(
Node, N0.
getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4018 Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.
getOperand(0)};
4019 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4020 NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4024 CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
4026 NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
4031 SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
4032 unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri;
4034 CurDAG->getMachineNode(NewOpc, dl, NVT,
SDValue(NewNode, 0), ShAmt);
// Emit a PCMPISTR* machine node (implicit-length SSE4.2 string compare),
// choosing the memory form MOpc when the second operand is a foldable load,
// otherwise the register form ROpc.
4041MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(
unsigned ROpc,
unsigned MOpc,
4042 bool MayFoldLoad,
const SDLoc &dl,
// Re-emit the immediate as a target constant so it encodes directly.
4047 auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
4048 Imm = CurDAG->getTargetConstant(*Val,
SDLoc(
Node),
Imm.getValueType());
4051 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
// Load-folding path: operands are N0 + the matched address + imm + chain.
4052 if (MayFoldLoad && tryFoldLoad(
Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4053 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
Imm,
4055 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
4056 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Preserve the folded load's memory operand info.
4060 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
// Register-register path.
4065 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
4066 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
// Emit a PCMPESTR* machine node (explicit-length SSE4.2 string compare).
// Mirrors emitPCMPISTR but the VT lists carry MVT::Glue because the
// instruction consumes EAX/EDX implicitly.
4073MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(
unsigned ROpc,
unsigned MOpc,
4074 bool MayFoldLoad,
const SDLoc &dl,
// Re-emit the immediate as a target constant so it encodes directly.
4080 auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
4081 Imm = CurDAG->getTargetConstant(*Val,
SDLoc(
Node),
Imm.getValueType());
4084 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
// Load-folding path for the N2 operand.
4085 if (MayFoldLoad && tryFoldLoad(
Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4086 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
Imm,
4088 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
4089 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Preserve the folded load's memory operand info.
4094 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
// Register-register path.
4099 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
4100 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
// Simplify the shift amount of shift/rotate node N by exploiting that x86
// shifts implicitly mask the count modulo the operand size: additions,
// subtractions and XORs by multiples of Size (or Size-1 patterns) in the
// amount can be stripped. NOTE(review): several opcode guards are hidden by
// extraction gaps.
4105bool X86DAGToDAGISel::tryShiftAmountMod(
SDNode *
N) {
4106 EVT VT =
N->getValueType(0);
// Hardware masks the count mod 64 (64-bit ops) or mod 32 otherwise.
4113 unsigned Size = VT == MVT::i64 ? 64 : 32;
4115 SDValue OrigShiftAmt =
N->getOperand(1);
4116 SDValue ShiftAmt = OrigShiftAmt;
4131 auto *Add0C = dyn_cast<ConstantSDNode>(Add0);
4132 auto *Add1C = dyn_cast<ConstantSDNode>(Add1);
// (x + k*Size) mod Size == x mod Size: drop the addend entirely.
4135 if (Add1C && Add1C->getAPIntValue().urem(
Size) == 0) {
// (Size-1) - x and similar forms become NOT(x) under the modulo.
4139 ((Add0C && Add0C->getAPIntValue().urem(
Size) ==
Size - 1) ||
4140 (Add1C && Add1C->getAPIntValue().urem(
Size) ==
Size - 1))) {
// Exactly one side is the matched constant.
4144 assert(Add0C ==
nullptr || Add1C ==
nullptr);
// NOT is expressed as XOR with all-ones.
4153 NewShiftAmt = CurDAG->getNode(
ISD::XOR,
DL, OpVT,
4154 Add0C ==
nullptr ? Add0 : Add1,
AllOnes);
4160 Add0C->getZExtValue() != 0) {
// A constant addend that is a multiple of Size vanishes outright...
4163 if (Add0C->getZExtValue() %
Size == 0)
4166 Add0C->getZExtValue() % 32 == 0) {
// ...possibly after narrowing the arithmetic to the sub-register width.
4174 Add0 = CurDAG->getZExtOrTrunc(Add0,
DL, SubVT);
4178 X = CurDAG->getNode(
ISD::ADD,
DL, SubVT, Add1, Add0);
// Shift counts are i8 on x86.
4200 NewShiftAmt = CurDAG->getNode(
ISD::TRUNCATE,
DL, MVT::i8, NewShiftAmt);
// Re-apply the explicit mask so later passes see the canonical form.
4207 NewShiftAmt = CurDAG->getNode(
ISD::AND,
DL, MVT::i8, NewShiftAmt,
4208 CurDAG->getConstant(
Size - 1,
DL, MVT::i8));
// Swap in the simplified amount; UpdateNodeOperands may CSE to a new node.
4212 SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(
N,
N->getOperand(0),
4214 if (UpdatedNode !=
N) {
4217 ReplaceNode(
N, UpdatedNode);
// The original amount may now be dead; clean it up.
4224 CurDAG->RemoveDeadNode(OrigShiftAmt.
getNode());
// For (logic-op (shl X, C1), C2), try moving the shift after the logic op
// when doing so lets the immediate C2>>C1 use a shorter encoding (imm8 or
// imm32 instead of imm64). NOTE(review): extraction gaps hide parts of the
// rewrite and the opcode dispatch.
4232bool X86DAGToDAGISel::tryShrinkShlLogicImm(
SDNode *
N) {
4233 MVT NVT =
N->getSimpleValueType(0);
4234 unsigned Opcode =
N->getOpcode();
4242 auto *Cst = dyn_cast<ConstantSDNode>(N1);
4246 int64_t Val = Cst->getSExtValue();
// Remember if we looked through an any_extend so it can be re-applied.
4251 bool FoundAnyExtend =
false;
4255 FoundAnyExtend =
true;
4263 if (NVT != MVT::i32 && NVT != MVT::i64)
4266 auto *ShlCst = dyn_cast<ConstantSDNode>(Shift.
getOperand(1));
4270 uint64_t ShAmt = ShlCst->getZExtValue();
// For AND the low bits are zeroed by the shift anyway; other ops require
// the immediate to have no bits below the shift amount.
4274 uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
4275 if (Opcode !=
ISD::AND && (Val & RemovedBitsMask) != 0)
// Is the shifted-down immediate actually cheaper to encode?
4280 auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
4284 ShiftedVal = (
uint64_t)Val >> ShAmt;
// i64 imm that becomes a 32-bit zero-extended value saves a MOVABS.
4285 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
// 0xFF/0xFFFF masks map to MOVZX-style patterns; keep them as-is.
4288 if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
4291 ShiftedVal = Val >> ShAmt;
4292 if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
4293 (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
4297 ShiftedVal = (
uint64_t)Val >> ShAmt;
4298 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4305 if (!CanShrinkImmediate(ShiftedVal))
// If the AND only keeps bits that are already known zero elsewhere, other
// folds apply; bail out of this one (gap-hidden surrounding logic).
4315 unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
4321 NeededMask &= ~Cst->getAPIntValue();
4323 if (CurDAG->MaskedValueIsZero(
N->getOperand(0), NeededMask))
// Re-apply the any_extend that was peeled above.
4328 if (FoundAnyExtend) {
// Build (shl (logic-op X, C2>>C1), C1) to replace the original.
4334 SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
4336 SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT,
X, NewCst);
// Build an AVX-512 VPTERNLOG node computing the 3-input boolean function
// described by Imm over A/B/C, folding one operand as a load or broadcast
// where possible. When a non-C operand is folded, Imm's truth-table bits are
// permuted so the folded operand can take C's position.
4345bool X86DAGToDAGISel::matchVPTERNLOG(
SDNode *Root,
SDNode *ParentA,
4349 assert(
A.isOperandOf(ParentA) &&
B.isOperandOf(ParentB) &&
4350 C.isOperandOf(ParentC) &&
"Incorrect parent node");
// Try a plain load first, then a broadcast-load whose element size matches.
4352 auto tryFoldLoadOrBCast =
4355 if (tryFoldLoad(Root,
P, L,
Base, Scale,
Index, Disp, Segment))
// Look through a bitcast to find the broadcast (gap-hidden check).
4361 L =
L.getOperand(0);
4368 auto *MemIntr = cast<MemIntrinsicSDNode>(L);
4369 unsigned Size = MemIntr->getMemoryVT().getSizeInBits();
4373 return tryFoldBroadcast(Root,
P, L,
Base, Scale,
Index, Disp, Segment);
4376 bool FoldedLoad =
false;
4377 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
// Prefer folding C (no imm fixup needed); otherwise fold A or B and swap
// the corresponding truth-table bit pairs in Imm.
4378 if (tryFoldLoadOrBCast(Root, ParentC,
C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4380 }
else if (tryFoldLoadOrBCast(Root, ParentA,
A, Tmp0, Tmp1, Tmp2, Tmp3,
// Swap A<->C in the truth table: exchange bit pairs (1,4), (3,6).
4385 uint8_t OldImm =
Imm;
4386 Imm = OldImm & 0xa5;
4387 if (OldImm & 0x02)
Imm |= 0x10;
4388 if (OldImm & 0x10)
Imm |= 0x02;
4389 if (OldImm & 0x08)
Imm |= 0x40;
4390 if (OldImm & 0x40)
Imm |= 0x08;
4391 }
else if (tryFoldLoadOrBCast(Root, ParentB,
B, Tmp0, Tmp1, Tmp2, Tmp3,
// Swap B<->C in the truth table: exchange bit pairs (1,2), (5,6).
4396 uint8_t OldImm =
Imm;
4397 Imm = OldImm & 0x99;
4398 if (OldImm & 0x02)
Imm |= 0x04;
4399 if (OldImm & 0x04)
Imm |= 0x02;
4400 if (OldImm & 0x20)
Imm |= 0x40;
4401 if (OldImm & 0x40)
Imm |= 0x20;
4406 SDValue TImm = CurDAG->getTargetConstant(Imm,
DL, MVT::i8);
4412 SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
// Broadcast-folded form: opcode chosen by vector width and element size.
4416 auto *MemIntr = cast<MemIntrinsicSDNode>(
C);
4417 unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
4418 assert((EltSize == 32 || EltSize == 64) &&
"Unexpected broadcast size!");
4420 bool UseD = EltSize == 32;
4422 Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi;
4424 Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi;
4426 Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi;
// Plain-load-folded form.
4432 Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi;
4434 Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi;
4436 Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi;
4441 SDValue Ops[] = {
A,
B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm,
C.getOperand(0)};
4442 MNode = CurDAG->getMachineNode(Opc,
DL, VTs, Ops);
// Rewire the folded memory op's chain to the new node's chain result.
4445 ReplaceUses(
C.getValue(1),
SDValue(MNode, 1));
4447 CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(
C)->getMemOperand()});
// Register-register form.
4452 Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri;
4454 Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri;
4456 Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri;
4460 MNode = CurDAG->getMachineNode(Opc,
DL, NVT, {
A,
B,
C, TImm});
4464 CurDAG->RemoveDeadNode(Root);
// Try to select a pair of nested logic ops (and one level of NOTs) rooted
// at N as a single VPTERNLOG. The immediate is composed from "magic"
// truth-table constants for each of the three inputs.
4470bool X86DAGToDAGISel::tryVPTERNLOG(
SDNode *
N) {
4471 MVT NVT =
N->getSimpleValueType(0);
// VPTERNLOG requires AVX-512 vectors (width checks are gap-hidden).
4474 if (!NVT.
isVector() || !Subtarget->hasAVX512() ||
// A foldable inner operand: a one-use logic op, possibly behind a bitcast.
4485 auto getFoldableLogicOp = [](
SDValue Op) {
4488 Op =
Op.getOperand(0);
4490 if (!
Op.hasOneUse())
4493 unsigned Opc =
Op.getOpcode();
// Try either side of N as the inner logic op.
4502 if ((FoldableOp = getFoldableLogicOp(N1))) {
4504 }
else if ((FoldableOp = getFoldableLogicOp(N0))) {
// Truth-table projections of each input across the 8 imm bits:
// A -> 11110000, B -> 11001100, C -> 10101010.
4517 uint8_t TernlogMagicA = 0xf0;
4518 uint8_t TernlogMagicB = 0xcc;
4519 uint8_t TernlogMagicC = 0xaa;
// Strip a NOT from an input by inverting its magic constant instead.
4528 Parent =
Op.getNode();
4529 Op =
Op.getOperand(0);
4533 PeekThroughNot(
A, ParentA, TernlogMagicA);
4534 PeekThroughNot(
B, ParentB, TernlogMagicB);
4535 PeekThroughNot(
C, ParentC, TernlogMagicC);
// Combine the inner op's two inputs bitwise in truth-table space.
4540 case ISD::AND:
Imm = TernlogMagicB & TernlogMagicC;
break;
4541 case ISD::OR:
Imm = TernlogMagicB | TernlogMagicC;
break;
4542 case ISD::XOR:
Imm = TernlogMagicB ^ TernlogMagicC;
break;
// Apply the outer op with input A (ANDNP clears A's half; etc.).
4546 switch (
N->getOpcode()) {
4550 Imm &= ~TernlogMagicA;
4552 Imm = ~(
Imm) & TernlogMagicA;
// Hand off to the builder that emits the machine node.
4559 return matchVPTERNLOG(
N, ParentA, ParentB, ParentC,
A,
B,
C, Imm);
// Try to shrink the immediate of an AND by setting known-zero high bits of
// the mask to one (making it a sign-extendable negative constant), which
// encodes smaller. NOTE(review): the tail of this function (the actual
// profitability check and rewrite) lies beyond this extraction.
4569bool X86DAGToDAGISel::shrinkAndImmediate(
SDNode *
And) {
4572 MVT VT =
And->getSimpleValueType(0);
4573 if (VT != MVT::i32 && VT != MVT::i64)
4576 auto *And1C = dyn_cast<ConstantSDNode>(
And->getOperand(1));
4585 APInt MaskVal = And1C->getAPIntValue();
// No leading zeros (or exactly the i64/imm32 boundary) leaves nothing to
// gain.
4587 if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
// For i64 masks living entirely in the low 32 bits, reason in 32-bit space.
4591 if (VT == MVT::i64 && MaskLZ >= 32) {
4593 MaskVal = MaskVal.
trunc(32);
// Candidate mask with the high zero bits flipped to ones.
4598 APInt NegMaskVal = MaskVal | HighZeros;
// Widen back to 64 bits if we truncated above.
4607 if (VT == MVT::i64 && MaskVal.
getBitWidth() < 64) {
4608 NegMaskVal = NegMaskVal.
zext(64);
4609 HighZeros = HighZeros.
zext(64);
// Safe only if the other operand is already zero in those high bits.
4614 if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
4634 bool FoldedBCast,
bool Masked) {
4635#define VPTESTM_CASE(VT, SUFFIX) \
4638 return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \
4639 return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX;
4642#define VPTESTM_BROADCAST_CASES(SUFFIX) \
4643default: llvm_unreachable("Unexpected VT!"); \
4644VPTESTM_CASE(v4i32, DZ128##SUFFIX) \
4645VPTESTM_CASE(v2i64, QZ128##SUFFIX) \
4646VPTESTM_CASE(v8i32, DZ256##SUFFIX) \
4647VPTESTM_CASE(v4i64, QZ256##SUFFIX) \
4648VPTESTM_CASE(v16i32, DZ##SUFFIX) \
4649VPTESTM_CASE(v8i64, QZ##SUFFIX)
4651#define VPTESTM_FULL_CASES(SUFFIX) \
4652VPTESTM_BROADCAST_CASES(SUFFIX) \
4653VPTESTM_CASE(v16i8, BZ128##SUFFIX) \
4654VPTESTM_CASE(v8i16, WZ128##SUFFIX) \
4655VPTESTM_CASE(v32i8, BZ256##SUFFIX) \
4656VPTESTM_CASE(v16i16, WZ256##SUFFIX) \
4657VPTESTM_CASE(v64i8, BZ##SUFFIX) \
4658VPTESTM_CASE(v32i16, WZ##SUFFIX)
4676#undef VPTESTM_FULL_CASES
4677#undef VPTESTM_BROADCAST_CASES
// Try to select a vector compare (Setcc under Root) as a VPTESTM/VPTESTNM
// mask instruction, optionally folding one source as a load or broadcast
// and merging an incoming mask. NOTE(review): extraction gaps hide the
// setcc decomposition and several type computations.
4683bool X86DAGToDAGISel::tryVPTESTM(
SDNode *Root,
SDValue Setcc,
4685 assert(Subtarget->hasAVX512() &&
"Expected AVX512!");
// Local load/broadcast folder (mirrors the one in matchVPTERNLOG).
4735 if (tryFoldLoad(Root,
P, L,
Base, Scale,
Index, Disp, Segment))
// Broadcasts only exist for 32/64-bit elements.
4740 if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64)
4746 L =
L.getOperand(0);
4752 auto *MemIntr = cast<MemIntrinsicSDNode>(L);
// The broadcast element width must match the compare element width.
4753 if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.
getSizeInBits())
4756 return tryFoldBroadcast(Root,
P, L,
Base, Scale,
Index, Disp, Segment);
// Identical sources would alias the folded operand; don't fold then.
4760 bool CanFoldLoads = Src0 != Src1;
4762 bool FoldedLoad =
false;
4763 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
// Try folding Src1 first, then Src0 (operands commute for these tests).
4765 FoldedLoad = tryFoldLoadOrBCast(Root, N0.
getNode(), Src1, Tmp0, Tmp1, Tmp2,
4769 FoldedLoad = tryFoldLoadOrBCast(Root, N0.
getNode(), Src0, Tmp0, Tmp1,
4778 bool IsMasked = InMask.
getNode() !=
nullptr;
// Widen sub-512-bit sources into the wider register class via
// INSERT_SUBREG on IMPLICIT_DEF when required (conditions gap-hidden).
4791 SDValue ImplDef =
SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4793 Src0 = CurDAG->getTargetInsertSubreg(
SubReg, dl, CmpVT, ImplDef, Src0);
4796 Src1 = CurDAG->getTargetInsertSubreg(
SubReg, dl, CmpVT, ImplDef, Src1);
// Move the incoming mask into the matching mask-register class.
4801 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4802 InMask =
SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4803 dl, MaskVT, InMask, RC), 0);
// Pick the VPTESTM/VPTESTNM variant for type, polarity, and folding.
4808 unsigned Opc =
getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
4813 SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
// Memory forms: masked variant prepends InMask to the operand list.
4816 SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4818 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4820 SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4822 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4828 CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()});
// Register forms.
4831 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
4833 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
// Copy the result down to the expected (narrower) mask register class.
4839 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4840 CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4841 dl, ResVT,
SDValue(CNode, 0), RC);
4845 CurDAG->RemoveDeadNode(Root);
4851bool X86DAGToDAGISel::tryMatchBitSelect(
SDNode *
N) {
4854 MVT NVT =
N->getSimpleValueType(0);
4857 if (!NVT.
isVector() || !Subtarget->hasAVX512())