84#define DEBUG_TYPE "dagcombine"
86STATISTIC(NodesCombined ,
"Number of dag nodes combined");
87STATISTIC(PreIndexedNodes ,
"Number of pre-indexed nodes created");
88STATISTIC(PostIndexedNodes,
"Number of post-indexed nodes created");
89STATISTIC(OpsNarrowed ,
"Number of load/op/store narrowed");
90STATISTIC(LdStFP2Int ,
"Number of fp load/store pairs transformed to int");
92STATISTIC(NumFPLogicOpsConv,
"Number of logic ops converted to fp ops");
95 "Controls whether a DAG combine is performed for a node");
99 cl::desc(
"Enable DAG combiner's use of IR alias analysis"));
103 cl::desc(
"Enable DAG combiner's use of TBAA"));
108 cl::desc(
"Only use DAG-combiner alias analysis in this"
116 cl::desc(
"Bypass the profitability model of load slicing"),
121 cl::desc(
"DAG combiner may split indexing from loads"));
125 cl::desc(
"DAG combiner enable merging multiple stores "
126 "into a wider store"));
130 cl::desc(
"Limit the number of operands to inline for Token Factors"));
134 cl::desc(
"Limit the number of times for the same StoreNode and RootNode "
135 "to bail out in store merging dependence check"));
139 cl::desc(
"DAG combiner enable reducing the width of load/op/store "
142 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
144 cl::desc(
"DAG combiner force override the narrowing profitable check when "
145 "reducing the width of load/op/store sequences"));
149 cl::desc(
"DAG combiner enable load/<replace bytes>/store with "
150 "a narrower store"));
155 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
164 bool LegalDAG =
false;
165 bool LegalOperations =
false;
166 bool LegalTypes =
false;
168 bool DisableGenericCombines;
204 void AddUsersToWorklist(
SDNode *
N) {
210 void AddToWorklistWithUsers(
SDNode *
N) {
211 AddUsersToWorklist(
N);
218 void clearAddedDanglingWorklistEntries() {
220 while (!PruningList.
empty()) {
223 recursivelyDeleteUnusedNodes(
N);
227 SDNode *getNextWorklistEntry() {
229 clearAddedDanglingWorklistEntries();
233 while (!
N && !Worklist.
empty()) {
238 assert(
N->getCombinerWorklistIndex() >= 0 &&
239 "Found a worklist entry without a corresponding map entry!");
241 N->setCombinerWorklistIndex(-2);
251 : DAG(
D), TLI(
D.getTargetLoweringInfo()),
252 STI(
D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
257 MaximumLegalStoreInBits = 0;
263 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
264 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 void ConsiderForPruning(
SDNode *
N) {
274 void AddToWorklist(
SDNode *
N,
bool IsCandidateForPruning =
true,
275 bool SkipIfCombinedBefore =
false) {
277 "Deleted Node added to Worklist");
284 if (SkipIfCombinedBefore &&
N->getCombinerWorklistIndex() == -2)
287 if (IsCandidateForPruning)
288 ConsiderForPruning(
N);
290 if (
N->getCombinerWorklistIndex() < 0) {
291 N->setCombinerWorklistIndex(Worklist.
size());
297 void removeFromWorklist(
SDNode *
N) {
299 StoreRootCountMap.
erase(
N);
301 int WorklistIndex =
N->getCombinerWorklistIndex();
305 if (WorklistIndex < 0)
309 Worklist[WorklistIndex] =
nullptr;
310 N->setCombinerWorklistIndex(-1);
313 void deleteAndRecombine(
SDNode *
N);
314 bool recursivelyDeleteUnusedNodes(
SDNode *
N);
322 return CombineTo(
N, &Res, 1, AddTo);
329 return CombineTo(
N, To, 2, AddTo);
335 unsigned MaximumLegalStoreInBits;
341 unsigned BitWidth =
Op.getScalarValueSizeInBits();
347 EVT VT =
Op.getValueType();
351 return SimplifyDemandedBits(
Op,
DemandedBits, DemandedElts,
false);
357 bool SimplifyDemandedVectorElts(
SDValue Op) {
359 if (
Op.getValueType().isScalableVector())
362 unsigned NumElts =
Op.getValueType().getVectorNumElements();
364 return SimplifyDemandedVectorElts(
Op, DemandedElts);
368 const APInt &DemandedElts,
369 bool AssumeSingleUse =
false);
370 bool SimplifyDemandedVectorElts(
SDValue Op,
const APInt &DemandedElts,
371 bool AssumeSingleUse =
false);
373 bool CombineToPreIndexedLoadStore(
SDNode *
N);
374 bool CombineToPostIndexedLoadStore(
SDNode *
N);
399 void ReplaceLoadWithPromotedLoad(
SDNode *Load,
SDNode *ExtLoad);
540 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
SDNode *
N);
572 template <
class MatchContextClass>
574 template <
class MatchContextClass>
579 bool reassociationCanBreakAddressingModePattern(
unsigned Opc,
588 SDValue reassociateReduction(
unsigned RedOpc,
unsigned Opc,
const SDLoc &
DL,
602 bool NotExtCompare =
false);
603 SDValue convertSelectOfFPConstantsToLoadOffset(
620 const SDLoc &
DL,
bool foldBooleans);
624 SDValue &
CC,
bool MatchStrict =
false)
const;
625 bool isOneUseSetCC(
SDValue N)
const;
650 bool KnownNeverZero =
false,
651 bool InexpensiveOnly =
false,
652 std::optional<EVT> OutVT = std::nullopt);
662 bool DemandHighBits =
true);
666 unsigned PosOpcode,
unsigned NegOpcode,
670 unsigned PosOpcode,
unsigned NegOpcode,
686 SDValue VecIn2,
unsigned LeftIdx,
721 int64_t OffsetFromBase;
724 : MemNode(
N), OffsetFromBase(
Offset) {}
729 StoreSource getStoreSource(
SDValue StoreVal) {
733 return StoreSource::Constant;
737 return StoreSource::Constant;
738 return StoreSource::Unknown;
741 return StoreSource::Extract;
743 return StoreSource::Load;
745 return StoreSource::Unknown;
753 bool isMulAddWithConstProfitable(
SDNode *MulNode,
SDValue AddNode,
760 EVT LoadResultTy,
EVT &ExtVT);
765 EVT &MemVT,
unsigned ShAmt = 0);
773 bool BackwardsPropagateMask(
SDNode *
N);
790 EVT MemVT,
unsigned NumStores,
791 bool IsConstantSrc,
bool UseVector,
804 bool checkMergeStoreCandidatesForDependencies(
812 int64_t ElementSizeBytes)
const;
817 unsigned NumConsecutiveStores,
818 EVT MemVT,
SDNode *Root,
bool AllowVectors);
825 unsigned NumConsecutiveStores,
EVT MemVT,
831 unsigned NumConsecutiveStores,
EVT MemVT,
832 SDNode *Root,
bool AllowVectors,
833 bool IsNonTemporalStore,
bool IsNonTemporalLoad);
852 bool hasOperation(
unsigned Opcode,
EVT VT) {
863 EVT getShiftAmountTy(
EVT LHSTy) {
869 bool isTypeLegal(
const EVT &VT) {
870 if (!LegalTypes)
return true;
875 EVT getSetCCResultType(
EVT VT)
const {
890 explicit WorklistRemover(DAGCombiner &dc)
891 :
SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
894 DC.removeFromWorklist(
N);
902 explicit WorklistInserter(DAGCombiner &dc)
903 :
SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
917 ((DAGCombiner*)
DC)->AddToWorklist(
N);
922 return ((DAGCombiner*)DC)->CombineTo(
N, &To[0], To.
size(), AddTo);
927 return ((DAGCombiner*)DC)->CombineTo(
N, Res, AddTo);
932 return ((DAGCombiner*)DC)->CombineTo(
N, Res0, Res1, AddTo);
937 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(
N);
942 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
949void DAGCombiner::deleteAndRecombine(
SDNode *
N) {
950 removeFromWorklist(
N);
958 if (
Op->hasOneUse() ||
Op->getNumValues() > 1)
959 AddToWorklist(
Op.getNode());
968 unsigned Bits =
Offset + std::max(
LHS.getBitWidth(),
RHS.getBitWidth());
981 LHS =
N.getOperand(0);
982 RHS =
N.getOperand(1);
983 CC =
N.getOperand(2);
990 LHS =
N.getOperand(1);
991 RHS =
N.getOperand(2);
992 CC =
N.getOperand(3);
1004 LHS =
N.getOperand(0);
1005 RHS =
N.getOperand(1);
1006 CC =
N.getOperand(4);
1013bool DAGCombiner::isOneUseSetCC(
SDValue N)
const {
1015 if (isSetCCEquivalent(
N, N0, N1, N2) &&
N->hasOneUse())
1027 MaskForTy = 0xFFULL;
1030 MaskForTy = 0xFFFFULL;
1033 MaskForTy = 0xFFFFFFFFULL;
1052 return !(Const->isOpaque() && NoOpaques);
1055 unsigned BitWidth =
N.getScalarValueSizeInBits();
1060 if (!Const || Const->getAPIntValue().getBitWidth() !=
BitWidth ||
1061 (Const->isOpaque() && NoOpaques))
1080 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1083bool DAGCombiner::reassociationCanBreakAddressingModePattern(
unsigned Opc,
1116 ScalableOffset = -ScalableOffset;
1118 if (
auto *LoadStore = dyn_cast<MemSDNode>(
Node);
1124 unsigned AS =
LoadStore->getAddressSpace();
1137 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1141 const APInt &C2APIntVal = C2->getAPIntValue();
1145 if (
auto *C1 = dyn_cast<ConstantSDNode>(N0.
getOperand(1))) {
1149 const APInt &C1APIntVal = C1->getAPIntValue();
1150 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1153 const int64_t CombinedValue = CombinedValueIntVal.
getSExtValue();
1156 if (
auto *LoadStore = dyn_cast<MemSDNode>(
Node)) {
1162 AM.
BaseOffs = C2APIntVal.getSExtValue();
1164 unsigned AS =
LoadStore->getAddressSpace();
1176 if (
auto *GA = dyn_cast<GlobalAddressSDNode>(N0.
getOperand(1)))
1189 AM.
BaseOffs = C2APIntVal.getSExtValue();
1191 unsigned AS =
LoadStore->getAddressSpace();
1204SDValue DAGCombiner::reassociateOpsCommutative(
unsigned Opc,
const SDLoc &
DL,
1218 Flags.hasNoUnsignedWrap())
1226 return DAG.
getNode(Opc,
DL, VT, N00, OpNode, NewFlags);
1234 return DAG.
getNode(Opc,
DL, VT, OpNode, N01, NewFlags);
1244 if (N1 == N00 || N1 == N01)
1290 if (CC1 == CC00 && CC1 != CC01) {
1292 return DAG.
getNode(Opc,
DL, VT, OpNode, N01, Flags);
1294 if (CC1 == CC01 && CC1 != CC00) {
1296 return DAG.
getNode(Opc,
DL, VT, OpNode, N00, Flags);
1314 if (!
Flags.hasAllowReassociation() || !
Flags.hasNoSignedZeros())
1317 if (
SDValue Combined = reassociateOpsCommutative(Opc,
DL, N0, N1, Flags))
1319 if (
SDValue Combined = reassociateOpsCommutative(Opc,
DL, N1, N0, Flags))
1327SDValue DAGCombiner::reassociateReduction(
unsigned RedOpc,
unsigned Opc,
1345 assert(
N->getNumValues() == NumTo &&
"Broken CombineTo call!");
1349 dbgs() <<
" and " << NumTo - 1 <<
" other values\n");
1350 for (
unsigned i = 0, e = NumTo; i !=
e; ++i)
1352 N->getValueType(i) == To[i].getValueType()) &&
1353 "Cannot combine value to value of different type!");
1355 WorklistRemover DeadNodes(*
this);
1359 for (
unsigned i = 0, e = NumTo; i !=
e; ++i) {
1361 AddToWorklistWithUsers(To[i].
getNode());
1369 deleteAndRecombine(
N);
1387 recursivelyDeleteUnusedNodes(TLO.
Old.
getNode());
1393 const APInt &DemandedElts,
1394 bool AssumeSingleUse) {
1402 AddToWorklist(
Op.getNode());
1404 CommitTargetLoweringOpt(TLO);
1411bool DAGCombiner::SimplifyDemandedVectorElts(
SDValue Op,
1412 const APInt &DemandedElts,
1413 bool AssumeSingleUse) {
1415 APInt KnownUndef, KnownZero;
1417 TLO, 0, AssumeSingleUse))
1421 AddToWorklist(
Op.getNode());
1423 CommitTargetLoweringOpt(TLO);
1427void DAGCombiner::ReplaceLoadWithPromotedLoad(
SDNode *Load,
SDNode *ExtLoad) {
1429 EVT VT =
Load->getValueType(0);
1438 AddToWorklist(Trunc.
getNode());
1439 recursivelyDeleteUnusedNodes(Load);
1447 EVT MemVT =
LD->getMemoryVT();
1449 :
LD->getExtensionType();
1452 LD->getChain(),
LD->getBasePtr(),
1453 MemVT,
LD->getMemOperand());
1456 unsigned Opc =
Op.getOpcode();
1460 if (
SDValue Op0 = SExtPromoteOperand(
Op.getOperand(0), PVT))
1464 if (
SDValue Op0 = ZExtPromoteOperand(
Op.getOperand(0), PVT))
1482 EVT OldVT =
Op.getValueType();
1484 bool Replace =
false;
1485 SDValue NewOp = PromoteOperand(
Op, PVT, Replace);
1488 AddToWorklist(NewOp.
getNode());
1491 ReplaceLoadWithPromotedLoad(
Op.getNode(), NewOp.
getNode());
1497 EVT OldVT =
Op.getValueType();
1499 bool Replace =
false;
1500 SDValue NewOp = PromoteOperand(
Op, PVT, Replace);
1503 AddToWorklist(NewOp.
getNode());
1506 ReplaceLoadWithPromotedLoad(
Op.getNode(), NewOp.
getNode());
1514 if (!LegalOperations)
1517 EVT VT =
Op.getValueType();
1523 unsigned Opc =
Op.getOpcode();
1531 assert(PVT != VT &&
"Don't know what type to promote to!");
1535 bool Replace0 =
false;
1537 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1539 bool Replace1 =
false;
1541 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1553 Replace1 &= (N0 != N1) && !N1->
hasOneUse();
1556 CombineTo(
Op.getNode(), RV);
1582 if (!LegalOperations)
1585 EVT VT =
Op.getValueType();
1591 unsigned Opc =
Op.getOpcode();
1599 assert(PVT != VT &&
"Don't know what type to promote to!");
1603 bool Replace =
false;
1606 N0 = SExtPromoteOperand(N0, PVT);
1608 N0 = ZExtPromoteOperand(N0, PVT);
1610 N0 = PromoteOperand(N0, PVT, Replace);
1621 ReplaceLoadWithPromotedLoad(
Op.getOperand(0).getNode(), N0.
getNode());
1631 if (!LegalOperations)
1634 EVT VT =
Op.getValueType();
1640 unsigned Opc =
Op.getOpcode();
1648 assert(PVT != VT &&
"Don't know what type to promote to!");
1658bool DAGCombiner::PromoteLoad(
SDValue Op) {
1659 if (!LegalOperations)
1665 EVT VT =
Op.getValueType();
1671 unsigned Opc =
Op.getOpcode();
1679 assert(PVT != VT &&
"Don't know what type to promote to!");
1684 EVT MemVT =
LD->getMemoryVT();
1686 :
LD->getExtensionType();
1688 LD->getChain(),
LD->getBasePtr(),
1689 MemVT,
LD->getMemOperand());
1698 AddToWorklist(
Result.getNode());
1699 recursivelyDeleteUnusedNodes(
N);
1712bool DAGCombiner::recursivelyDeleteUnusedNodes(
SDNode *
N) {
1713 if (!
N->use_empty())
1723 if (
N->use_empty()) {
1724 for (
const SDValue &ChildN :
N->op_values())
1725 Nodes.
insert(ChildN.getNode());
1727 removeFromWorklist(
N);
1732 }
while (!Nodes.
empty());
1747 WorklistInserter AddNodes(*
this);
1756 AddToWorklist(&
Node,
Node.use_empty());
1764 while (
SDNode *
N = getNextWorklistEntry()) {
1768 if (recursivelyDeleteUnusedNodes(
N))
1771 WorklistRemover DeadNodes(*
this);
1779 for (
SDNode *LN : UpdatedNodes)
1780 AddToWorklistWithUsers(LN);
1792 for (
const SDValue &ChildN :
N->op_values())
1793 AddToWorklist(ChildN.getNode(),
true,
1804 ChainsWithoutMergeableStores.
clear();
1815 "Node was deleted but visit returned new node!");
1823 N->getNumValues() == 1 &&
"Type mismatch");
1833 AddToWorklistWithUsers(RV.
getNode());
1839 recursivelyDeleteUnusedNodes(
N);
1849 switch (
N->getOpcode()) {
1875 case ISD::MUL:
return visitMUL<EmptyMatchContext>(
N);
1939 case ISD::FMA:
return visitFMA<EmptyMatchContext>(
N);
2011#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2012#include "llvm/IR/VPIntrinsics.def"
2013 return visitVPOp(
N);
2024 if (!DisableGenericCombines)
2030 "Node was deleted but visit returned NULL!");
2037 DagCombineInfo(DAG, Level,
false,
this);
2045 switch (
N->getOpcode()) {
2053 RV = PromoteIntBinOp(
SDValue(
N, 0));
2058 RV = PromoteIntShiftOp(
SDValue(
N, 0));
2079 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2094 if (
unsigned NumOps =
N->getNumOperands()) {
2095 if (
N->getOperand(0).getValueType() == MVT::Other)
2096 return N->getOperand(0);
2097 if (
N->getOperand(NumOps-1).getValueType() == MVT::Other)
2098 return N->getOperand(NumOps-1);
2099 for (
unsigned i = 1; i < NumOps-1; ++i)
2100 if (
N->getOperand(i).getValueType() == MVT::Other)
2101 return N->getOperand(i);
2107 SDValue Operand =
N->getOperand(0);
2122 if (
N->getNumOperands() == 2) {
2124 return N->getOperand(0);
2126 return N->getOperand(1);
2141 AddToWorklist(*(
N->user_begin()));
2146 bool Changed =
false;
2153 for (
unsigned i = 0; i < TFs.
size(); ++i) {
2158 for (
unsigned j = i;
j < TFs.
size();
j++)
2169 switch (
Op.getOpcode()) {
2187 if (SeenOps.
insert(
Op.getNode()).second)
2198 for (
unsigned i = 1, e = TFs.
size(); i < e; i++)
2199 AddToWorklist(TFs[i]);
2211 bool DidPruneOps =
false;
2213 unsigned NumLeftToConsider = 0;
2215 Worklist.
push_back(std::make_pair(
Op.getNode(), NumLeftToConsider++));
2219 auto AddToWorklist = [&](
unsigned CurIdx,
SDNode *
Op,
unsigned OpNumber) {
2225 unsigned OrigOpNumber = 0;
2226 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() !=
Op)
2228 assert((OrigOpNumber != Ops.size()) &&
2229 "expected to find TokenFactor Operand");
2231 for (
unsigned i = CurIdx + 1; i < Worklist.
size(); ++i) {
2232 if (Worklist[i].second == OrigOpNumber) {
2233 Worklist[i].second = OpNumber;
2236 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2237 OpWorkCount[OrigOpNumber] = 0;
2238 NumLeftToConsider--;
2241 if (SeenChains.
insert(
Op).second) {
2242 OpWorkCount[OpNumber]++;
2247 for (
unsigned i = 0; i < Worklist.
size() && i < 1024; ++i) {
2249 if (NumLeftToConsider <= 1)
2251 auto CurNode = Worklist[i].first;
2252 auto CurOpNumber = Worklist[i].second;
2253 assert((OpWorkCount[CurOpNumber] > 0) &&
2254 "Node should not appear in worklist");
2255 switch (CurNode->getOpcode()) {
2261 NumLeftToConsider++;
2264 for (
const SDValue &
Op : CurNode->op_values())
2265 AddToWorklist(i,
Op.getNode(), CurOpNumber);
2271 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2274 if (
auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2275 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2278 OpWorkCount[CurOpNumber]--;
2279 if (OpWorkCount[CurOpNumber] == 0)
2280 NumLeftToConsider--;
2294 if (SeenChains.
count(
Op.getNode()) == 0)
2309 WorklistRemover DeadNodes(*
this);
2315 AddUsersToWorklist(
N);
2320 }
while (!
N->use_empty());
2321 deleteAndRecombine(
N);
2329 return Const !=
nullptr && !Const->isOpaque() ? Const :
nullptr;
2339 Op =
N->getOperand(0);
2341 if (
N->getFlags().hasNoUnsignedWrap())
2346 if (
N.getValueType().getScalarType() != MVT::i1 ||
2363 if (LD->isIndexed() || LD->getBasePtr().getNode() !=
N)
2365 VT = LD->getMemoryVT();
2366 AS = LD->getAddressSpace();
2368 if (ST->isIndexed() || ST->getBasePtr().getNode() !=
N)
2370 VT = ST->getMemoryVT();
2371 AS = ST->getAddressSpace();
2373 if (LD->isIndexed() || LD->getBasePtr().getNode() !=
N)
2375 VT = LD->getMemoryVT();
2376 AS = LD->getAddressSpace();
2378 if (ST->isIndexed() || ST->getBasePtr().getNode() !=
N)
2380 VT = ST->getMemoryVT();
2381 AS = ST->getAddressSpace();
2396 }
else if (
N->getOpcode() ==
ISD::SUB) {
2418 bool ShouldCommuteOperands) {
2423 if (ShouldCommuteOperands)
2435 unsigned Opcode =
N->getOpcode();
2436 EVT VT =
N->getValueType(0);
2443 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2461 "Unexpected binary operator");
2478 unsigned SelOpNo = 0;
2514 bool CanFoldNonConst =
2520 if (!CanFoldNonConst &&
2528 if (CanFoldNonConst) {
2565 "Expecting add or sub");
2570 bool IsAdd =
N->getOpcode() ==
ISD::ADD;
2571 SDValue C = IsAdd ?
N->getOperand(1) :
N->getOperand(0);
2572 SDValue Z = IsAdd ?
N->getOperand(0) :
N->getOperand(1);
2573 auto *CN = dyn_cast<ConstantSDNode>(
C);
2578 if (Z.getOperand(0).getValueType() != MVT::i1)
2590 EVT VT =
C.getValueType();
2603 if ((!LegalOperations || hasOperation(
ISD::AVGCEILU, VT)) &&
2608 if ((!LegalOperations || hasOperation(
ISD::AVGCEILS, VT)) &&
2621 "Expecting add or sub");
2625 bool IsAdd =
N->getOpcode() ==
ISD::ADD;
2626 SDValue ConstantOp = IsAdd ?
N->getOperand(1) :
N->getOperand(0);
2627 SDValue ShiftOp = IsAdd ?
N->getOperand(0) :
N->getOperand(1);
2649 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2651 Not.getOperand(0), ShAmt);
2693 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
2725 if ((!LegalOperations ||
2728 X.getScalarValueSizeInBits() == 1) {
2744 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
2748 if (!reassociationCanBreakAddressingModePattern(
ISD::ADD,
DL,
N, N0, N1)) {
2836 return (!Max && !
Op) ||
2837 (
Max &&
Op &&
Max->getAPIntValue() == (-
Op->getAPIntValue()));
2878 !
N->getFlags().hasNoSignedWrap()))) {
2899 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2903 if (
N->getFlags().hasNoUnsignedWrap() &&
2907 if (
N->getFlags().hasNoSignedWrap() &&
2916 DAG.
getConstant(CA * CM + CB->getAPIntValue(),
DL, VT), Flags);
2924 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2930 if (
N->getFlags().hasNoUnsignedWrap() &&
2935 if (
N->getFlags().hasNoSignedWrap() &&
2946 DAG.
getConstant(CA * CM + CB->getAPIntValue(),
DL, VT), Flags);
2951 if (
SDValue Combined = visitADDLikeCommutative(N0, N1,
N))
2954 if (
SDValue Combined = visitADDLikeCommutative(N1, N0,
N))
2986 if (
SDValue Combined = visitADDLike(
N))
3026 APInt NewStep = C0 + C1;
3036 APInt NewStep = SV0 + SV1;
3045 unsigned Opcode =
N->getOpcode();
3063 return DAG.
getNode(Opcode,
DL, VT, N1, N0);
3067 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
3087 bool ForceCarryReconstruction =
false) {
3093 V = V.getOperand(0);
3098 if (ForceCarryReconstruction)
3102 V = V.getOperand(0);
3106 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3113 if (V.getResNo() != 1)
3120 EVT VT = V->getValueType(0);
3228 if (TN->
getVT() == MVT::i1) {
3245 DAG.
getVTList(VT, Carry.getValueType()), N0,
3258 if (!
N->hasAnyUseOfValue(1))
3291 if (Force && isa<ConstantSDNode>(V))
3298 return V.getOperand(0);
3310 EVT CarryVT =
N->getValueType(1);
3314 if (!
N->hasAnyUseOfValue(1))
3321 return DAG.
getNode(
N->getOpcode(),
DL,
N->getVTList(), N1, N0);
3346 if (
SDValue Combined = visitUADDOLike(N0, N1,
N))
3349 if (
SDValue Combined = visitUADDOLike(N1, N0,
N))
3383 SDValue CarryIn =
N->getOperand(2);
3402 SDValue CarryIn =
N->getOperand(2);
3413 if (!LegalOperations ||
3423 AddToWorklist(CarryExt.
getNode());
3429 if (
SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn,
N))
3432 if (
SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn,
N))
3440 SDValue Ops[] = {N1, N0, CarryIn};
3579 EVT CarryOutType =
N->getValueType(0);
3595 unsigned CarryInOperandNum =
3597 if (Opcode ==
ISD::USUBO && CarryInOperandNum != 1)
3691 SDValue CarryIn =
N->getOperand(2);
3702 if (!LegalOperations ||
3707 if (
SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn,
N))
3710 if (
SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn,
N))
3722 "Illegal truncation");
3748 !(!LegalOperations || hasOperation(
ISD::USUBSAT, DstVT)))
3751 EVT SubVT =
N->getValueType(0);
3819template <
class MatchContextClass>
3842 if ((
BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3852 if (!(AndMask.
isMask(AndMaskWidth) && XorMask.
countr_one() >= AndMaskWidth))
3883 auto PeekThroughFreeze = [](
SDValue N) {
3885 return N->getOperand(0);
3889 if (
SDValue V = foldSubCtlzNot<EmptyMatchContext>(
N, DAG))
3894 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3903 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
3911 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
3934 if (
N->getFlags().hasNoUnsignedWrap())
3940 if (
N->getFlags().hasNoSignedWrap())
3966 if (hasOperation(NewOpc, VT))
4093 if (!reassociationCanBreakAddressingModePattern(
ISD::SUB,
DL,
N, N0, N1) &&
4131 if ((!LegalOperations || hasOperation(
ISD::ABS, VT)) &&
4141 if (GA->getGlobal() == GB->getGlobal())
4149 if (TN->
getVT() == MVT::i1) {
4202 DAG.
getVTList(VT, Carry.getValueType()), NegX, Zero,
4210 if (!C0->isOpaque()) {
4211 const APInt &C0Val = C0->getAPIntValue();
4212 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4213 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4219 if ((!LegalOperations || hasOperation(
ISD::ABDS, VT)) &&
4231 if ((!LegalOperations || hasOperation(
ISD::ABDU, VT)) &&
4246 unsigned Opcode =
N->getOpcode();
4267 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
4293 if (!
N->hasAnyUseOfValue(1))
4320 EVT CarryVT =
N->getValueType(1);
4324 if (!
N->hasAnyUseOfValue(1))
4359 SDValue CarryIn =
N->getOperand(2);
4371 SDValue CarryIn =
N->getOperand(2);
4375 if (!LegalOperations ||
4386 SDValue CarryIn =
N->getOperand(2);
4390 if (!LegalOperations ||
4422template <
class MatchContextClass>
SDValue DAGCombiner::visitMUL(
SDNode *
N) {
4428 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4429 MatchContextClass Matcher(DAG, TLI,
N);
4442 return Matcher.getNode(
ISD::MUL,
DL, VT, N1, N0);
4444 bool N1IsConst =
false;
4445 bool N1IsOpaqueConst =
false;
4452 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
4457 "Splat APInt should be element width");
4459 N1IsConst = isa<ConstantSDNode>(N1);
4462 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4467 if (N1IsConst && ConstValue1.
isZero())
4471 if (N1IsConst && ConstValue1.
isOne())
4475 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
4479 if (N1IsConst && ConstValue1.
isAllOnes())
4485 if (
SDValue LogBase2 = BuildLogBase2(N1,
DL)) {
4494 unsigned Log2Val = (-ConstValue1).logBase2();
4498 return Matcher.getNode(
4512 if (LoHi->hasAnyUseOfValue(1))
4515 if (LoHi->hasAnyUseOfValue(1))
4536 if (!UseVP && N1IsConst &&
4544 unsigned TZeros = MulC == 2 ? 0 : MulC.
countr_zero();
4546 if ((MulC - 1).isPowerOf2())
4548 else if ((MulC + 1).isPowerOf2())
4553 MathOp ==
ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4556 "multiply-by-constant generated out of bounds shift");
4560 TZeros ? DAG.
getNode(MathOp,
DL, VT, Shl,
4602 return Matcher.getNode(
4620 APInt NewStep = C0 * MulVal;
4626 if (!UseVP && (!LegalOperations || hasOperation(
ISD::ABS, VT)) &&
4643 if (!V ||
V->isZero()) {
4657 for (
unsigned I = 0;
I != NumElts; ++
I)
4688 EVT NodeType =
Node->getValueType(0);
4689 if (!NodeType.isSimple())
4691 switch (NodeType.getSimpleVT().SimpleTy) {
4692 default:
return false;
4693 case MVT::i8: LC=
isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
break;
4694 case MVT::i16: LC=
isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
break;
4695 case MVT::i32: LC=
isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
break;
4696 case MVT::i64: LC=
isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
break;
4697 case MVT::i128: LC=
isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128;
break;
4705 if (
Node->use_empty())
4708 unsigned Opcode =
Node->getOpcode();
4713 EVT VT =
Node->getValueType(0);
4727 unsigned OtherOpcode = 0;
4748 unsigned UserOpc =
User->getOpcode();
4749 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4753 if (UserOpc == OtherOpcode) {
4756 }
else if (UserOpc == DivRemOpc) {
4759 assert(UserOpc == Opcode);
4764 CombineTo(
User, combined);
4775 EVT VT =
N->getValueType(0);
4778 unsigned Opc =
N->getOpcode();
4787 if (DAG.
isUndef(Opc, {N0, N1}))
4798 if (N0C && N0C->
isZero())
4821 EVT VT =
N->getValueType(0);
4831 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
4848 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
4856 if (
SDValue V = visitSDIVLike(N0, N1,
N)) {
4863 AddToWorklist(
Mul.getNode());
4865 CombineTo(RemNode, Sub);
4885 if (
C->isZero() ||
C->isOpaque())
4887 if (
C->getAPIntValue().isPowerOf2())
4889 if (
C->getAPIntValue().isNegatedPowerOf2())
4899 EVT VT =
N->getValueType(0);
4924 AddToWorklist(Sign.
getNode());
4930 AddToWorklist(
Add.getNode());
4941 Sra = DAG.
getSelect(
DL, VT, IsOneOrAllOnes, N0, Sra);
4969 EVT VT =
N->getValueType(0);
4979 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
4993 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
4996 if (
SDValue V = visitUDIVLike(N0, N1,
N)) {
5003 AddToWorklist(
Mul.getNode());
5005 CombineTo(RemNode, Sub);
5029 EVT VT =
N->getValueType(0);
5033 if (
SDValue LogBase2 = BuildLogBase2(N1,
DL)) {
5034 AddToWorklist(LogBase2.getNode());
5038 AddToWorklist(Trunc.
getNode());
5047 if (
SDValue LogBase2 = BuildLogBase2(N10,
DL)) {
5048 AddToWorklist(LogBase2.getNode());
5052 AddToWorklist(Trunc.
getNode());
5054 AddToWorklist(
Add.getNode());
5082 unsigned Opcode =
N->getOpcode();
5085 EVT VT =
N->getValueType(0);
5107 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
5120 AddToWorklist(
Add.getNode());
5131 AddToWorklist(
Add.getNode());
5148 if (
SDValue OptimizedRem = buildOptimizedSREM(N0, N1,
N))
5149 return OptimizedRem;
5153 isSigned ? visitSDIVLike(N0, N1,
N) : visitUDIVLike(N0, N1,
N);
5159 CombineTo(DivNode, OptimizedDiv);
5162 AddToWorklist(OptimizedDiv.
getNode());
5163 AddToWorklist(
Mul.getNode());
5170 return DivRem.getValue(1);
5178 EVT VT =
N->getValueType(0);
5191 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
5219 unsigned SimpleSize =
Simple.getSizeInBits();
5237 EVT VT =
N->getValueType(0);
5250 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
5274 if (
SDValue LogBase2 = BuildLogBase2(N1,
DL)) {
5289 unsigned SimpleSize =
Simple.getSizeInBits();
5311 unsigned Opcode =
N->getOpcode();
5314 EVT VT =
N->getValueType(0);
5325 return DAG.
getNode(Opcode,
DL,
N->getVTList(), N1, N0);
5328 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
5354 X.getValueType() ==
Y.getValueType() &&
5355 hasOperation(Opcode,
X.getValueType())) {
5361 X.getValueType() ==
Y.getValueType() &&
5362 hasOperation(Opcode,
X.getValueType())) {
5395 if (IsSigned &&
Add->getFlags().hasNoSignedWrap())
5398 if (!IsSigned &&
Add->getFlags().hasNoUnsignedWrap())
5407 unsigned Opcode =
N->getOpcode();
5410 EVT VT =
N->getValueType(0);
5420 return DAG.
getNode(Opcode,
DL,
N->getVTList(), N1, N0);
5423 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
5438 (!LegalOperations || hasOperation(
ISD::ABS, VT)))
5456SDValue DAGCombiner::SimplifyNodeWithTwoResults(
SDNode *
N,
unsigned LoOp,
5459 bool HiExists =
N->hasAnyUseOfValue(1);
5460 if (!HiExists && (!LegalOperations ||
5463 return CombineTo(
N, Res, Res);
5467 bool LoExists =
N->hasAnyUseOfValue(0);
5468 if (!LoExists && (!LegalOperations ||
5471 return CombineTo(
N, Res, Res);
5475 if (LoExists && HiExists)
5481 AddToWorklist(
Lo.getNode());
5484 (!LegalOperations ||
5486 return CombineTo(
N, LoOpt, LoOpt);
5491 AddToWorklist(
Hi.getNode());
5494 (!LegalOperations ||
5496 return CombineTo(
N, HiOpt, HiOpt);
5508 EVT VT =
N->getValueType(0);
5512 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5524 unsigned SimpleSize =
Simple.getSizeInBits();
5536 return CombineTo(
N,
Lo,
Hi);
5549 EVT VT =
N->getValueType(0);
5553 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5564 return CombineTo(
N, Zero, Zero);
5570 return CombineTo(
N, N0, Zero);
5577 unsigned SimpleSize =
Simple.getSizeInBits();
5589 return CombineTo(
N,
Lo,
Hi);
5602 EVT CarryVT =
N->getValueType(1);
5623 return DAG.
getNode(
N->getOpcode(),
DL,
N->getVTList(), N1, N0);
5635 N->getVTList(), N0, N0);
5642 return CombineTo(
N,
And, Cmp);
5680 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3,
CC);
5718 N0CC = cast<CondCodeSDNode>(N0.
getOperand(4))->get();
5734 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5735 if (!Opcode1 || Opcode0 == Opcode1)
5745 APInt MinCPlus1 = MinC + 1;
5746 if (-MaxC == MinCPlus1 && MinCPlus1.
isPowerOf2()) {
5803 unsigned BW = (C1 + 1).exactLogBase2();
5823 unsigned Opcode =
N->getOpcode();
5837 return DAG.
getNode(Opcode,
DL, VT, N1, N0);
5841 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
5845 if (
SDValue RMINMAX = reassociateOps(Opcode,
DL, N0, N1,
N->getFlags()))
5865 return DAG.
getNode(AltOpcode,
DL, VT, N0, N1);
5877 auto ReductionOpcode = [](
unsigned Opcode) {
5891 if (
SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5905 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
5907 unsigned LogicOpcode =
N->getOpcode();
5922 EVT XVT =
X.getValueType();
5932 if (XVT !=
Y.getValueType())
5936 if ((VT.
isVector() || LegalOperations) &&
5949 return DAG.
getNode(HandOpcode,
DL, VT, Logic);
5959 if (XVT !=
Y.getValueType())
5971 return DAG.
getNode(HandOpcode,
DL, VT, Logic);
5992 return DAG.
getNode(HandOpcode,
DL, VT, Logic);
6007 return DAG.
getNode(HandOpcode,
DL, VT, Logic0, Logic1, S);
6020 if (XVT.
isInteger() && XVT ==
Y.getValueType() &&
6024 return DAG.
getNode(HandOpcode,
DL, VT, Logic);
6041 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6042 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6043 assert(
X.getValueType() ==
Y.getValueType() &&
6044 "Inputs to shuffles are not the same type");
6050 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6051 !SVN0->getMask().equals(SVN1->getMask()))
6087 SDValue LL, LR, RL, RR, N0CC, N1CC;
6088 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6089 !isSetCCEquivalent(N1, RL, RR, N1CC))
6093 "Unexpected operand types for bitwise logic op");
6096 "Unexpected operand types for setcc");
6112 if (LR == RR && CC0 == CC1 && IsInteger) {
6117 bool AndEqZero = IsAnd && CC1 ==
ISD::SETEQ && IsZero;
6119 bool AndGtNeg1 = IsAnd && CC1 ==
ISD::SETGT && IsNeg1;
6121 bool OrNeZero = !IsAnd && CC1 ==
ISD::SETNE && IsZero;
6123 bool OrLtZero = !IsAnd && CC1 ==
ISD::SETLT && IsZero;
6129 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6131 AddToWorklist(
Or.getNode());
6136 bool AndEqNeg1 = IsAnd && CC1 ==
ISD::SETEQ && IsNeg1;
6138 bool AndLtZero = IsAnd && CC1 ==
ISD::SETLT && IsZero;
6140 bool OrNeNeg1 = !IsAnd && CC1 ==
ISD::SETNE && IsNeg1;
6142 bool OrGtNeg1 = !IsAnd && CC1 ==
ISD::SETGT && IsNeg1;
6148 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6150 AddToWorklist(
And.getNode());
6164 AddToWorklist(
Add.getNode());
6191 return !C0->
isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6209 if (LL == RR && LR == RL) {
6216 if (LL == RL && LR == RR) {
6220 (!LegalOperations ||
6243 bool isFMAXNUMFMINNUM_IEEE,
6244 bool isFMAXNUMFMINNUM) {
6255 isFMAXNUMFMINNUM_IEEE
6263 isFMAXNUMFMINNUM_IEEE
6281 isFMAXNUMFMINNUM_IEEE
6290 isFMAXNUMFMINNUM_IEEE
6300 "Invalid Op to combine SETCC with");
6311 LogicOp,
LHS.getNode(),
RHS.getNode());
6346 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6352 SDValue CommonValue, Operand1, Operand2;
6360 }
else if (LHS1 == RHS1) {
6373 }
else if (RHS0 == LHS1) {
6390 bool IsSigned = isSignedIntSetCC(
CC);
6402 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6406 DAG.
getNode(NewOpcode,
DL, OpVT, Operand1, Operand2);
6407 return DAG.
getSetCC(
DL, VT, MinMaxValue, CommonValue,
CC);
6417 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.
isInteger()) {
6418 const APInt &APLhs = LHS1C->getAPIntValue();
6419 const APInt &APRhs = RHS1C->getAPIntValue();
6423 if (APLhs == (-APRhs) &&
6434 }
else if (TargetPreference &
6455 APInt Dif = MaxC - MinC;
6489 EVT CondVT =
Cond.getValueType();
6500 EVT OpVT =
T.getValueType();
6519 if (
SDValue V = foldLogicOfSetCCs(
true, N0, N1,
DL))
6537 APInt SRLC = SRLI->getAPIntValue();
6549 CombineTo(N0.
getNode(), NewAdd);
6563 EVT LoadResultTy,
EVT &ExtVT) {
6572 if (ExtVT == LoadedVT &&
6573 (!LegalOperations ||
6589 if (LegalOperations &&
6625 if (LdStMemVT.
bitsLT(MemVT))
6630 assert(ShAmt % 8 == 0 &&
"ShAmt is byte offset");
6631 const unsigned ByteShAmt = ShAmt / 8;
6642 if (PtrType == MVT::Untyped || PtrType.
isExtended())
6645 if (isa<LoadSDNode>(LDST)) {
6649 if (!
SDValue(Load, 0).hasOneUse())
6652 if (LegalOperations &&
6661 if (
Load->getNumValues() > 2)
6674 assert(isa<StoreSDNode>(LDST) &&
"It is not a Load nor a Store SDNode");
6680 if (LegalOperations &&
6687bool DAGCombiner::SearchForAndLoads(
SDNode *
N,
6695 if (
Op.getValueType().isVector())
6699 if (
auto *
C = dyn_cast<ConstantSDNode>(
Op)) {
6701 (
Mask->getAPIntValue() &
C->getAPIntValue()) !=
C->getAPIntValue())
6706 if (!
Op.hasOneUse())
6709 switch(
Op.getOpcode()) {
6711 auto *
Load = cast<LoadSDNode>(
Op);
6713 if (isAndLoadExtLoad(Mask, Load,
Load->getValueType(0), ExtVT) &&
6731 unsigned ActiveBits =
Mask->getAPIntValue().countr_one();
6734 cast<VTSDNode>(
Op.getOperand(1))->getVT() :
6735 Op.getOperand(0).getValueType();
6746 if (!SearchForAndLoads(
Op.getNode(), Loads, NodesWithConsts, Mask,
6757 NodeToMask =
Op.getNode();
6760 for (
unsigned i = 0, e = NodeToMask->
getNumValues(); i < e; ++i) {
6762 if (VT != MVT::Glue && VT != MVT::Other) {
6764 NodeToMask =
nullptr;
6776bool DAGCombiner::BackwardsPropagateMask(
SDNode *
N) {
6777 auto *
Mask = dyn_cast<ConstantSDNode>(
N->getOperand(1));
6781 if (!
Mask->getAPIntValue().isMask())
6785 if (isa<LoadSDNode>(
N->getOperand(0)))
6790 SDNode *FixupNode =
nullptr;
6791 if (SearchForAndLoads(
N, Loads, NodesWithConsts, Mask, FixupNode)) {
6804 SDValue(FixupNode, 0), MaskOp);
6806 if (
And.getOpcode() == ISD ::AND)
6811 for (
auto *LogicN : NodesWithConsts) {
6815 if (isa<ConstantSDNode>(Op0))
6819 if (isa<ConstantSDNode>(Op1))
6823 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6830 for (
auto *Load : Loads) {
6835 if (
And.getOpcode() == ISD ::AND)
6838 SDValue NewLoad = reduceLoadWidth(
And.getNode());
6840 "Shouldn't be masking the load if it can't be narrowed");
6841 CombineTo(Load, NewLoad, NewLoad.
getValue(1));
6854SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(
SDNode *
N) {
6865 unsigned OuterShift;
6866 unsigned InnerShift;
6868 auto matchMask = [&OuterShift, &InnerShift, &
Y](
SDValue M) ->
bool {
6871 OuterShift =
M->getOpcode();
6880 Y =
M->getOperand(1);
6887 else if (matchMask(N0))
6893 EVT VT =
N->getValueType(0);
6910 SDValue And0 =
And->getOperand(0), And1 =
And->getOperand(1);
6920 bool FoundNot =
false;
6923 Src = Src.getOperand(0);
6929 Src = Src.getOperand(0);
6933 if (Src.getOpcode() !=
ISD::SRL || !Src.hasOneUse())
6937 EVT SrcVT = Src.getValueType();
6944 SDValue ShiftAmt = Src.getOperand(1);
6945 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6946 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(
BitWidth))
6950 Src = Src.getOperand(0);
6957 Src = Src.getOperand(0);
6981 EVT VT =
N->getValueType(0);
7007 unsigned LogicOpcode =
N->getOpcode();
7009 "Expected bitwise logic operation");
7015 unsigned ShiftOpcode = ShiftOp.
getOpcode();
7016 if (LogicOp.
getOpcode() != LogicOpcode ||
7040 EVT VT =
N->getValueType(0);
7044 return DAG.
getNode(LogicOpcode,
DL, VT, NewShift, Z);
7055 unsigned LogicOpcode =
N->getOpcode();
7057 "Expected bitwise logic operation");
7058 if (LeftHand.
getOpcode() != LogicOpcode ||
7079 EVT VT =
N->getValueType(0);
7081 return DAG.
getNode(LogicOpcode,
DL, VT, CombinedShifts, W);
7108 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
7122 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7125 EVT LoadVT = MLoad->getMemoryVT();
7133 if (
Splat->getAPIntValue().isMask(ElementSize)) {
7135 ExtVT,
DL, MLoad->getChain(), MLoad->getBasePtr(),
7136 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7137 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7139 bool LoadHasOtherUsers = !N0.
hasOneUse();
7140 CombineTo(
N, NewLoad);
7141 if (LoadHasOtherUsers)
7162 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
7176 return RHS->getAPIntValue().isSubsetOf(
LHS->getAPIntValue());
7244 unsigned EltBitWidth =
Vector->getValueType(0).getScalarSizeInBits();
7245 APInt SplatValue, SplatUndef;
7246 unsigned SplatBitSize;
7253 const bool IsBigEndian =
false;
7255 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7256 HasAnyUndefs, EltBitWidth, IsBigEndian);
7260 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7263 SplatValue |= SplatUndef;
7270 for (
unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7279 Load->getValueType(0),
7280 Load->getMemoryVT());
7288 switch (
Load->getExtensionType()) {
7289 default:
B =
false;
break;
7301 CombineTo(
N, (N0.
getNode() == Load) ? NewLoad : N0);
7306 Load->getChain(),
Load->getBasePtr(),
7307 Load->getOffset(),
Load->getMemoryVT(),
7308 Load->getMemOperand());
7310 if (
Load->getNumValues() == 3) {
7314 CombineTo(Load, To, 3,
true);
7316 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7326 if (
SDValue Shuffle = XformToShuffleWithZero(
N))
7335 EVT ExtVT =
Ext->getValueType(0);
7352 if (
auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7353 EVT MemVT = GN0->getMemoryVT();
7356 if (
SDValue(GN0, 0).hasOneUse() &&
7359 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7360 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7363 DAG.
getVTList(VT, MVT::Other), MemVT,
DL, Ops, GN0->getMemOperand(),
7366 CombineTo(
N, ZExtLoad);
7367 AddToWorklist(ZExtLoad.
getNode());
7376 if (
SDValue Res = reduceLoadWidth(
N))
7384 if (BackwardsPropagateMask(
N))
7388 if (
SDValue Combined = visitANDLike(N0, N1,
N))
7393 if (
SDValue V = hoistLogicOpWithSameOpcodeHands(
N))
7440 X.getOperand(0).getScalarValueSizeInBits() == 1)
7443 X.getOperand(0).getScalarValueSizeInBits() == 1)
7457 auto *LN0 = cast<LoadSDNode>(N0);
7458 EVT MemVT = LN0->getMemoryVT();
7465 ((!LegalOperations && LN0->isSimple()) ||
7469 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7483 if (
SDValue Shifts = unfoldExtremeBitClearingToShifts(
N))
7499 auto *
C = dyn_cast<ConstantSDNode>(RHS);
7503 if (!
C->getAPIntValue().isMask(
7504 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7511 if (IsAndZeroExtMask(N0, N1))
7520 if (LegalOperations || VT.
isVector())
7529 bool DemandHighBits) {
7530 if (!LegalOperations)
7533 EVT VT =
N->getValueType(0);
7534 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7540 bool LookPassAnd0 =
false;
7541 bool LookPassAnd1 =
false;
7556 LookPassAnd0 =
true;
7566 LookPassAnd1 =
true;
7592 LookPassAnd0 =
true;
7606 LookPassAnd1 =
true;
7615 if (OpSizeInBits > 16) {
7619 if (DemandHighBits && !LookPassAnd0)
7626 if (!LookPassAnd1) {
7627 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7635 if (OpSizeInBits > 16) {
7650 if (!
N->hasOneUse())
7653 unsigned Opc =
N.getOpcode();
7665 N1C = dyn_cast<ConstantSDNode>(
N.getOperand(1));
7667 N1C = dyn_cast<ConstantSDNode>(N0.
getOperand(1));
7671 unsigned MaskByteOffset;
7675 case 0xFF: MaskByteOffset = 0;
break;
7676 case 0xFF00: MaskByteOffset = 1;
break;
7685 case 0xFF0000: MaskByteOffset = 2;
break;
7686 case 0xFF000000: MaskByteOffset = 3;
break;
7691 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7697 if (!
C ||
C->getZExtValue() != 8)
7705 if (!
C ||
C->getZExtValue() != 8)
7711 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7714 if (!
C ||
C->getZExtValue() != 8)
7719 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7722 if (!
C ||
C->getZExtValue() != 8)
7726 if (Parts[MaskByteOffset])
7741 if (!
C ||
C->getAPIntValue() != 16)
7743 Parts[0] = Parts[1] =
N.getOperand(0).getOperand(0).getNode();
7758 "MatchBSwapHWordOrAndAnd: expecting i32");
7768 if (!Mask0 || !Mask1)
7779 if (!ShiftAmt0 || !ShiftAmt1)
7799 if (!LegalOperations)
7802 EVT VT =
N->getValueType(0);
7840 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7868 if (
SDValue V = foldLogicOfSetCCs(
false, N0, N1,
DL))
7883 const APInt &LHSMask = N0O1C->getAPIntValue();
7884 const APInt &RHSMask = N1O1C->getAPIntValue();
7918 auto peekThroughResize = [](
SDValue V) {
7920 return V->getOperand(0);
7924 SDValue N0Resized = peekThroughResize(N0);
7926 SDValue N1Resized = peekThroughResize(N1);
7931 if (N00 == N1Resized || N01 == N1Resized)
7938 if (peekThroughResize(NotOperand) == N1Resized)
7946 if (peekThroughResize(NotOperand) == N1Resized)
7967 auto peekThroughZext = [](
SDValue V) {
7969 return V->getOperand(0);
7991 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7992 Lo.getValueType() ==
Hi.getValueType()) {
8029 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
8043 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8044 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8051 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8052 assert((!ZeroN00 || !ZeroN01) &&
"Both inputs zero!");
8053 assert((!ZeroN10 || !ZeroN11) &&
"Both inputs zero!");
8054 bool CanFold =
true;
8058 for (
int i = 0; i != NumElts; ++i) {
8059 int M0 = SV0->getMaskElt(i);
8060 int M1 = SV1->getMaskElt(i);
8063 bool M0Zero =
M0 < 0 || (ZeroN00 == (
M0 < NumElts));
8064 bool M1Zero =
M1 < 0 || (ZeroN10 == (
M1 < NumElts));
8068 if ((M0Zero &&
M1 < 0) || (M1Zero &&
M0 < 0))
8072 if (M0Zero == M1Zero) {
8077 assert((
M0 >= 0 ||
M1 >= 0) &&
"Undef index!");
8083 Mask[i] = M1Zero ?
M0 % NumElts : (
M1 % NumElts) + NumElts;
8092 return LegalShuffle;
8106 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
8117 if (
SDValue Combined = visitORLike(N0, N1,
DL))
8124 if (
SDValue BSwap = MatchBSwapHWord(
N, N0, N1))
8126 if (
SDValue BSwap = MatchBSwapHWordLow(
N, N0, N1))
8160 if (
SDValue V = hoistLogicOpWithSameOpcodeHands(
N))
8164 if (
SDValue Rot = MatchRotate(N0, N1,
DL))
8167 if (
SDValue Load = MatchLoadCombine(
N))
8177 if (
SDValue Combined = visitADDLike(
N))
8182 if (LegalOperations || VT.
isVector())
8193 Mask =
Op.getOperand(1);
8194 return Op.getOperand(0);
8237 assert(OppShift && ExtractFrom &&
"Empty SDValue");
8265 bool IsMulOrDiv =
false;
8268 auto SelectOpcode = [&](
unsigned NeededShift,
unsigned MulOrDivVariant) {
8269 IsMulOrDiv = ExtractFrom.
getOpcode() == MulOrDivVariant;
8270 if (!IsMulOrDiv && ExtractFrom.
getOpcode() != NeededShift)
8272 Opcode = NeededShift;
8322 if (Rem != 0 || ResultAmt != OppLHSAmt)
8328 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.
zextOrTrunc(
8337 return DAG.
getNode(Opcode,
DL, ResVT, OppShiftLHS, NewShiftNode);
8391 unsigned MaskLoBits = 0;
8393 unsigned Bits =
Log2_64(EltSize);
8395 if (NegBits >= Bits) {
8418 if (PosBits >= MaskLoBits) {
8440 if ((Pos == NegOp1) ||
8464 return Width.
getLoBits(MaskLoBits) == 0;
8465 return Width == EltSize;
8475 SDValue InnerNeg,
bool HasPos,
8476 unsigned PosOpcode,
unsigned NegOpcode,
8488 return DAG.
getNode(HasPos ? PosOpcode : NegOpcode,
DL, VT, Shifted,
8489 HasPos ? Pos : Neg);
8503 SDValue InnerNeg,
bool HasPos,
8504 unsigned PosOpcode,
unsigned NegOpcode,
8516 if (
matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, N0 == N1)) {
8517 return DAG.
getNode(HasPos ? PosOpcode : NegOpcode,
DL, VT, N0, N1,
8518 HasPos ? Pos : Neg);
8525 auto IsBinOpImm = [](
SDValue Op,
unsigned BinOpc,
unsigned Imm) {
8526 if (
Op.getOpcode() != BinOpc)
8535 IsBinOpImm(InnerNeg,
ISD::XOR, EltBits - 1) &&
8544 IsBinOpImm(InnerPos,
ISD::XOR, EltBits - 1) &&
8554 IsBinOpImm(InnerPos,
ISD::XOR, EltBits - 1) &&
8569 EVT VT =
LHS.getValueType();
8574 bool HasROTL = hasOperation(
ISD::ROTL, VT);
8575 bool HasROTR = hasOperation(
ISD::ROTR, VT);
8576 bool HasFSHL = hasOperation(
ISD::FSHL, VT);
8577 bool HasFSHR = hasOperation(
ISD::FSHR, VT);
8588 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8593 LHS.getOperand(0).getValueType() ==
RHS.getOperand(0).getValueType()) {
8595 if (
SDValue Rot = MatchRotate(
LHS.getOperand(0),
RHS.getOperand(0),
DL)) {
8610 if (!LHSShift && !RHSShift)
8625 RHSShift = NewRHSShift;
8630 LHSShift = NewLHSShift;
8633 if (!RHSShift || !LHSShift)
8660 return (
LHS->getAPIntValue() +
RHS->getAPIntValue()) == EltSizeInBits;
8663 auto ApplyMasks = [&](
SDValue Res) {
8687 bool IsRotate = LHSShiftArg == RHSShiftArg;
8688 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8697 if (CommonOp ==
Or.getOperand(0)) {
8699 Y =
Or.getOperand(1);
8702 if (CommonOp ==
Or.getOperand(1)) {
8704 Y =
Or.getOperand(0);
8711 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8716 }
else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8725 return ApplyMasks(Res);
8738 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8739 bool UseROTL = !LegalOperations || HasROTL;
8741 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8743 bool UseFSHL = !LegalOperations || HasFSHL;
8745 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8748 return ApplyMasks(Res);
8753 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8762 SDValue LExtOp0 = LHSShiftAmt;
8763 SDValue RExtOp0 = RHSShiftAmt;
8776 if (IsRotate && (HasROTL || HasROTR)) {
8778 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8784 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8791 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8797 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8848static std::optional<SDByteProvider>
8850 std::optional<uint64_t> VectorIndex,
8851 unsigned StartingIndex = 0) {
8855 return std::nullopt;
8859 if (
Depth && !
Op.hasOneUse() &&
8860 (
Op.getOpcode() !=
ISD::LOAD || !
Op.getValueType().isVector()))
8861 return std::nullopt;
8865 if (
Op.getOpcode() !=
ISD::LOAD && VectorIndex.has_value())
8866 return std::nullopt;
8870 return std::nullopt;
8872 assert(Index < ByteWidth &&
"invalid index requested");
8875 switch (
Op.getOpcode()) {
8880 return std::nullopt;
8884 return std::nullopt;
8886 if (
LHS->isConstantZero())
8888 if (
RHS->isConstantZero())
8890 return std::nullopt;
8893 auto ShiftOp = dyn_cast<ConstantSDNode>(
Op->getOperand(1));
8895 return std::nullopt;
8897 uint64_t BitShift = ShiftOp->getZExtValue();
8899 if (BitShift % 8 != 0)
8900 return std::nullopt;
8906 return Index < ByteShift
8909 Depth + 1, VectorIndex, Index);
8916 if (NarrowBitWidth % 8 != 0)
8917 return std::nullopt;
8918 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8920 if (Index >= NarrowByteWidth)
8922 ? std::optional<SDByteProvider>(
8930 Depth + 1, VectorIndex, StartingIndex);
8932 auto OffsetOp = dyn_cast<ConstantSDNode>(
Op->getOperand(1));
8934 return std::nullopt;
8936 VectorIndex = OffsetOp->getZExtValue();
8940 if (NarrowBitWidth % 8 != 0)
8941 return std::nullopt;
8942 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8945 if (Index >= NarrowByteWidth)
8946 return std::nullopt;
8954 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8955 return std::nullopt;
8956 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8957 return std::nullopt;
8960 VectorIndex, StartingIndex);
8963 auto L = cast<LoadSDNode>(
Op.getNode());
8964 if (!L->isSimple() || L->isIndexed())
8965 return std::nullopt;
8967 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8968 if (NarrowBitWidth % 8 != 0)
8969 return std::nullopt;
8970 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8975 if (Index >= NarrowByteWidth)
8977 ? std::optional<SDByteProvider>(
8981 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8986 return std::nullopt;
9001 int64_t FirstOffset) {
9003 unsigned Width = ByteOffsets.
size();
9005 return std::nullopt;
9007 bool BigEndian =
true, LittleEndian =
true;
9008 for (
unsigned i = 0; i < Width; i++) {
9009 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9012 if (!BigEndian && !LittleEndian)
9013 return std::nullopt;
9016 assert((BigEndian != LittleEndian) &&
"It should be either big endian or"
9023 switch (
Value.getOpcode()) {
9028 return Value.getOperand(0);
9066 EVT MemVT =
N->getMemoryVT();
9067 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9068 !
N->isSimple() ||
N->isIndexed())
9075 unsigned MaxWideNumBits = 64;
9076 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9077 while (
auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9085 if (
Store->getMemoryVT() != MemVT || !
Store->isSimple() ||
9089 Chain =
Store->getChain();
9090 if (MaxStores < Stores.
size())
9094 if (Stores.
size() < 2)
9099 unsigned NumStores = Stores.
size();
9100 unsigned WideNumBits = NumStores * NarrowNumBits;
9102 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
9111 std::optional<BaseIndexOffset>
Base;
9112 for (
auto *Store : Stores) {
9123 isa<ConstantSDNode>(WideVal.
getOperand(1))) {
9131 if (ShiftAmtC % NarrowNumBits != 0)
9138 Offset = ShiftAmtC / NarrowNumBits;
9144 SourceValue = WideVal;
9145 else if (SourceValue != WideVal) {
9153 SourceValue = WideVal;
9162 int64_t ByteOffsetFromBase = 0;
9165 else if (!
Base->equalBaseIndex(
Ptr, DAG, ByteOffsetFromBase))
9169 if (ByteOffsetFromBase < FirstOffset) {
9171 FirstOffset = ByteOffsetFromBase;
9175 if (Offset < 0 || Offset >= NumStores || OffsetMap[
Offset] !=
INT64_MAX)
9177 OffsetMap[
Offset] = ByteOffsetFromBase;
9181 assert(FirstStore &&
"First store must be set");
9188 if (!Allowed || !
Fast)
9193 auto checkOffsets = [&](
bool MatchLittleEndian) {
9194 if (MatchLittleEndian) {
9195 for (
unsigned i = 0; i != NumStores; ++i)
9196 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9199 for (
unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --
j)
9200 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9207 bool NeedBswap =
false;
9208 bool NeedRotate =
false;
9211 if (NarrowNumBits == 8 && checkOffsets(Layout.
isBigEndian()))
9213 else if (NumStores == 2 && checkOffsets(Layout.
isBigEndian()))
9222 "Unexpected store value to merge");
9231 }
else if (NeedRotate) {
9232 assert(WideNumBits % 2 == 0 &&
"Unexpected type for rotate");
9278 "Can only match load combining against OR nodes");
9281 EVT VT =
N->getValueType(0);
9282 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9288 assert(
P.hasSrc() &&
"Must be a memory byte provider");
9289 auto *
Load = cast<LoadSDNode>(
P.Src.value());
9291 unsigned LoadBitWidth =
Load->getMemoryVT().getScalarSizeInBits();
9293 assert(LoadBitWidth % 8 == 0 &&
9294 "can only analyze providers for individual bytes not bit");
9295 unsigned LoadByteWidth = LoadBitWidth / 8;
9300 std::optional<BaseIndexOffset>
Base;
9304 std::optional<SDByteProvider> FirstByteProvider;
9310 unsigned ZeroExtendedBytes = 0;
9311 for (
int i = ByteWidth - 1; i >= 0; --i) {
9318 if (
P->isConstantZero()) {
9321 if (++ZeroExtendedBytes != (ByteWidth -
static_cast<unsigned>(i)))
9325 assert(
P->hasSrc() &&
"provenance should either be memory or zero");
9326 auto *
L = cast<LoadSDNode>(
P->Src.value());
9332 else if (Chain != LChain)
9337 int64_t ByteOffsetFromBase = 0;
9346 if (
L->getMemoryVT().isVector()) {
9347 unsigned LoadWidthInBit =
L->getMemoryVT().getScalarSizeInBits();
9348 if (LoadWidthInBit % 8 != 0)
9350 unsigned ByteOffsetFromVector =
P->SrcOffset * LoadWidthInBit / 8;
9351 Ptr.addToOffset(ByteOffsetFromVector);
9357 else if (!
Base->equalBaseIndex(
Ptr, DAG, ByteOffsetFromBase))
9361 ByteOffsetFromBase += MemoryByteOffset(*
P);
9362 ByteOffsets[i] = ByteOffsetFromBase;
9365 if (ByteOffsetFromBase < FirstOffset) {
9366 FirstByteProvider =
P;
9367 FirstOffset = ByteOffsetFromBase;
9373 assert(!Loads.
empty() &&
"All the bytes of the value must be loaded from "
9374 "memory, so there must be at least one load which produces the value");
9375 assert(
Base &&
"Base address of the accessed memory location must be set");
9378 bool NeedsZext = ZeroExtendedBytes > 0;
9389 if (LegalOperations &&
9397 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9401 assert(FirstByteProvider &&
"must be set");
9405 if (MemoryByteOffset(*FirstByteProvider) != 0)
9407 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9414 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9421 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9427 if (NeedsBswap && NeedsZext && LegalOperations &&
9435 *FirstLoad->getMemOperand(), &
Fast);
9436 if (!Allowed || !
Fast)
9441 Chain, FirstLoad->getBasePtr(),
9442 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9482 EVT VT =
N->getValueType(0);
9504 M =
And.getOperand(XorIdx ? 0 : 1);
9510 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9511 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9517 if (isa<ConstantSDNode>(
M.getNode()))
9585 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
9597 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
9618 if (
SDValue Combined = visitADDLike(
N))
9625 isSetCCEquivalent(N0, LHS, RHS,
CC,
true)) {
9627 LHS.getValueType());
9628 if (!LegalOperations ||
9646 CombineTo(
N, SetCC);
9648 recursivelyDeleteUnusedNodes(N0.
getNode());
9664 AddToWorklist(
V.getNode());
9673 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9678 return DAG.
getNode(NewOpcode,
DL, VT, N00, N01);
9686 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9691 return DAG.
getNode(NewOpcode,
DL, VT, N00, N01);
9714 AddToWorklist(NotX.
getNode());
9719 if (!LegalOperations || hasOperation(
ISD::ABS, VT)) {
9723 SDValue A0 =
A.getOperand(0), A1 =
A.getOperand(1);
9725 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9762 if (
SDValue V = hoistLogicOpWithSameOpcodeHands(
N))
9773 if (
SDValue MM = unfoldMaskedMerge(
N))
9796 unsigned LogicOpcode = LogicOp.
getOpcode();
9802 unsigned ShiftOpcode = Shift->
getOpcode();
9805 assert(C1Node &&
"Expected a shift with constant operand");
9808 const APInt *&ShiftAmtVal) {
9809 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9817 ShiftOp = V.getOperand(0);
9822 if (ShiftAmtVal->getBitWidth() != C1Val.
getBitWidth())
9827 bool Overflow =
false;
9828 APInt NewShiftAmt = C1Val.
uadd_ov(*ShiftAmtVal, Overflow);
9833 if (NewShiftAmt.
uge(V.getScalarValueSizeInBits()))
9842 if (matchFirstShift(LogicOp.
getOperand(0),
X, C0Val))
9844 else if (matchFirstShift(LogicOp.
getOperand(1),
X, C0Val))
9856 return DAG.
getNode(LogicOpcode,
DL, VT, NewShift1, NewShift2,
9886 switch (
LHS.getOpcode()) {
9906 isa<ConstantSDNode>(BinOpLHSVal.
getOperand(1));
9910 if (!IsShiftByConstant && !IsCopyOrSelect)
9913 if (IsCopyOrSelect &&
N->hasOneUse())
9918 EVT VT =
N->getValueType(0);
9920 N->getOpcode(),
DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9923 return DAG.
getNode(
LHS.getOpcode(),
DL, VT, NewShift, NewRHS);
9934 EVT TruncVT =
N->getValueType(0);
9935 if (
N->hasOneUse() &&
N->getOperand(0).hasOneUse() &&
9937 SDValue N01 =
N->getOperand(0).getOperand(1);
9940 SDValue N00 =
N->getOperand(0).getOperand(0);
9943 AddToWorklist(Trunc00.
getNode());
9944 AddToWorklist(Trunc01.
getNode());
9956 EVT VT =
N->getValueType(0);
9971 bool OutOfRange =
false;
9973 OutOfRange |=
C->getAPIntValue().uge(Bitsize);
9981 return DAG.
getNode(
N->getOpcode(), dl, VT, N0, Amt);
9986 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9998 return DAG.
getNode(
N->getOpcode(), dl, VT, N0, NewOp1);
10010 bool SameSide = (
N->getOpcode() == NextOp);
10017 if (Norm1 && Norm2)
10019 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10021 {CombinedShift, BitsizeC});
10023 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10025 CombinedShiftNorm);
10049 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
10072 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
10082 if (
SDValue NewOp1 = distributeTruncateThroughAnd(N1.
getNode()))
10093 return (c1 + c2).uge(OpSizeInBits);
10103 return (c1 + c2).ult(OpSizeInBits);
10130 return c2.
uge(OpSizeInBits - InnerBitwidth) &&
10131 (c1 + c2).uge(OpSizeInBits);
10143 return c2.
uge(OpSizeInBits - InnerBitwidth) &&
10144 (c1 + c2).ult(OpSizeInBits);
10168 return c1.
ult(VT.getScalarSizeInBits()) && (c1 == c2);
10176 AddToWorklist(NewSHL.
getNode());
10184 const APInt &LHSC =
LHS->getAPIntValue();
10185 const APInt &RHSC =
RHS->getAPIntValue();
10186 return LHSC.
ult(OpSizeInBits) && RHSC.
ult(OpSizeInBits) &&
10258 AddToWorklist(Shl0.
getNode());
10277 {Add.getOperand(1)})) {
10297 if (
SDValue NewSHL = visitShiftByConstant(
N))
10331 APInt NewStep = C0 << ShlVal;
10346 "SRL or SRA node is required here!");
10355 SDValue ShiftOperand =
N->getOperand(0);
10366 if (!IsSignExt && !IsZeroExt)
10373 auto UserOfLowerBits = [NarrowVTSize](
SDNode *U) {
10378 if (!UShiftAmtSrc) {
10382 return UShiftAmt < NarrowVTSize;
10396 unsigned ActiveBits = IsSignExt
10397 ?
Constant->getAPIntValue().getSignificantBits()
10398 :
Constant->getAPIntValue().getActiveBits();
10399 if (ActiveBits > NarrowVTSize)
10416 "Cannot have a multiply node with two different operand types.");
10427 if (ShiftAmt != NarrowVTSize)
10449 bool IsSigned =
N->getOpcode() ==
ISD::SRA;
10456 unsigned Opcode =
N->getOpcode();
10461 EVT VT =
N->getValueType(0);
10511 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
10514 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
10530 APInt Sum = c1 + c2;
10531 unsigned ShiftSum =
10542 "Expected matchBinaryPredicate to return one element for "
10546 ShiftValue = ShiftValues[0];
10574 if ((ShiftAmt > 0) &&
10584 N->getValueType(0), Trunc);
10619 DAG.
getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10636 if (
SDValue NewOp1 = distributeTruncateThroughAnd(N1.
getNode()))
10653 if (LargeShift->getAPIntValue() == TruncBits) {
10674 if (
SDValue NewSRA = visitShiftByConstant(
N))
10683 if (
SDValue NarrowLoad = reduceLoadWidth(
N))
10686 if (
SDValue AVG = foldShiftToAvg(
N))
10709 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
10712 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
10728 return (c1 + c2).uge(OpSizeInBits);
10738 return (c1 + c2).ult(OpSizeInBits);
10758 if (c1 + OpSizeInBits == InnerShiftSize) {
10759 if (c1 + c2 >= InnerShiftSize)
10769 c1 + c2 < InnerShiftSize) {
10774 OpSizeInBits - c2),
10789 const APInt &LHSC =
LHS->getAPIntValue();
10790 const APInt &RHSC =
RHS->getAPIntValue();
10791 return LHSC.
ult(OpSizeInBits) && RHSC.
ult(OpSizeInBits) &&
10832 AddToWorklist(SmallShift.
getNode());
10860 APInt UnknownBits = ~Known.Zero;
10876 AddToWorklist(
Op.getNode());
10885 if (
SDValue NewOp1 = distributeTruncateThroughAnd(N1.
getNode()))
10895 if (
SDValue NewSRL = visitShiftByConstant(
N))
10899 if (
SDValue NarrowLoad = reduceLoadWidth(
N))
10926 if (
N->hasOneUse()) {
10935 AddToWorklist(
User);
10943 if (
SDValue AVG = foldShiftToAvg(
N))
10950 EVT VT =
N->getValueType(0);
10963 return IsFSHL ? N0 : N1;
10965 auto IsUndefOrZero = [](
SDValue V) {
10976 return DAG.
getNode(
N->getOpcode(),
DL, VT, N0, N1,
10982 return IsFSHL ? N0 : N1;
10988 if (IsUndefOrZero(N0))
10992 if (IsUndefOrZero(N1))
11004 auto *
LHS = dyn_cast<LoadSDNode>(N0);
11005 auto *
RHS = dyn_cast<LoadSDNode>(N1);
11006 if (LHS && RHS &&
LHS->isSimple() &&
RHS->isSimple() &&
11007 LHS->getAddressSpace() ==
RHS->getAddressSpace() &&
11017 RHS->getAddressSpace(), NewAlign,
11018 RHS->getMemOperand()->getFlags(), &
Fast) &&
11022 AddToWorklist(NewPtr.
getNode());
11024 VT,
DL,
RHS->getChain(), NewPtr,
11025 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11026 RHS->getMemOperand()->getFlags(),
RHS->getAAInfo());
11028 WorklistRemover DeadNodes(*
this);
11055 if (N0 == N1 && hasOperation(RotOpc, VT))
11056 return DAG.
getNode(RotOpc,
DL, VT, N0, N2);
11101 EVT SrcVT =
N->getValueType(0);
11104 N =
N->getOperand(0).getNode();
11109 EVT VT =
N->getValueType(0);
11110 SDValue AbsOp1 =
N->getOperand(0);
11138 VT0 = cast<VTSDNode>(Op0.
getOperand(1))->getVT();
11139 VT1 = cast<VTSDNode>(Op1.
getOperand(1))->getVT();
11148 EVT MaxVT = VT0.
bitsGT(VT1) ? VT0 : VT1;
11149 if ((VT0 == MaxVT || Op0->
hasOneUse()) &&
11151 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11161 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11171 EVT VT =
N->getValueType(0);
11206 EVT VT =
N->getValueType(0);
11229 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.
getOperand(1));
11231 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11232 ShAmt->getZExtValue() >= (BW / 2) &&
11233 (ShAmt->getZExtValue() % 16) == 0 && TLI.
isTypeLegal(HalfVT) &&
11235 (!LegalOperations || hasOperation(
ISD::BSWAP, HalfVT))) {
11237 if (
uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11252 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.
getOperand(1));
11253 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11254 ShAmt->getZExtValue() % 8 == 0) {
11269 EVT VT =
N->getValueType(0);
11297 EVT VT =
N->getValueType(0);
11314 EVT VT =
N->getValueType(0);
11326 EVT VT =
N->getValueType(0);
11343 EVT VT =
N->getValueType(0);
11355 EVT VT =
N->getValueType(0);
11367 const APInt &Amt = AmtC->getAPIntValue();
11368 if (Amt.
ult(NumBits)) {
11402 EVT VT =
LHS.getValueType();
11408 return (Flags.hasNoSignedZeros() ||
Options.NoSignedZerosFPMath) &&
11410 (Flags.hasNoNaNs() ||
11460 const unsigned Opcode =
N->getOpcode();
11466 unsigned FloorISD = 0;
11467 auto VT =
N->getValueType(0);
11468 bool IsUnsigned =
false;
11497 if (IsUnsigned && !
Add->getFlags().hasNoUnsignedWrap())
11500 if (!IsUnsigned && !
Add->getFlags().hasNoSignedWrap())
11503 return DAG.
getNode(FloorISD,
SDLoc(
N),
N->getValueType(0), {A, B});
11510 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11516 True, DAG, LegalOperations, ForCodeSize);
11528 if (LHS == NegTrue) {
11532 RHS, DAG, LegalOperations, ForCodeSize);
11535 if (NegRHS == False) {
11537 False,
CC, TLI, DAG);
11557 EVT VT =
N->getValueType(0);
11559 VT !=
Cond.getOperand(0).getValueType())
11606 EVT VT =
N->getValueType(0);
11607 EVT CondVT =
Cond.getValueType();
11613 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11614 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11618 if (CondVT != MVT::i1 || LegalOperations) {
11633 C1->
isZero() && C2->isOne()) {
11648 assert(CondVT == MVT::i1 && !LegalOperations);
11651 if (C1->
isOne() && C2->isZero())
11659 if (C1->
isZero() && C2->isOne()) {
11666 if (C1->
isZero() && C2->isAllOnes()) {
11679 const APInt &C1Val = C1->getAPIntValue();
11680 const APInt &C2Val = C2->getAPIntValue();
11683 if (C1Val - 1 == C2Val) {
11689 if (C1Val + 1 == C2Val) {
11709 if (C2->isAllOnes()) {
11721template <
class MatchContextClass>
11725 N->getOpcode() == ISD::VP_SELECT) &&
11726 "Expected a (v)(vp.)select");
11728 SDValue T =
N->getOperand(1),
F =
N->getOperand(2);
11729 EVT VT =
N->getValueType(0);
11731 MatchContextClass matcher(DAG, TLI,
N);
11767 EVT VT =
N->getValueType(0);
11831 EVT VT =
LHS.getValueType();
11833 if (LegalOperations && !hasOperation(ABDOpc, VT))
11843 return DAG.
getNode(ABDOpc,
DL, VT, LHS, RHS);
11846 hasOperation(ABDOpc, VT))
11855 return DAG.
getNode(ABDOpc,
DL, VT, LHS, RHS);
11858 hasOperation(ABDOpc, VT))
11872 EVT VT =
N->getValueType(0);
11880 if (
SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(
N,
DL, DAG))
11890 if (
SDValue V = foldSelectOfConstants(
N))
11894 if (SimplifySelectOps(
N, N1, N2))
11897 if (VT0 == MVT::i1) {
11906 bool normalizeToSequence =
11915 if (normalizeToSequence || !InnerSelect.
use_empty())
11917 InnerSelect, N2, Flags);
11920 recursivelyDeleteUnusedNodes(InnerSelect.
getNode());
11927 Cond1, N1, N2, Flags);
11928 if (normalizeToSequence || !InnerSelect.
use_empty())
11930 InnerSelect, Flags);
11933 recursivelyDeleteUnusedNodes(InnerSelect.
getNode());
11943 if (!normalizeToSequence) {
11949 if (
SDValue Combined = visitANDLike(N0, N1_0,
N)) {
11962 if (!normalizeToSequence) {
11968 if (
SDValue Combined = visitORLike(N0, N2_0,
DL))
12004 combineMinNumMaxNum(
DL, VT, Cond0, Cond1, N1, N2,
CC))
12015 auto *
C = dyn_cast<ConstantSDNode>(N2.
getOperand(1));
12016 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12017 if (
C && NotC &&
C->getAPIntValue() == ~NotC->getAPIntValue()) {
12037 (!LegalOperations &&
12048 if (
SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2,
CC,
DL))
12051 if (
SDValue NewSel = SimplifySelect(
DL, N0, N1, N2))
12056 if (
SDValue BinOp = foldSelectOfBinops(
N))
12072 EVT VT =
N->getValueType(0);
12080 if (
LHS->getNumOperands() != 2 ||
RHS->getNumOperands() != 2)
12089 for (
int i = 0; i < NumElems / 2; ++i) {
12090 if (
Cond->getOperand(i)->isUndef())
12093 if (BottomHalf ==
nullptr)
12094 BottomHalf = cast<ConstantSDNode>(
Cond.getOperand(i));
12095 else if (
Cond->getOperand(i).getNode() != BottomHalf)
12101 for (
int i = NumElems / 2; i < NumElems; ++i) {
12102 if (
Cond->getOperand(i)->isUndef())
12105 if (TopHalf ==
nullptr)
12106 TopHalf = cast<ConstantSDNode>(
Cond.getOperand(i));
12107 else if (
Cond->getOperand(i).getNode() != TopHalf)
12111 assert(TopHalf && BottomHalf &&
12112 "One half of the selector was all UNDEFs and the other was all the "
12113 "same value. This should have been addressed before this function.");
12116 BottomHalf->
isZero() ?
RHS->getOperand(0) :
LHS->getOperand(0),
12117 TopHalf->
isZero() ?
RHS->getOperand(1) :
LHS->getOperand(1));
12130 EVT VT = BasePtr.getValueType();
12134 SplatVal.getValueType() == VT) {
12140 if (Index.getOpcode() !=
ISD::ADD)
12146 Index = Index.getOperand(1);
12152 Index = Index.getOperand(0);
12167 Index = Index.getOperand(0);
12180 Index = Index.getOperand(0);
12265 MST1->isSimple() && MST1->getBasePtr() ==
Ptr &&
12268 MST1->getMemoryVT().getStoreSize()) ||
12272 CombineTo(MST1, MST1->getChain());
12289 if (CombineToPreIndexedLoadStore(
N) || CombineToPostIndexedLoadStore(
N))
12293 Value.getValueType().isInteger() &&
12294 (!isa<ConstantSDNode>(
Value) ||
12295 !cast<ConstantSDNode>(
Value)->isOpaque())) {
12296 APInt TruncDemandedBits =
12322 Value.getOperand(0).getValueType());
12333 auto *SST = cast<VPStridedStoreSDNode>(
N);
12336 if (
auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12337 CStride && CStride->getZExtValue() == EltVT.
getStoreSize()) {
12339 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12340 SST->getVectorLength(), SST->getMemoryVT(),
12341 SST->getMemOperand(), SST->getAddressingMode(),
12342 SST->isTruncatingStore(), SST->isCompressingStore());
12351 SDValue Passthru =
N->getOperand(2);
12354 bool HasPassthru = !Passthru.
isUndef();
12367 unsigned NumSelected = 0;
12369 for (
unsigned I = 0;
I < NumElmts; ++
I) {
12382 for (
unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12437 return CombineTo(
N, PassThru, MGT->
getChain());
12473 return CombineTo(
N, NewLd, NewLd.
getValue(1));
12477 if (CombineToPreIndexedLoadStore(
N) || CombineToPostIndexedLoadStore(
N))
12505 EVT DataVT =
Index.getValueType();
12513 auto *SLD = cast<VPStridedLoadSDNode>(
N);
12516 if (
auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12517 CStride && CStride->getZExtValue() == EltVT.
getStoreSize()) {
12519 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12520 SDLoc(
N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12521 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12522 SLD->getMemOperand(), SLD->isExpandingLoad());
12523 return CombineTo(
N, NewLd, NewLd.
getValue(1));
12534 EVT VT =
N->getValueType(0);
12535 if (!
Cond.hasOneUse() ||
Cond.getScalarValueSizeInBits() != 1 ||
12544 bool AllAddOne =
true;
12545 bool AllSubOne =
true;
12547 for (
unsigned i = 0; i != Elts; ++i) {
12569 if (AllAddOne || AllSubOne) {
12605 if (
SDValue V = foldBoolSelectToLogic<VPMatchContext>(
N,
DL, DAG))
12615 EVT VT =
N->getValueType(0);
12621 if (
SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(
N,
DL, DAG))
12647 bool isAbs =
false;
12666 AddToWorklist(Shift.
getNode());
12667 AddToWorklist(
Add.getNode());
12679 if (
SDValue FMinMax = combineMinNumMaxNum(
DL, VT, LHS, RHS, N1, N2,
CC))
12694 EVT NarrowVT =
LHS.getValueType();
12702 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12718 if (
SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2,
CC,
DL))
12748 (OpLHS == CondLHS || OpRHS == CondLHS))
12751 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12754 CondLHS == OpLHS) {
12759 return Cond->getAPIntValue() ==
~Op->getAPIntValue();
12800 if (OpLHS == LHS) {
12816 return (!
Op && !
Cond) ||
12818 Cond->getAPIntValue() == (-
Op->getAPIntValue() - 1));
12849 if (SimplifySelectOps(
N, N1, N2))
12869 if (
SDValue V = foldVSelectOfConstants(
N))
12903 AddToWorklist(
SCC.getNode());
12907 if (
auto *SCCC = dyn_cast<ConstantSDNode>(
SCC.getNode()))
12908 return SCCC->isZero() ? N3 : N2;
12912 if (
SCC->isUndef())
12919 SCC.getOperand(1), N2, N3,
SCC.getOperand(2));
12926 if (SimplifySelectOps(
N, N2, N3))
12930 return SimplifySelectCC(
DL, N0, N1, N2, N3,
CC);
12938 N->hasOneUse() &&
N->user_begin()->getOpcode() ==
ISD::BRCOND;
12941 EVT VT =
N->getValueType(0);
12942 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
12948 if (PreferSetCC && Combined.getOpcode() !=
ISD::SETCC) {
12949 SDValue NewSetCC = rebuildSetCC(Combined);
12977 A.getOperand(0) ==
B.getOperand(0);
12981 B.getOperand(0) ==
A;
12984 bool IsRotate =
false;
12987 if (IsAndWithShift(N0, N1)) {
12989 ShiftOrRotate = N1;
12990 }
else if (IsAndWithShift(N1, N0)) {
12992 ShiftOrRotate = N0;
12993 }
else if (IsRotateWithOp(N0, N1)) {
12996 ShiftOrRotate = N1;
12997 }
else if (IsRotateWithOp(N1, N0)) {
13000 ShiftOrRotate = N0;
13003 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13008 auto GetAPIntValue = [](
SDValue Op) -> std::optional<APInt> {
13011 if (CNode ==
nullptr)
13012 return std::nullopt;
13015 std::optional<APInt> AndCMask =
13016 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.
getOperand(1));
13017 std::optional<APInt> ShiftCAmt =
13018 GetAPIntValue(ShiftOrRotate.getOperand(1));
13022 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13023 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13025 bool CanTransform = IsRotate;
13026 if (!CanTransform) {
13028 CanTransform = *ShiftCAmt == (~*AndCMask).
popcount();
13030 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13038 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13040 if (CanTransform && NewShiftOpc != ShiftOpc) {
13042 DAG.
getNode(NewShiftOpc,
DL, OpVT, ShiftOrRotate.getOperand(0),
13043 ShiftOrRotate.getOperand(1));
13050 NumBits - ShiftCAmt->getZExtValue())
13051 :
APInt::getLowBitsSet(NumBits,
13052 NumBits - ShiftCAmt->getZExtValue());
13060 return DAG.
getSetCC(
DL, VT, NewAndOrOp, NewShiftOrRotate,
Cond);
13089 if (!
N.hasOneUse())
13092 if (!isa<LoadSDNode>(
N))
13118 unsigned Opcode =
N->getOpcode();
13120 EVT VT =
N->getValueType(0);
13123 "Expected EXTEND dag node in input!");
13164 unsigned Opcode =
N->getOpcode();
13166 EVT VT =
N->getValueType(0);
13169 "Expected EXTEND dag node in input!");
13174 if (isa<ConstantSDNode>(N0))
13175 return DAG.
getNode(Opcode,
DL, VT, N0);
13183 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13194 unsigned FoldOpc = Opcode;
13217 for (
unsigned i = 0; i != NumElts; ++i) {
13219 if (
Op.isUndef()) {
13230 APInt C =
Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13248 bool HasCopyToRegUses =
false;
13263 for (
unsigned i = 0; i != 2; ++i) {
13267 if (!isa<ConstantSDNode>(UseOp))
13281 HasCopyToRegUses =
true;
13284 if (HasCopyToRegUses) {
13285 bool BothLiveOut =
false;
13288 BothLiveOut =
true;
13295 return !ExtendNodes.
empty();
13305 for (
SDNode *SetCC : SetCCs) {
13308 for (
unsigned j = 0;
j != 2; ++
j) {
13310 if (SOp == OrigLoad)
13324 EVT DstVT =
N->getValueType(0);
13329 "Unexpected node type (not an extend)!");
13367 EVT SplitSrcVT = SrcVT;
13368 EVT SplitDstVT = DstVT;
13381 const unsigned NumSplits =
13388 for (
unsigned Idx = 0;
Idx < NumSplits;
Idx++) {
13407 AddToWorklist(NewChain.
getNode());
13409 CombineTo(
N, NewValue);
13415 ExtendSetCCUses(SetCCs, N0, NewValue, (
ISD::NodeType)
N->getOpcode());
13416 CombineTo(N0.
getNode(), Trunc, NewChain);
13424 EVT VT =
N->getValueType(0);
13425 EVT OrigVT =
N->getOperand(0).getValueType();
13447 EVT MemVT =
Load->getMemoryVT();
13468 Load->getChain(),
Load->getBasePtr(),
13469 Load->getMemoryVT(),
Load->getMemOperand());
13482 if (
SDValue(Load, 0).hasOneUse()) {
13486 Load->getValueType(0), ExtLoad);
13487 CombineTo(Load, Trunc, ExtLoad.
getValue(1));
13491 recursivelyDeleteUnusedNodes(N0.
getNode());
13500SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(
SDNode *Cast) {
13501 unsigned CastOpcode = Cast->
getOpcode();
13505 "Unexpected opcode for vector select narrowing/widening");
13545 bool LegalOperations,
SDNode *
N,
13556 if ((LegalOperations || !LN0->
isSimple() ||
13567 Combiner.recursivelyDeleteUnusedNodes(LN0);
13580 bool NonNegZExt =
false) {
13587 "Unexpected load type or opcode");
13604 !cast<LoadSDNode>(N0)->isSimple()) &&
13608 bool DoXform =
true;
13621 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13625 if (NoReplaceTrunc) {
13627 Combiner.recursivelyDeleteUnusedNodes(LN0);
13647 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->
isSimple()) &&
13669 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13672 EVT MemoryVT = ALoad->getMemoryVT();
13681 EVT OrigVT = ALoad->getValueType(0);
13683 auto *NewALoad = cast<AtomicSDNode>(DAG.
getAtomic(
13685 ALoad->getBasePtr(), ALoad->getMemOperand()));
13686 NewALoad->setExtensionType(ExtLoadType);
13696 bool LegalOperations) {
13708 EVT VT =
N->getValueType(0);
13709 EVT XVT =
X.getValueType();
13725 return DAG.
getNode(ShiftOpcode,
DL, VT, NotX, ShiftAmount);
13739 EVT VT =
N->getValueType(0);
13749 if (VT.
isVector() && !LegalOperations &&
13768 if (SVT == MatchingVecType) {
13784 auto IsFreeToExtend = [&](
SDValue V) {
13792 cast<LoadSDNode>(V)->isSimple() &&
13806 if (
User->getOpcode() != ExtOpcode ||
User->getValueType(0) != VT)
13812 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13832 SDValue ExtTrueVal = (SetCCWidth == 1)
13836 if (
SDValue SCC = SimplifySelectCC(
DL, N00, N01, ExtTrueVal, Zero,
CC,
true))
13848 return DAG.
getSelect(
DL, VT, SetCC, ExtTrueVal, Zero);
13857 EVT VT =
N->getValueType(0);
13861 if (
SDValue FoldedVOp = SimplifyVCastOp(
N,
DL))
13899 if (NarrowLoad.getNode() != N0.
getNode()) {
13900 CombineTo(N0.
getNode(), NarrowLoad);
13902 AddToWorklist(oye);
13910 unsigned OpBits =
Op.getScalarValueSizeInBits();
13916 if (OpBits == DestBits) {
13922 if (OpBits < DestBits) {
13931 Flags.setNoSignedWrap(
true);
13939 if (OpBits < DestBits)
13941 else if (OpBits > DestBits)
13961 if (
SDValue ExtLoad = CombineExtLoad(
N))
13996 bool NoReplaceTruncAnd = !N0.
hasOneUse();
14000 if (NoReplaceTruncAnd) {
14003 CombineTo(N0.
getNode(), TruncAnd);
14005 if (NoReplaceTrunc) {
14010 CombineTo(LN00, Trunc, ExtLoad.
getValue(1));
14029 if (
SDValue NewVSel = matchVSelectOpSizesWithSetCC(
N))
14060 if (NewXor.getNode() == N0.
getNode()) {
14086 "Expected extend op");
14132 EVT VT =
N->getValueType(0);
14136 if (
SDValue FoldedVOp = SimplifyVCastOp(
N,
DL))
14167 APInt TruncatedBits =
14169 APInt(
Op.getScalarValueSizeInBits(), 0) :
14172 std::min(
Op.getScalarValueSizeInBits(),
14178 return ZExtOrTrunc;
14188 if (NarrowLoad.getNode() != N0.
getNode()) {
14189 CombineTo(N0.
getNode(), NarrowLoad);
14191 AddToWorklist(oye);
14199 if (
N->getFlags().hasNonNeg()) {
14207 if (OpBits == DestBits) {
14213 if (OpBits < DestBits) {
14223 Flags.setNoSignedWrap(
true);
14224 Flags.setNoUnsignedWrap(
true);
14236 AddToWorklist(
Op.getNode());
14240 return ZExtOrTrunc;
14246 AddToWorklist(
Op.getNode());
14282 if (
SDValue ExtLoad = CombineExtLoad(
N))
14302 bool DoXform =
true;
14306 auto *AndC = cast<ConstantSDNode>(N0.
getOperand(1));
14309 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
14325 bool NoReplaceTruncAnd = !N0.
hasOneUse();
14329 if (NoReplaceTruncAnd) {
14332 CombineTo(N0.
getNode(), TruncAnd);
14334 if (NoReplaceTrunc) {
14339 CombineTo(LN00, Trunc, ExtLoad.
getValue(1));
14348 if (
SDValue ZExtLoad = CombineZExtLogicopShiftLoad(
N))
14364 if (!LegalOperations && VT.
isVector() &&
14396 if (
SDValue SCC = SimplifySelectCC(
14400 cast<CondCodeSDNode>(N0.
getOperand(2))->get(),
true))
14409 if (
auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
14416 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14437 if (
SDValue NewVSel = matchVSelectOpSizesWithSetCC(
N))
14461 EVT VT =
N->getValueType(0);
14495 if (NarrowLoad.getNode() != N0.
getNode()) {
14496 CombineTo(N0.
getNode(), NarrowLoad);
14498 AddToWorklist(oye);
14516 assert(isa<ConstantSDNode>(
Y) &&
"Expected constant to be folded!");
14532 bool DoXform =
true;
14545 CombineTo(
N, ExtLoad);
14546 if (NoReplaceTrunc) {
14548 recursivelyDeleteUnusedNodes(LN0);
14552 CombineTo(LN0, Trunc, ExtLoad.
getValue(1));
14566 if (!LegalOperations || TLI.
isLoadExtLegal(ExtType, VT, MemVT)) {
14570 CombineTo(
N, ExtLoad);
14572 recursivelyDeleteUnusedNodes(LN0);
14586 if (VT.
isVector() && !LegalOperations) {
14598 cast<CondCodeSDNode>(N0.
getOperand(2))->get());
14606 cast<CondCodeSDNode>(N0.
getOperand(2))->get());
14611 if (
SDValue SCC = SimplifySelectCC(
14614 cast<CondCodeSDNode>(N0.
getOperand(2))->get(),
true))
14628 unsigned Opcode =
N->getOpcode();
14631 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14635 AssertVT == cast<VTSDNode>(N0.
getOperand(1))->getVT())
14647 EVT BigA_AssertVT = cast<VTSDNode>(BigA.
getOperand(1))->getVT();
14648 EVT MinAssertVT = AssertVT.
bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14662 EVT BigA_AssertVT = cast<VTSDNode>(BigA.
getOperand(1))->getVT();
14663 if (AssertVT.
bitsLT(BigA_AssertVT)) {
14677 Align AL = cast<AssertAlignSDNode>(
N)->getAlign();
14682 if (
auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14684 std::max(AL, AAN->getAlign()));
14694 unsigned AlignShift =
Log2(AL);
14699 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14700 if (LHSAlignShift < AlignShift)
14702 if (RHSAlignShift < AlignShift)
14717 unsigned Opc =
N->getOpcode();
14721 EVT VT =
N->getValueType(0);
14731 unsigned ShAmt = 0;
14736 unsigned ShiftedOffset = 0;
14741 ExtVT = cast<VTSDNode>(
N->getOperand(1))->getVT();
14749 auto *LN = dyn_cast<LoadSDNode>(N0);
14750 auto *N1C = dyn_cast<ConstantSDNode>(
N->getOperand(1));
14756 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14757 if (MemoryWidth <= ShAmt)
14768 LN->getExtensionType() != ExtType)
14772 auto AndC = dyn_cast<ConstantSDNode>(
N->getOperand(1));
14777 unsigned ActiveBits = 0;
14778 if (
Mask.isMask()) {
14779 ActiveBits =
Mask.countr_one();
14780 }
else if (
Mask.isShiftedMask(ShAmt, ActiveBits)) {
14781 ShiftedOffset = ShAmt;
14802 if (!
SRL.hasOneUse())
14807 auto *LN = dyn_cast<LoadSDNode>(
SRL.getOperand(0));
14808 auto *SRL1C = dyn_cast<ConstantSDNode>(
SRL.getOperand(1));
14815 ShAmt = SRL1C->getZExtValue();
14816 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14817 if (ShAmt >= MemoryWidth)
14844 isa<ConstantSDNode>(
Mask->getOperand(1))) {
14845 unsigned Offset, ActiveBits;
14846 const APInt& ShiftMask =
Mask->getConstantOperandAPInt(1);
14847 if (ShiftMask.
isMask()) {
14869 N0 =
SRL.getOperand(0);
14877 unsigned ShLeftAmt = 0;
14881 ShLeftAmt = N01->getZExtValue();
14887 if (!isa<LoadSDNode>(N0))
14894 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14897 auto AdjustBigEndianShift = [&](
unsigned ShAmt) {
14898 unsigned LVTStoreBits =
14901 return LVTStoreBits - EVTStoreBits - ShAmt;
14906 unsigned PtrAdjustmentInBits =
14909 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14915 AddToWorklist(NewPtr.
getNode());
14930 WorklistRemover DeadNodes(*
this);
14935 if (ShLeftAmt != 0) {
14947 if (ShiftedOffset != 0) {
14964 EVT VT =
N->getValueType(0);
14965 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14995 if ((N00Bits <= ExtVTBits ||
15011 if ((N00Bits == ExtVTBits ||
15012 (!IsZext && (N00Bits < ExtVTBits ||
15014 (!LegalOperations ||
15039 if (
SDValue NarrowLoad = reduceLoadWidth(
N))
15046 if (
auto *ShAmt = dyn_cast<ConstantSDNode>(N0.
getOperand(1)))
15047 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15051 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15062 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15063 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15066 auto *LN0 = cast<LoadSDNode>(N0);
15070 CombineTo(
N, ExtLoad);
15072 AddToWorklist(ExtLoad.
getNode());
15078 N0.
hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15079 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15081 auto *LN0 = cast<LoadSDNode>(N0);
15085 CombineTo(
N, ExtLoad);
15093 if (ExtVT == Ld->getMemoryVT() && N0.
hasOneUse() &&
15097 VT,
DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15098 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15099 Ld->getAddressingMode(),
ISD::SEXTLOAD, Ld->isExpandingLoad());
15100 CombineTo(
N, ExtMaskedLoad);
15107 if (
auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15108 if (
SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15110 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15111 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15114 DAG.
getVTList(VT, MVT::Other), ExtVT,
DL, Ops, GN0->getMemOperand(),
15117 CombineTo(
N, ExtLoad);
15119 AddToWorklist(ExtLoad.
getNode());
15142 (!LegalOperations ||
15156 bool LegalOperations) {
15157 unsigned InregOpcode =
N->getOpcode();
15161 EVT VT =
N->getValueType(0);
15163 Src.getValueType().getVectorElementType(),
15167 "Expected EXTEND_VECTOR_INREG dag node in input!");
15176 Src = Src.getOperand(0);
15177 if (Src.getValueType() != SrcVT)
15183 return DAG.
getNode(Opcode,
DL, VT, Src);
15188 EVT VT =
N->getValueType(0);
15213 EVT VT =
N->getValueType(0);
15234 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15236 assert(NumSrcBits > NumDstBits &&
"Unexpected types for truncate operation");
15257 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15259 assert(NumSrcBits > NumDstBits &&
"Unexpected types for truncate operation");
15280 unsigned NumSrcBits = In.getScalarValueSizeInBits();
15282 assert(NumSrcBits > NumDstBits &&
"Unexpected types for truncate operation");
15305 auto AllowedTruncateSat = [&](
unsigned Opc,
EVT SrcVT,
EVT VT) ->
bool {
15317 }
else if (Src.getOpcode() ==
ISD::UMIN) {
15331 EVT VT =
N->getValueType(0);
15346 return SaturatedTR;
15373 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
15394 if (LegalTypes && !LegalOperations && VT.
isScalarInteger() && VT != MVT::i1 &&
15396 EVT TrTy =
N->getValueType(0);
15401 if (Src.getOpcode() ==
ISD::SRL && Src.getOperand(0)->hasOneUse()) {
15404 Src = Src.getOperand(0);
15411 EVT VecTy = Src.getOperand(0).getValueType();
15412 EVT ExTy = Src.getValueType();
15416 auto NewEltCnt = EltCnt * SizeRatio;
15421 SDValue EltNo = Src->getOperand(1);
15422 if (isa<ConstantSDNode>(EltNo) &&
isTypeLegal(NVT)) {
15424 int Index = isLE ? (Elt * SizeRatio + EltOffset)
15425 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
15436 if (!LegalOperations ||
15459 AddToWorklist(Amt.
getNode());
15508 if (BuildVectEltTy == TruncVecEltTy) {
15512 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15513 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
15515 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
15516 "Invalid number of elements");
15519 for (
unsigned i = FirstElt, e = BuildVecNumElts; i <
e;
15520 i += TruncEltOffset)
15530 if (
SDValue Reduced = reduceLoadWidth(
N))
15536 auto *LN0 = cast<LoadSDNode>(N0);
15553 unsigned NumDefs = 0;
15557 if (!
X.isUndef()) {
15568 X.getValueType().getVectorElementCount()));
15574 if (NumDefs == 1) {
15575 assert(
V.getNode() &&
"The single defined operand is empty!");
15577 for (
unsigned i = 0, e = VTs.
size(); i != e; ++i) {
15583 AddToWorklist(
NV.getNode());
15598 (!LegalOperations ||
15626 if (
SDValue NewVSel = matchVSelectOpSizesWithSetCC(
N))
15640 if (!LegalOperations && N0.
hasOneUse() &&
15673 if (!LegalOperations && N0.
hasOneUse() &&
15709 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15710 LD1->getAddressSpace() != LD2->getAddressSpace())
15713 unsigned LD1Fast = 0;
15714 EVT LD1VT = LD1->getValueType(0);
15719 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15720 return DAG.
getLoad(VT,
SDLoc(
N), LD1->getChain(), LD1->getBasePtr(),
15721 LD1->getPointerInfo(), LD1->getAlign());
15736 EVT VT =
N->getValueType(0);
15773 auto IsBitCastOrFree = [&TLI, FPOpcode](
SDValue Op,
EVT VT) {
15787 IsBitCastOrFree(LogicOp0, VT)) {
15790 NumFPLogicOpsConv++;
15801 EVT VT =
N->getValueType(0);
15817 cast<BuildVectorSDNode>(N0)->isConstant())
15818 return ConstantFoldBITCASTofBUILD_VECTOR(N0.
getNode(),
15826 if (!LegalOperations ||
15832 if (
C.getNode() !=
N)
15845 auto IsFreeBitcast = [VT](
SDValue V) {
15847 V.getOperand(0).getValueType() == VT) ||
15868 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15882 if (
SDValue V = foldBitcastedFPLogic(
N, DAG, TLI))
15902 AddToWorklist(NewConv.
getNode());
15905 if (N0.
getValueType() == MVT::ppcf128 && !LegalTypes) {
15912 AddToWorklist(FlipBit.
getNode());
15919 AddToWorklist(
Hi.getNode());
15921 AddToWorklist(FlipBit.
getNode());
15925 AddToWorklist(FlipBits.
getNode());
15955 AddToWorklist(
X.getNode());
15959 if (OrigXWidth < VTWidth) {
15961 AddToWorklist(
X.getNode());
15962 }
else if (OrigXWidth > VTWidth) {
15967 X.getValueType(),
X,
15969 X.getValueType()));
15970 AddToWorklist(
X.getNode());
15972 AddToWorklist(
X.getNode());
15975 if (N0.
getValueType() == MVT::ppcf128 && !LegalTypes) {
15978 AddToWorklist(Cst.
getNode());
15980 AddToWorklist(
X.getNode());
15982 AddToWorklist(XorResult.
getNode());
15986 SDLoc(XorResult)));
15987 AddToWorklist(XorResult64.
getNode());
15991 AddToWorklist(FlipBit.
getNode());
15994 AddToWorklist(FlipBits.
getNode());
16000 AddToWorklist(
X.getNode());
16005 AddToWorklist(Cst.
getNode());
16013 if (
SDValue CombineLD = CombineConsecutiveLoads(N0.
getNode(), VT))
16036 auto PeekThroughBitcast = [&](
SDValue Op) {
16038 Op.getOperand(0).getValueType() == VT)
16057 for (
int i = 0; i != MaskScale; ++i)
16058 NewMask.
push_back(M < 0 ? -1 : M * MaskScale + i);
16063 return LegalShuffle;
16070 EVT VT =
N->getValueType(0);
16071 return CombineConsecutiveLoads(
N, VT);
16098 bool AllowMultipleMaybePoisonOperands =
16132 bool HadMaybePoisonOperands = !MaybePoisonOperands.
empty();
16133 bool IsNewMaybePoisonOperand = MaybePoisonOperands.
insert(
Op).second;
16134 if (IsNewMaybePoisonOperand)
16135 MaybePoisonOperandNumbers.
push_back(OpNo);
16136 if (!HadMaybePoisonOperands)
16138 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16147 for (
unsigned OpNo : MaybePoisonOperandNumbers) {
16158 SDValue MaybePoisonOperand =
N->getOperand(0).getOperand(OpNo);
16167 FrozenMaybePoisonOperand.
getOperand(0) == FrozenMaybePoisonOperand) {
16171 MaybePoisonOperand);
16181 N0 =
N->getOperand(0);
16193 if (
auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
16202 "Can't create node that may be undef/poison!");
16209ConstantFoldBITCASTofBUILD_VECTOR(
SDNode *BV,
EVT DstEltVT) {
16213 if (SrcEltVT == DstEltVT)
return SDValue(BV, 0);
16220 if (SrcBitSize == DstBitSize) {
16225 if (
Op.getValueType() != SrcEltVT)
16228 AddToWorklist(Ops.
back().getNode());
16242 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
16250 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
16253 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
16261 auto *BVN = cast<BuildVectorSDNode>(BV);
16267 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
16272 for (
unsigned I = 0, E = RawBits.
size();
I != E; ++
I) {
16273 if (UndefElements[
I])
16289 N->getFlags().hasAllowContract();
16294 return Options.NoInfsFPMath ||
N->getFlags().hasNoInfs();
16298template <
class MatchContextClass>
16302 EVT VT =
N->getValueType(0);
16304 MatchContextClass matcher(DAG, TLI,
N);
16307 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16312 bool HasFMAD = !UseVP && (LegalOperations && TLI.
isFMADLegal(DAG,
N));
16316 (!LegalOperations || matcher.isOperationLegalOrCustom(
ISD::FMA, VT)) &&
16320 if (!HasFMAD && !HasFMA)
16324 Options.UnsafeFPMath || HasFMAD);
16326 if (!AllowFusionGlobally && !
N->getFlags().hasAllowContract())
16352 return AllowFusionGlobally ||
N->getFlags().hasAllowContract();
16363 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.
getOperand(0),
16370 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.
getOperand(0),
16382 bool CanReassociate =
16383 Options.UnsafeFPMath ||
N->getFlags().hasAllowReassociation();
16384 if (CanReassociate) {
16389 }
else if (isFusedOp(N1) && N1.
hasOneUse()) {
16395 while (E && isFusedOp(TmpFMA) && TmpFMA.
hasOneUse()) {
16420 PreferredFusedOpcode, SL, VT,
16434 PreferredFusedOpcode, SL, VT,
16446 return matcher.getNode(
16447 PreferredFusedOpcode, SL, VT,
X,
Y,
16448 matcher.getNode(PreferredFusedOpcode, SL, VT,
16452 if (isFusedOp(N0)) {
16473 return matcher.getNode(
16474 PreferredFusedOpcode, SL, VT,
16477 matcher.getNode(PreferredFusedOpcode, SL, VT,
16483 if (isFusedOp(N00)) {
16497 if (isFusedOp(N1)) {
16518 if (isFusedOp(N10)) {
16535template <
class MatchContextClass>
16539 EVT VT =
N->getValueType(0);
16541 MatchContextClass matcher(DAG, TLI,
N);
16544 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16549 bool HasFMAD = !UseVP && (LegalOperations && TLI.
isFMADLegal(DAG,
N));
16553 (!LegalOperations || matcher.isOperationLegalOrCustom(
ISD::FMA, VT)) &&
16557 if (!HasFMAD && !HasFMA)
16562 Options.UnsafeFPMath || HasFMAD);
16565 if (!AllowFusionGlobally && !
N->getFlags().hasAllowContract())
16574 bool NoSignedZero =
Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros();
16581 return AllowFusionGlobally ||
N->getFlags().hasAllowContract();
16587 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.
getOperand(0),
16598 return matcher.getNode(
16599 PreferredFusedOpcode, SL, VT,
16600 matcher.getNode(
ISD::FNEG, SL, VT, YZ.getOperand(0)),
16601 YZ.getOperand(1),
X);
16611 if (
SDValue V = tryToFoldXSubYZ(N0, N1))
16614 if (
SDValue V = tryToFoldXYSubZ(N0, N1))
16618 if (
SDValue V = tryToFoldXYSubZ(N0, N1))
16621 if (
SDValue V = tryToFoldXSubYZ(N0, N1))
16630 return matcher.
getNode(PreferredFusedOpcode, SL, VT,
16631 matcher.getNode(
ISD::FNEG, SL, VT, N00), N01,
16632 matcher.getNode(
ISD::FNEG, SL, VT, N1));
16645 PreferredFusedOpcode, SL, VT,
16648 matcher.getNode(
ISD::FNEG, SL, VT, N1));
16661 PreferredFusedOpcode, SL, VT,
16685 PreferredFusedOpcode, SL, VT,
16709 PreferredFusedOpcode, SL, VT,
16718 return Options.UnsafeFPMath ||
N->getFlags().hasAllowReassociation();
16732 bool CanFuse =
Options.UnsafeFPMath ||
N->getFlags().hasAllowContract();
16735 if (CanFuse && isFusedOp(N0) &&
16736 isContractableAndReassociableFMUL(N0.
getOperand(2)) &&
16738 return matcher.getNode(
16740 matcher.
getNode(PreferredFusedOpcode, SL, VT,
16748 if (CanFuse && isFusedOp(N1) &&
16749 isContractableAndReassociableFMUL(N1.
getOperand(2)) &&
16754 PreferredFusedOpcode, SL, VT,
16757 matcher.
getNode(PreferredFusedOpcode, SL, VT,
16758 matcher.getNode(
ISD::FNEG, SL, VT, N20), N21, N0));
16763 if (isFusedOp(N0) && N0->
hasOneUse()) {
16767 if (isContractableAndReassociableFMUL(N020) &&
16773 PreferredFusedOpcode, SL, VT,
16776 matcher.getNode(
ISD::FNEG, SL, VT, N1)));
16789 if (isFusedOp(N00)) {
16791 if (isContractableAndReassociableFMUL(N002) &&
16795 PreferredFusedOpcode, SL, VT,
16799 PreferredFusedOpcode, SL, VT,
16802 matcher.getNode(
ISD::FNEG, SL, VT, N1)));
16812 if (isContractableAndReassociableFMUL(N120) &&
16818 PreferredFusedOpcode, SL, VT,
16822 PreferredFusedOpcode, SL, VT,
16840 if (isContractableAndReassociableFMUL(N102) &&
16846 PreferredFusedOpcode, SL, VT,
16851 PreferredFusedOpcode, SL, VT,
16865SDValue DAGCombiner::visitFMULForFMADistributiveCombine(
SDNode *
N) {
16868 EVT VT =
N->getValueType(0);
16889 bool HasFMAD =
Options.UnsafeFPMath &&
16893 if (!HasFMAD && !HasFMA)
16905 if (
C->isExactlyValue(+1.0))
16906 return DAG.
getNode(PreferredFusedOpcode, SL, VT,
X.getOperand(0),
Y,
16908 if (
C->isExactlyValue(-1.0))
16909 return DAG.
getNode(PreferredFusedOpcode, SL, VT,
X.getOperand(0),
Y,
16916 if (
SDValue FMA = FuseFADD(N0, N1))
16918 if (
SDValue FMA = FuseFADD(N1, N0))
16928 if (C0->isExactlyValue(+1.0))
16929 return DAG.
getNode(PreferredFusedOpcode, SL, VT,
16932 if (C0->isExactlyValue(-1.0))
16933 return DAG.
getNode(PreferredFusedOpcode, SL, VT,
16938 if (C1->isExactlyValue(+1.0))
16939 return DAG.
getNode(PreferredFusedOpcode, SL, VT,
X.getOperand(0),
Y,
16941 if (C1->isExactlyValue(-1.0))
16942 return DAG.
getNode(PreferredFusedOpcode, SL, VT,
X.getOperand(0),
Y,
16949 if (
SDValue FMA = FuseFSUB(N0, N1))
16951 if (
SDValue FMA = FuseFSUB(N1, N0))
16961 if (
SDValue Fused = visitFADDForFMACombine<VPMatchContext>(
N)) {
16963 AddToWorklist(Fused.getNode());
16974 EVT VT =
N->getValueType(0);
16988 if (N0CFP && !N1CFP)
16993 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
16998 if (N1C && N1C->
isZero())
17002 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
17008 N1, DAG, LegalOperations, ForCodeSize))
17014 N0, DAG, LegalOperations, ForCodeSize))
17021 return C &&
C->isExactlyValue(-2.0);
17025 if (isFMulNegTwo(N0)) {
17031 if (isFMulNegTwo(N1)) {
17042 if ((
Options.NoNaNsFPMath ||
Flags.hasNoNaNs()) && AllowNewConst) {
17056 (
Flags.hasAllowReassociation() &&
Flags.hasNoSignedZeros())) &&
17074 if (CFP01 && !CFP00 && N0.
getOperand(0) == N1) {
17095 if (CFP11 && !CFP10 && N1.
getOperand(0) == N0) {
17143 VT, N0, N1, Flags))
17148 if (
SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(
N)) {
17150 AddToWorklist(Fused.getNode());
17160 EVT VT =
N->getValueType(0);
17161 EVT ChainVT =
N->getValueType(1);
17168 N1, DAG, LegalOperations, ForCodeSize)) {
17170 {Chain, N0, NegN1});
17176 N0, DAG, LegalOperations, ForCodeSize)) {
17178 {Chain, N1, NegN0});
17188 EVT VT =
N->getValueType(0);
17203 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
17206 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
17210 if (N1CFP && N1CFP->
isZero()) {
17212 Flags.hasNoSignedZeros()) {
17224 if (N0CFP && N0CFP->
isZero()) {
17226 (
Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros())) {
17243 (
Flags.hasAllowReassociation() &&
Flags.hasNoSignedZeros())) &&
17259 if (
SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(
N)) {
17260 AddToWorklist(Fused.getNode());
17283 EVT VT =
N->getValueType(0);
17286 std::optional<int> Mantissa;
17287 auto GetConstAndPow2Ops = [&](
unsigned ConstOpIdx) {
17288 if (ConstOpIdx == 1 &&
N->getOpcode() ==
ISD::FDIV)
17292 Pow2Op =
N->getOperand(1 - ConstOpIdx);
17305 if (CFP ==
nullptr)
17308 const APFloat &APF = CFP->getValueAPF();
17316 int CurExp =
ilogb(APF);
17319 N->getOpcode() ==
ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
17322 N->getOpcode() ==
ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
17330 Mantissa = ThisMantissa;
17332 return *Mantissa == ThisMantissa && ThisMantissa > 0;
17339 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
17368 NewIntVT, DAG.
getBitcast(NewIntVT, ConstOp), Shift);
17377 EVT VT =
N->getValueType(0);
17397 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
17400 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
17403 if (
Options.UnsafeFPMath ||
Flags.hasAllowReassociation()) {
17429 VT, N0, N1, Flags))
17463 if (
Flags.hasNoNaNs() &&
Flags.hasNoSignedZeros() &&
17471 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(
Select.getOperand(1));
17472 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(
Select.getOperand(2));
17474 if (TrueOpnd && FalseOpnd &&
17476 isa<ConstantFPSDNode>(
Cond.getOperand(1)) &&
17477 cast<ConstantFPSDNode>(
Cond.getOperand(1))->isExactlyValue(0.0)) {
17495 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17499 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17508 if (
SDValue Fused = visitFMULForFMADistributiveCombine(
N)) {
17509 AddToWorklist(Fused.getNode());
17515 if (
SDValue R = combineFMulOrFDivWithIntPow2(
N))
17521template <
class MatchContextClass>
SDValue DAGCombiner::visitFMA(
SDNode *
N) {
17527 EVT VT =
N->getValueType(0);
17532 MatchContextClass matcher(DAG, TLI,
N);
17557 if (N0CFP && N0CFP->
isZero())
17559 if (N1CFP && N1CFP->
isZero())
17567 return matcher.getNode(
ISD::FADD,
DL, VT, N0, N2);
17572 return matcher.getNode(
ISD::FMA,
DL, VT, N1, N0, N2);
17574 bool CanReassociate =
17575 Options.UnsafeFPMath ||
N->getFlags().hasAllowReassociation();
17576 if (CanReassociate) {
17581 return matcher.getNode(
17590 return matcher.getNode(
17600 return matcher.getNode(
ISD::FADD,
DL, VT, N0, N2);
17605 AddToWorklist(RHSNeg.
getNode());
17606 return matcher.getNode(
ISD::FADD,
DL, VT, N2, RHSNeg);
17620 if (CanReassociate) {
17622 if (N1CFP && N0 == N2) {
17640 SDValue(
N, 0), DAG, LegalOperations, ForCodeSize))
17649 EVT VT =
N->getValueType(0);
17672 if (LegalDAG || (!UnsafeMath && !
Flags.hasAllowReciprocal()))
17676 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
17687 unsigned NumElts = 1;
17688 EVT VT =
N->getValueType(0);
17692 if (!MinUses || (N1->
use_size() * NumElts) < MinUses)
17698 for (
auto *U : N1->
users()) {
17699 if (
U->getOpcode() ==
ISD::FDIV &&
U->getOperand(1) == N1) {
17701 if (
U->getOperand(1).getOpcode() ==
ISD::FSQRT &&
17702 U->getOperand(0) ==
U->getOperand(1).getOperand(0) &&
17703 U->getFlags().hasAllowReassociation() &&
17704 U->getFlags().hasNoSignedZeros())
17709 if (UnsafeMath ||
U->getFlags().hasAllowReciprocal())
17716 if ((
Users.size() * NumElts) < MinUses)
17724 for (
auto *U :
Users) {
17725 SDValue Dividend =
U->getOperand(0);
17726 if (Dividend != FPOne) {
17728 Reciprocal, Flags);
17729 CombineTo(U, NewNode);
17730 }
else if (U != Reciprocal.
getNode()) {
17733 CombineTo(U, Reciprocal);
17742 EVT VT =
N->getValueType(0);
17757 if (
SDValue FoldedVOp = SimplifyVBinOp(
N,
DL))
17760 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
17777 (
Options.UnsafeFPMath ||
Flags.hasAllowReciprocal()))) &&
17778 (!LegalOperations ||
17788 if (
Options.UnsafeFPMath ||
Flags.hasAllowReciprocal()) {
17828 A =
Y.getOperand(0);
17837 if (
SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17841 recursivelyDeleteUnusedNodes(AAZ.
getNode());
17849 AddToWorklist(Div.
getNode());
17857 if (
SDValue RV = BuildDivEstimate(N0, N1, Flags))
17862 if ((
Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros()) &&
17863 (
Options.UnsafeFPMath ||
Flags.hasAllowReassociation()))
17883 if (
SDValue R = combineFMulOrFDivWithIntPow2(
N))
17892 EVT VT =
N->getValueType(0);
17904 if (
SDValue NewSel = foldBinOpIntoSelect(
N))
17914 bool NeedsCopySign =
17938 if (!
Flags.hasApproximateFuncs() ||
17951 return buildSqrtEstimate(N0, Flags);
17966 if (YTy == MVT::f128)
17985 EVT VT =
N->getValueType(0);
17996 if (!
V.isNegative()) {
18050 EVT VT =
N->getValueType(0);
18060 if (!
Flags.hasNoSignedZeros() || !
Flags.hasNoInfs() || !
Flags.hasNoNaNs() ||
18061 !
Flags.hasApproximateFuncs())
18080 if (ExponentIs025 || ExponentIs075) {
18091 if ((!
Flags.hasNoSignedZeros() && ExponentIs025) || !
Flags.hasNoInfs() ||
18092 !
Flags.hasApproximateFuncs())
18125 EVT VT =
N->getValueType(0);
18146 EVT VT =
N->getValueType(0);
18194 EVT VT =
N->getValueType(0);
18232 EVT VT =
N->getValueType(0);
18238 EVT SrcVT = Src.getValueType();
18253 unsigned ActualSize = std::min(InputSize, OutputSize);
18262 return DAG.
getNode(ExtOp,
DL, VT, Src);
18273 EVT VT =
N->getValueType(0);
18289 EVT VT =
N->getValueType(0);
18305 EVT VT =
N->getValueType(0);
18324 EVT VT =
N->getValueType(0);
18337 const bool NIsTrunc =
N->getConstantOperandVal(1) == 1;
18375 AddToWorklist(Tmp.
getNode());
18379 if (
SDValue NewVSel = matchVSelectOpSizesWithSetCC(
N))
18387 EVT VT =
N->getValueType(0);
18391 if (
SDValue FoldedVOp = SimplifyVCastOp(
N,
DL))
18411 if (
In.getValueType() == VT)
return In;
18425 CombineTo(
N, ExtLoad);
18434 if (
SDValue NewVSel = matchVSelectOpSizesWithSetCC(
N))
18442 EVT VT =
N->getValueType(0);
18453 EVT VT =
N->getValueType(0);
18487 EVT VT =
N->getValueType(0);
18498 EVT VT =
N->getValueType(0);
18515 N->getFlags().hasNoSignedZeros()) && N0.
hasOneUse()) {
18520 if (
SDValue Cast = foldSignChangeInBitcast(
N))
18529 EVT VT =
N->getValueType(0);
18531 unsigned Opc =
N->getOpcode();
18553 return PropagatesNaN ?
N->getOperand(1) :
N->getOperand(0);
18562 if (IsMin == AF.
isNegative() && (!PropagatesNaN ||
Flags.hasNoNaNs()))
18563 return N->getOperand(1);
18569 if (IsMin != AF.
isNegative() && (PropagatesNaN ||
Flags.hasNoNaNs()))
18570 return N->getOperand(0);
18574 if (
SDValue SD = reassociateReduction(
18578 Opc,
SDLoc(
N), VT, N0, N1, Flags))
18586 EVT VT =
N->getValueType(0);
18595 return N->getOperand(0);
18602 if (
SDValue Cast = foldSignChangeInBitcast(
N))
18635 bool Updated =
false;
18647 return True || False;
18651 if (!IsAlwaysTrueOrFalse(
Cond, S1C)) {
18658 S1 =
S1->getOperand(0);
18690 if (
SDValue NewN1 = rebuildSetCC(N1))
18692 ChainHandle.getValue(), NewN1, N2,
N->getFlags());
18701 (
N.getOperand(0).hasOneUse() &&
18702 N.getOperand(0).getOpcode() ==
ISD::SRL))) {
18705 N =
N.getOperand(0);
18754 SDValue Tmp = visitXOR(
N.getNode());
18761 N = XORHandle.getValue();
18773 bool Equal =
false;
18778 Op0 =
N->getOperand(0);
18779 Op1 =
N->getOperand(1);
18783 EVT SetCCVT =
N.getValueType();
18803 SDValue CondLHS =
N->getOperand(2), CondRHS =
N->getOperand(3);
18813 CondLHS, CondRHS,
CC->get(),
SDLoc(
N),
18831 if (LD->isIndexed())
18833 EVT VT = LD->getMemoryVT();
18836 Ptr = LD->getBasePtr();
18837 }
else if (
StoreSDNode *ST = dyn_cast<StoreSDNode>(
N)) {
18838 if (ST->isIndexed())
18840 EVT VT = ST->getMemoryVT();
18843 Ptr = ST->getBasePtr();
18846 if (LD->isIndexed())
18848 EVT VT = LD->getMemoryVT();
18852 Ptr = LD->getBasePtr();
18855 if (ST->isIndexed())
18857 EVT VT = ST->getMemoryVT();
18861 Ptr = ST->getBasePtr();
18875bool DAGCombiner::CombineToPreIndexedLoadStore(
SDNode *
N) {
18879 bool IsLoad =
true;
18880 bool IsMasked =
false;
18902 bool Swapped =
false;
18903 if (isa<ConstantSDNode>(BasePtr)) {
18922 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18927 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(
N)->getValue()
18928 : cast<StoreSDNode>(
N)->getValue();
18931 if (Val == BasePtr)
18949 if (isa<ConstantSDNode>(
Offset))
18967 if (!isa<ConstantSDNode>(Op1)) {
18985 bool RealUse =
false;
19021 WorklistRemover DeadNodes(*
this);
19030 deleteAndRecombine(
N);
19036 for (
unsigned i = 0, e = OtherUses.
size(); i != e; ++i) {
19037 unsigned OffsetIdx = 1;
19038 if (OtherUses[i]->getOperand(OffsetIdx).
getNode() ==
BasePtr.getNode())
19041 BasePtr.getNode() &&
"Expected BasePtr operand");
19054 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
19055 const APInt &Offset0 = CN->getAPIntValue();
19057 int X0 = (OtherUses[i]->getOpcode() ==
ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19058 int Y0 = (OtherUses[i]->getOpcode() ==
ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19064 APInt CNV = Offset0;
19065 if (X0 < 0) CNV = -CNV;
19066 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19067 else CNV = CNV - Offset1;
19079 deleteAndRecombine(OtherUses[i]);
19084 deleteAndRecombine(
Ptr.getNode());
19085 AddToWorklist(
Result.getNode());
19106 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19116 if (isa<MemSDNode>(
User)) {
19117 bool IsLoad =
true;
19118 bool IsMasked =
false;
19121 IsMasked, OtherPtr, TLI)) {
19147 IsMasked,
Ptr, TLI) ||
19181bool DAGCombiner::CombineToPostIndexedLoadStore(
SDNode *
N) {
19185 bool IsLoad =
true;
19186 bool IsMasked =
false;
19207 ++PostIndexedNodes;
19211 WorklistRemover DeadNodes(*
this);
19220 deleteAndRecombine(
N);
19224 Result.getValue(IsLoad ? 1 : 0));
19225 deleteAndRecombine(
Op);
19240 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
19241 "Cannot split out indexing using opaque target constants");
19259 EVT STMemType =
ST->getMemoryVT();
19260 if (STType == STMemType)
19282 EVT LDMemType =
LD->getMemoryVT();
19283 EVT LDType =
LD->getValueType(0);
19285 "Attempting to extend value of non-matching type");
19286 if (LDType == LDMemType)
19289 switch (
LD->getExtensionType()) {
19325 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG,
Offset))
19328 GatherAllAliases(Store, Chain, Aliases);
19329 if (Aliases.
empty() ||
19330 (Aliases.
size() == 1 && Aliases.
front().getNode() == Store))
19339 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG,
Offset))
19355 if (!ST || !
ST->isSimple() ||
ST->getAddressSpace() !=
LD->getAddressSpace())
19358 EVT LDType =
LD->getValueType(0);
19359 EVT LDMemType =
LD->getMemoryVT();
19360 EVT STMemType =
ST->getMemoryVT();
19361 EVT STType =
ST->getValue().getValueType();
19385 int64_t OrigOffset =
Offset;
19398 STCoversLD = (
Offset == 0) && LdMemSize == StMemSize;
19404 if (
LD->isIndexed()) {
19411 return CombineTo(LD, Ops, 3);
19413 return CombineTo(LD, Val, Chain);
19420 if (
Offset == 0 && LDType == STType && STMemType == LDMemType) {
19423 return ReplaceLd(LD,
ST->getValue(), Chain);
19431 SDLoc(ST), STType);
19433 return ReplaceLd(LD, Val, Chain);
19451 if (
LD->getBasePtr().isUndef() ||
Offset != 0)
19456 if (!getTruncatedStoreValue(ST, Val))
19460 if (STMemType != LDMemType) {
19468 if (!extendLoadedValueToExtension(LD, Val))
19470 return ReplaceLd(LD, Val, Chain);
19475 deleteAndRecombine(Val.
getNode());
19488 if (
LD->isSimple()) {
19489 if (
N->getValueType(1) == MVT::Other) {
19491 if (!
N->hasAnyUseOfValue(0)) {
19499 dbgs() <<
"\nWith chain: "; Chain.
dump(&DAG);
19501 WorklistRemover DeadNodes(*
this);
19503 AddUsersToWorklist(Chain.
getNode());
19504 if (
N->use_empty())
19505 deleteAndRecombine(
N);
19511 assert(
N->getValueType(2) == MVT::Other &&
"Malformed indexed loads?");
19519 if (!
N->hasAnyUseOfValue(0) && (CanSplitIdx || !
N->hasAnyUseOfValue(1))) {
19522 if (
N->hasAnyUseOfValue(1) && CanSplitIdx) {
19523 Index = SplitIndexingFromLoad(LD);
19526 AddUsersToWorklist(
N);
19530 dbgs() <<
"\nWith: ";
Undef.dump(&DAG);
19531 dbgs() <<
" and 2 other values\n");
19532 WorklistRemover DeadNodes(*
this);
19536 deleteAndRecombine(
N);
19544 if (
auto V = ForwardStoreValueToDirectLoad(LD))
19551 if (*Alignment >
LD->getAlign() &&
19552 isAligned(*Alignment,
LD->getSrcValueOffset())) {
19554 LD->getExtensionType(),
SDLoc(
N),
LD->getValueType(0), Chain,
Ptr,
19555 LD->getPointerInfo(),
LD->getMemoryVT(), *Alignment,
19556 LD->getMemOperand()->getFlags(),
LD->getAAInfo());
19564 if (
LD->isUnindexed()) {
19566 SDValue BetterChain = FindBetterChain(LD, Chain);
19569 if (Chain != BetterChain) {
19575 BetterChain,
Ptr,
LD->getMemOperand());
19578 LD->getValueType(0),
19579 BetterChain,
Ptr,
LD->getMemoryVT(),
19580 LD->getMemOperand());
19585 MVT::Other, Chain, ReplLoad.
getValue(1));
19588 return CombineTo(
N, ReplLoad.
getValue(0), Token);
19593 if (CombineToPreIndexedLoadStore(
N) || CombineToPostIndexedLoadStore(
N))
19598 if (SliceUpLoad(
N))
19618struct LoadedSlice {
19622 bool ForCodeSize =
false;
19625 unsigned Loads = 0;
19626 unsigned Truncates = 0;
19627 unsigned CrossRegisterBanksCopies = 0;
19628 unsigned ZExts = 0;
19629 unsigned Shift = 0;
19631 explicit Cost(
bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
19634 Cost(
const LoadedSlice &LS,
bool ForCodeSize)
19635 : ForCodeSize(ForCodeSize), Loads(1) {
19636 EVT TruncType =
LS.Inst->getValueType(0);
19637 EVT LoadedType =
LS.getLoadedType();
19638 if (TruncType != LoadedType &&
19639 !
LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19647 void addSliceGain(
const LoadedSlice &LS) {
19656 if (
LS.canMergeExpensiveCrossRegisterBankCopy())
19657 ++CrossRegisterBanksCopies;
19661 Loads +=
RHS.Loads;
19662 Truncates +=
RHS.Truncates;
19663 CrossRegisterBanksCopies +=
RHS.CrossRegisterBanksCopies;
19664 ZExts +=
RHS.ZExts;
19665 Shift +=
RHS.Shift;
19670 return Loads ==
RHS.Loads && Truncates ==
RHS.Truncates &&
19671 CrossRegisterBanksCopies ==
RHS.CrossRegisterBanksCopies &&
19672 ZExts ==
RHS.ZExts && Shift ==
RHS.Shift;
19680 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19681 unsigned ExpensiveOpsRHS =
RHS.Loads +
RHS.CrossRegisterBanksCopies;
19684 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19685 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19686 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19687 (
RHS.Truncates +
RHS.ZExts +
RHS.Shift + ExpensiveOpsRHS);
19713 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19718 APInt getUsedBits()
const {
19723 assert(Origin &&
"No original load to compare against.");
19725 assert(Inst &&
"This slice is not bound to an instruction");
19727 "Extracted slice is bigger than the whole type!");
19729 UsedBits.setAllBits();
19730 UsedBits = UsedBits.zext(
BitWidth);
19731 UsedBits <<= Shift;
19736 unsigned getLoadedSize()
const {
19737 unsigned SliceSize = getUsedBits().popcount();
19738 assert(!(SliceSize & 0x7) &&
"Size is not a multiple of a byte.");
19739 return SliceSize / 8;
19744 EVT getLoadedType()
const {
19745 assert(DAG &&
"Missing context");
19760 bool isLegal()
const {
19762 if (!Origin || !Inst || !DAG)
19772 EVT SliceType = getLoadedType();
19783 if (PtrType == MVT::Untyped || PtrType.
isExtended())
19796 if (TruncateType != SliceType &&
19806 uint64_t getOffsetFromBase()
const {
19807 assert(DAG &&
"Missing context.");
19809 assert(!(Shift & 0x7) &&
"Shifts not aligned on Bytes are not supported.");
19813 "The size of the original loaded type is not a multiple of a"
19818 "Invalid shift amount for given loaded size");
19831 assert(Inst && Origin &&
"Unable to replace a non-existing slice.");
19835 int64_t
Offset =
static_cast<int64_t
>(getOffsetFromBase());
19836 assert(
Offset >= 0 &&
"Offset too big to fit in int64_t!");
19846 EVT SliceType = getLoadedType();
19856 if (SliceType != FinalType)
19866 bool canMergeExpensiveCrossRegisterBankCopy()
const {
19872 assert(DAG &&
"Missing context");
19874 EVT ResVT =
User->getValueType(0);
19888 if (!
TRI ||
TRI->getCommonSubClass(ArgRC, ResRC))
19893 unsigned IsFast = 0;
19934 const LoadedSlice &Second) {
19936 "Unable to match different memory origins.");
19938 assert((UsedBits & Second.getUsedBits()) == 0 &&
19939 "Slices are not supposed to overlap.");
19940 UsedBits |= Second.getUsedBits();
19949 LoadedSlice::Cost &GlobalLSCost) {
19950 unsigned NumberOfSlices = LoadedSlices.
size();
19952 if (NumberOfSlices < 2)
19957 llvm::sort(LoadedSlices, [](
const LoadedSlice &
LHS,
const LoadedSlice &
RHS) {
19958 assert(
LHS.Origin ==
RHS.Origin &&
"Different bases not implemented.");
19959 return LHS.getOffsetFromBase() <
RHS.getOffsetFromBase();
19961 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19964 const LoadedSlice *
First =
nullptr;
19965 const LoadedSlice *Second =
nullptr;
19966 for (
unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19969 Second = &LoadedSlices[CurrSlice];
19976 EVT LoadedType =
First->getLoadedType();
19979 if (LoadedType != Second->getLoadedType())
19983 Align RequiredAlignment;
19984 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19990 if (
First->getAlign() < RequiredAlignment)
19997 assert(GlobalLSCost.Loads > 0 &&
"We save more loads than we created!");
19998 --GlobalLSCost.Loads;
20015 const APInt &UsedBits,
bool ForCodeSize) {
20016 unsigned NumberOfSlices = LoadedSlices.
size();
20018 return NumberOfSlices > 1;
20021 if (NumberOfSlices != 2)
20029 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20031 OrigCost.Loads = 1;
20032 for (
unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20033 const LoadedSlice &LS = LoadedSlices[CurrSlice];
20035 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20036 GlobalSlicingCost += SliceCost;
20040 OrigCost.addSliceGain(LS);
20045 return OrigCost > GlobalSlicingCost;
20054bool DAGCombiner::SliceUpLoad(
SDNode *
N) {
20060 !
LD->getValueType(0).isInteger())
20066 if (
LD->getValueType(0).isScalableVector())
20071 APInt UsedBits(
LD->getValueSizeInBits(0), 0);
20078 for (
SDUse &U :
LD->uses()) {
20080 if (
U.getResNo() != 0)
20084 unsigned Shift = 0;
20089 Shift =
User->getConstantOperandVal(1);
20102 unsigned Width =
User->getValueSizeInBits(0);
20107 LoadedSlice
LS(
User, LD, Shift, &DAG);
20108 APInt CurrentUsedBits =
LS.getUsedBits();
20111 if ((CurrentUsedBits & UsedBits) != 0)
20114 UsedBits |= CurrentUsedBits;
20135 for (
const LoadedSlice &LS : LoadedSlices) {
20137 CombineTo(
LS.Inst, SliceInst,
true);
20141 "It takes more than a zext to get to the loaded slice!!");
20148 AddToWorklist(Chain.
getNode());
20155static std::pair<unsigned, unsigned>
20157 std::pair<unsigned, unsigned> Result(0, 0);
20161 !isa<ConstantSDNode>(V->getOperand(1)) ||
20166 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
20167 if (LD->getBasePtr() !=
Ptr)
return Result;
20170 if (V.getValueType() != MVT::i16 &&
20171 V.getValueType() != MVT::i32 &&
20172 V.getValueType() != MVT::i64)
20178 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
20180 if (NotMaskLZ & 7)
return Result;
20182 if (NotMaskTZ & 7)
return Result;
20183 if (NotMaskLZ == 64)
return Result;
20190 if (V.getValueType() != MVT::i64 && NotMaskLZ)
20191 NotMaskLZ -= 64-V.getValueSizeInBits();
20193 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
20194 switch (MaskedBytes) {
20198 default:
return Result;
20203 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes)
return Result;
20212 if (!LD->isOperandOf(Chain.
getNode()))
20217 Result.first = MaskedBytes;
20218 Result.second = NotMaskTZ/8;
20229 unsigned NumBytes = MaskInfo.first;
20230 unsigned ByteShift = MaskInfo.second;
20236 ByteShift*8, (ByteShift+NumBytes)*8);
20245 bool UseTruncStore;
20246 if (DC->isTypeLegal(VT))
20247 UseTruncStore =
false;
20250 UseTruncStore =
true;
20276 StOffset = ByteShift;
20307 if (!
ST->isSimple())
20315 if (
ST->isTruncatingStore() || VT.
isVector())
20318 unsigned Opc =
Value.getOpcode();
20330 std::pair<unsigned, unsigned> MaskedLoad;
20332 if (MaskedLoad.first)
20334 Value.getOperand(1), ST,
this))
20339 if (MaskedLoad.first)
20341 Value.getOperand(0), ST,
this))
20355 if (
LD->getBasePtr() !=
Ptr ||
20356 LD->getPointerInfo().getAddrSpace() !=
20357 ST->getPointerInfo().getAddrSpace())
20366 if (Imm == 0 ||
Imm.isAllOnes())
20371 unsigned BitsPerByteMask = 7u;
20372 unsigned LSB =
Imm.countr_zero() & ~BitsPerByteMask;
20373 unsigned MSB = (
Imm.getActiveBits() - 1) | BitsPerByteMask;
20401 unsigned ShAmt = 0;
20403 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
20407 if (ShAmt + NewBW < MSB)
20412 ? VTStoreSize - NewBW - ShAmt
20414 PtrOff = PtrAdjustmentInBits / 8;
20417 unsigned IsFast = 0;
20420 LD->getAddressSpace(), NewAlign,
20421 LD->getMemOperand()->getFlags(), &IsFast) &&
20426 if (ShAmt + NewBW > VTStoreSize)
20429 APInt NewImm =
Imm.lshr(ShAmt).trunc(NewBW);
20437 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
20438 LD->getMemOperand()->getFlags(),
LD->getAAInfo());
20443 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
20445 AddToWorklist(NewPtr.
getNode());
20446 AddToWorklist(NewLD.
getNode());
20447 AddToWorklist(NewVal.
getNode());
20448 WorklistRemover DeadNodes(*
this);
20466 EVT VT =
LD->getMemoryVT();
20468 LD->isNonTemporal() ||
ST->isNonTemporal() ||
20469 LD->getPointerInfo().getAddrSpace() != 0 ||
20470 ST->getPointerInfo().getAddrSpace() != 0)
20480 unsigned FastLD = 0, FastST = 0;
20487 *
LD->getMemOperand(), &FastLD) ||
20489 *
ST->getMemOperand(), &FastST) ||
20490 !FastLD || !FastST)
20494 LD->getBasePtr(),
LD->getMemOperand());
20497 ST->getBasePtr(),
ST->getMemOperand());
20499 AddToWorklist(NewLD.
getNode());
20500 AddToWorklist(NewST.
getNode());
20501 WorklistRemover DeadNodes(*
this);
20523bool DAGCombiner::isMulAddWithConstProfitable(
SDNode *MulNode,
SDValue AddNode,
20535 if (
User == MulNode)
20558 if (OtherOp == MulVar)
20586 unsigned NumStores) {
20589 SDLoc StoreDL(StoreNodes[0].MemNode);
20591 for (
unsigned i = 0; i < NumStores; ++i) {
20592 Visited.
insert(StoreNodes[i].MemNode);
20596 for (
unsigned i = 0; i < NumStores; ++i) {
20597 if (Visited.
insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20598 Chains.
push_back(StoreNodes[i].MemNode->getChain());
20601 assert(!Chains.
empty() &&
"Chain should have generated a chain");
20606 const Value *UnderlyingObj =
nullptr;
20607 for (
const auto &
MemOp : StoreNodes) {
20619 if (UnderlyingObj && UnderlyingObj != Obj)
20622 if (!UnderlyingObj)
20623 UnderlyingObj = Obj;
20629bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20631 bool IsConstantSrc,
bool UseVector,
bool UseTrunc) {
20636 assert((!UseTrunc || !UseVector) &&
20637 "This optimization cannot emit a vector truncating store");
20640 SDLoc DL(StoreNodes[0].MemNode);
20643 unsigned SizeInBits = NumStores * ElementSizeBits;
20646 std::optional<MachineMemOperand::Flags>
Flags;
20648 for (
unsigned I = 0;
I != NumStores; ++
I) {
20649 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[
I].MemNode);
20664 unsigned Elts = NumStores * NumMemElts;
20672 if (IsConstantSrc) {
20674 for (
unsigned I = 0;
I != NumStores; ++
I) {
20675 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[
I].MemNode);
20683 auto *
C = dyn_cast<ConstantSDNode>(Val);
20693 .zextOrTrunc(ElementSizeBits),
20703 DL, StoreTy, BuildVector);
20706 for (
unsigned i = 0; i < NumStores; ++i) {
20707 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20744 assert(IsConstantSrc &&
"Merged vector elements should use vector store");
20746 APInt StoreInt(SizeInBits, 0);
20751 for (
unsigned i = 0; i < NumStores; ++i) {
20752 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20755 SDValue Val = St->getValue();
20757 StoreInt <<= ElementSizeBits;
20759 StoreInt |=
C->getAPIntValue()
20760 .zextOrTrunc(ElementSizeBits)
20761 .zextOrTrunc(SizeInBits);
20763 StoreInt |=
C->getValueAPF()
20765 .zextOrTrunc(ElementSizeBits)
20766 .zextOrTrunc(SizeInBits);
20784 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20785 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);