1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "dagcombine"
79 
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of loads sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94  cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
97 static cl::opt<std::string>
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99  cl::desc("Only use DAG-combiner alias analysis in this"
100  " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107  cl::desc("Bypass the profitability model of load slicing"),
108  cl::init(false));
109 
110 static cl::opt<bool>
111  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112  cl::desc("DAG combiner may split indexing from loads"));
113 
114 static cl::opt<bool>
115  EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
116  cl::desc("DAG combiner enable merging multiple stores "
117  "into a wider store"));
118 
119 static cl::opt<unsigned> TokenFactorInlineLimit(
120  "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
121  cl::desc("Limit the number of operands to inline for Token Factors"));
122 
123 static cl::opt<unsigned> StoreMergeDependenceLimit(
124  "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
125  cl::desc("Limit the number of times for the same StoreNode and RootNode "
126  "to bail out in store merging dependence check"));
127 
128 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
129  "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
130  cl::desc("DAG combiner enable reducing the width of load/op/store "
131  "sequence"));
132 
133 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
134  "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
135  cl::desc("DAG combiner enable load/<replace bytes>/store with "
136  "a narrower store"));
137 
138 namespace {
139 
140  class DAGCombiner {
141  SelectionDAG &DAG;
142  const TargetLowering &TLI;
143  const SelectionDAGTargetInfo *STI;
144  CombineLevel Level = BeforeLegalizeTypes;
145  CodeGenOpt::Level OptLevel;
146  bool LegalDAG = false;
147  bool LegalOperations = false;
148  bool LegalTypes = false;
149  bool ForCodeSize;
150  bool DisableGenericCombines;
151 
152  /// Worklist of all of the nodes that need to be simplified.
153  ///
154  /// This must behave as a stack -- new nodes to process are pushed onto the
155  /// back and when processing we pop off of the back.
156  ///
157  /// The worklist will not contain duplicates but may contain null entries
158  /// due to nodes being deleted from the underlying DAG.
159  SmallVector<SDNode *, 64> Worklist;
160 
161  /// Mapping from an SDNode to its position on the worklist.
162  ///
163  /// This is used to find and remove nodes from the worklist (by nulling
164  /// them) when they are deleted from the underlying DAG. It relies on
165  /// stable indices of nodes within the worklist.
166  DenseMap<SDNode *, unsigned> WorklistMap;
167  /// This records all nodes attempted to be added to the worklist since we
168  /// considered a new worklist entry. Because we do not add duplicate nodes
169  /// to the worklist, this is different from the tail of the worklist.
170  SmallSetVector<SDNode *, 32> PruningList;
171 
172  /// Set of nodes which have been combined (at least once).
173  ///
174  /// This is used to allow us to reliably add any operands of a DAG node
175  /// which have not yet been combined to the worklist.
176  SmallPtrSet<SDNode *, 32> CombinedNodes;
177 
178  /// Map from candidate StoreNode to the pair of RootNode and count.
179  /// The count is used to track how many times we have seen the StoreNode
180  /// with the same RootNode bail out in dependence check. If we have seen
181  /// the bail out for the same pair many times over a limit, we won't
182  /// consider the StoreNode with the same RootNode as store merging
183  /// candidate again.
184  DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
185 
186  // AA - Used for DAG load/store alias analysis.
187  AliasAnalysis *AA;
188 
189  /// When an instruction is simplified, add all users of the instruction to
190  /// the work lists because they might get more simplified now.
191  void AddUsersToWorklist(SDNode *N) {
192  for (SDNode *Node : N->uses())
193  AddToWorklist(Node);
194  }
195 
196  /// Convenient shorthand to add a node and all of its users to the worklist.
197  void AddToWorklistWithUsers(SDNode *N) {
198  AddUsersToWorklist(N);
199  AddToWorklist(N);
200  }
201 
202  // Prune potentially dangling nodes. This is called after
203  // any visit to a node, but should also be called during a visit after any
204  // failed combine which may have created a DAG node.
205  void clearAddedDanglingWorklistEntries() {
206  // Check any nodes added to the worklist to see if they are prunable.
207  while (!PruningList.empty()) {
208  auto *N = PruningList.pop_back_val();
209  if (N->use_empty())
210  recursivelyDeleteUnusedNodes(N);
211  }
212  }
213 
214  SDNode *getNextWorklistEntry() {
215  // Before we do any work, remove nodes that are not in use.
216  clearAddedDanglingWorklistEntries();
217  SDNode *N = nullptr;
218  // The Worklist holds the SDNodes in order, but it may contain null
219  // entries.
220  while (!N && !Worklist.empty()) {
221  N = Worklist.pop_back_val();
222  }
223 
224  if (N) {
225  bool GoodWorklistEntry = WorklistMap.erase(N);
226  (void)GoodWorklistEntry;
227  assert(GoodWorklistEntry &&
228  "Found a worklist entry without a corresponding map entry!");
229  }
230  return N;
231  }
232 
233  /// Call the node-specific routine that folds each particular type of node.
234  SDValue visit(SDNode *N);
235 
236  public:
237  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
238  : DAG(D), TLI(D.getTargetLoweringInfo()),
239  STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
240  ForCodeSize = DAG.shouldOptForSize();
241  DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
242 
243  MaximumLegalStoreInBits = 0;
244  // We use the minimum store size here, since that's all we can guarantee
245  // for the scalable vector types.
246  for (MVT VT : MVT::all_valuetypes())
247  if (EVT(VT).isSimple() && VT != MVT::Other &&
248  TLI.isTypeLegal(EVT(VT)) &&
249  VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
250  MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
251  }
252 
253  void ConsiderForPruning(SDNode *N) {
254  // Mark this for potential pruning.
255  PruningList.insert(N);
256  }
257 
258  /// Add to the worklist, making sure its instance is at the back (next to
259  /// be processed).
260  void AddToWorklist(SDNode *N) {
261  assert(N->getOpcode() != ISD::DELETED_NODE &&
262  "Deleted Node added to Worklist");
263 
264  // Skip handle nodes as they can't usefully be combined and confuse the
265  // zero-use deletion strategy.
266  if (N->getOpcode() == ISD::HANDLENODE)
267  return;
268 
269  ConsiderForPruning(N);
270 
271  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
272  Worklist.push_back(N);
273  }
274 
275  /// Remove all instances of N from the worklist.
276  void removeFromWorklist(SDNode *N) {
277  CombinedNodes.erase(N);
278  PruningList.remove(N);
279  StoreRootCountMap.erase(N);
280 
281  auto It = WorklistMap.find(N);
282  if (It == WorklistMap.end())
283  return; // Not in the worklist.
284 
285  // Null out the entry rather than erasing it to avoid a linear operation.
286  Worklist[It->second] = nullptr;
287  WorklistMap.erase(It);
288  }
289 
290  void deleteAndRecombine(SDNode *N);
291  bool recursivelyDeleteUnusedNodes(SDNode *N);
292 
293  /// Replaces all uses of the results of one DAG node with new values.
294  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
295  bool AddTo = true);
296 
297  /// Replaces all uses of the results of one DAG node with new values.
298  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
299  return CombineTo(N, &Res, 1, AddTo);
300  }
301 
302  /// Replaces all uses of the results of one DAG node with new values.
303  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
304  bool AddTo = true) {
305  SDValue To[] = { Res0, Res1 };
306  return CombineTo(N, To, 2, AddTo);
307  }
308 
309  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
310 
311  private:
312  unsigned MaximumLegalStoreInBits;
313 
314  /// Check the specified integer node value to see if it can be simplified or
315  /// if things it uses can be simplified by bit propagation.
316  /// If so, return true.
317  bool SimplifyDemandedBits(SDValue Op) {
318  unsigned BitWidth = Op.getScalarValueSizeInBits();
319  APInt DemandedBits = APInt::getAllOnes(BitWidth);
320  return SimplifyDemandedBits(Op, DemandedBits);
321  }
322 
323  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
324  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
325  KnownBits Known;
326  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
327  return false;
328 
329  // Revisit the node.
330  AddToWorklist(Op.getNode());
331 
332  CommitTargetLoweringOpt(TLO);
333  return true;
334  }
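 // Illustrative use (editorial addition, not part of the original source):
 // a visitor that only cares about the low byte of an operand could guard a
 // fold with something like
 //   if (SimplifyDemandedBits(N->getOperand(0),
 //                            APInt::getLowBitsSet(BitWidth, 8)))
 //     return SDValue(N, 0);   // the operand was rewritten in place
 // e.g. for (and (shl X, 8), 0xFF) none of the demanded low 8 bits can be
 // set, so the operand can be replaced by a constant zero and the affected
 // nodes are queued for revisiting.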
335 
336  /// Check the specified vector node value to see if it can be simplified or
337  /// if things it uses can be simplified as it only uses some of the
338  /// elements. If so, return true.
339  bool SimplifyDemandedVectorElts(SDValue Op) {
340  // TODO: For now just pretend it cannot be simplified.
341  if (Op.getValueType().isScalableVector())
342  return false;
343 
344  unsigned NumElts = Op.getValueType().getVectorNumElements();
345  APInt DemandedElts = APInt::getAllOnes(NumElts);
346  return SimplifyDemandedVectorElts(Op, DemandedElts);
347  }
348 
349  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
350  const APInt &DemandedElts,
351  bool AssumeSingleUse = false);
352  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
353  bool AssumeSingleUse = false);
354 
355  bool CombineToPreIndexedLoadStore(SDNode *N);
356  bool CombineToPostIndexedLoadStore(SDNode *N);
357  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
358  bool SliceUpLoad(SDNode *N);
359 
360  // Scalars have size 0 to distinguish from singleton vectors.
361  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
362  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
363  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
364 
365  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
366  /// load.
367  ///
368  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
369  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
370  /// \param EltNo index of the vector element to load.
371  /// \param OriginalLoad load that EVE came from to be replaced.
372  /// \returns EVE on success, SDValue() on failure.
373  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
374  SDValue EltNo,
375  LoadSDNode *OriginalLoad);
376  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
377  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
378  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
379  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
380  SDValue PromoteIntBinOp(SDValue Op);
381  SDValue PromoteIntShiftOp(SDValue Op);
382  SDValue PromoteExtend(SDValue Op);
383  bool PromoteLoad(SDValue Op);
384 
385  /// Call the node-specific routine that knows how to fold each
386  /// particular type of node. If that doesn't do anything, try the
387  /// target-specific DAG combines.
388  SDValue combine(SDNode *N);
389 
390  // Visitation implementation - Implement dag node combining for different
391  // node types. The semantics are as follows:
392  // Return Value:
393  // SDValue.getNode() == 0 - No change was made
394  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
395  // otherwise - N should be replaced by the returned Operand.
396  //
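 // Illustrative sketch of this protocol (editorial addition, not part of the
 // original source; visitFOO is a hypothetical visitor):
 //   SDValue DAGCombiner::visitFOO(SDNode *N) {
 //     SDValue N0 = N->getOperand(0);
 //     if (isNullConstant(N->getOperand(1)))
 //       return N0;        // N should be replaced by this other value
 //     return SDValue();   // no change was made
 //   }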
397  SDValue visitTokenFactor(SDNode *N);
398  SDValue visitMERGE_VALUES(SDNode *N);
399  SDValue visitADD(SDNode *N);
400  SDValue visitADDLike(SDNode *N);
401  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
402  SDValue visitSUB(SDNode *N);
403  SDValue visitADDSAT(SDNode *N);
404  SDValue visitSUBSAT(SDNode *N);
405  SDValue visitADDC(SDNode *N);
406  SDValue visitADDO(SDNode *N);
407  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
408  SDValue visitSUBC(SDNode *N);
409  SDValue visitSUBO(SDNode *N);
410  SDValue visitADDE(SDNode *N);
411  SDValue visitADDCARRY(SDNode *N);
412  SDValue visitSADDO_CARRY(SDNode *N);
413  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
414  SDValue visitSUBE(SDNode *N);
415  SDValue visitSUBCARRY(SDNode *N);
416  SDValue visitSSUBO_CARRY(SDNode *N);
417  SDValue visitMUL(SDNode *N);
418  SDValue visitMULFIX(SDNode *N);
419  SDValue useDivRem(SDNode *N);
420  SDValue visitSDIV(SDNode *N);
421  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
422  SDValue visitUDIV(SDNode *N);
423  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
424  SDValue visitREM(SDNode *N);
425  SDValue visitMULHU(SDNode *N);
426  SDValue visitMULHS(SDNode *N);
427  SDValue visitAVG(SDNode *N);
428  SDValue visitSMUL_LOHI(SDNode *N);
429  SDValue visitUMUL_LOHI(SDNode *N);
430  SDValue visitMULO(SDNode *N);
431  SDValue visitIMINMAX(SDNode *N);
432  SDValue visitAND(SDNode *N);
433  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
434  SDValue visitOR(SDNode *N);
435  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
436  SDValue visitXOR(SDNode *N);
437  SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
438  SDValue visitSHL(SDNode *N);
439  SDValue visitSRA(SDNode *N);
440  SDValue visitSRL(SDNode *N);
441  SDValue visitFunnelShift(SDNode *N);
442  SDValue visitSHLSAT(SDNode *N);
443  SDValue visitRotate(SDNode *N);
444  SDValue visitABS(SDNode *N);
445  SDValue visitBSWAP(SDNode *N);
446  SDValue visitBITREVERSE(SDNode *N);
447  SDValue visitCTLZ(SDNode *N);
448  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
449  SDValue visitCTTZ(SDNode *N);
450  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
451  SDValue visitCTPOP(SDNode *N);
452  SDValue visitSELECT(SDNode *N);
453  SDValue visitVSELECT(SDNode *N);
454  SDValue visitSELECT_CC(SDNode *N);
455  SDValue visitSETCC(SDNode *N);
456  SDValue visitSETCCCARRY(SDNode *N);
457  SDValue visitSIGN_EXTEND(SDNode *N);
458  SDValue visitZERO_EXTEND(SDNode *N);
459  SDValue visitANY_EXTEND(SDNode *N);
460  SDValue visitAssertExt(SDNode *N);
461  SDValue visitAssertAlign(SDNode *N);
462  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
463  SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
464  SDValue visitTRUNCATE(SDNode *N);
465  SDValue visitBITCAST(SDNode *N);
466  SDValue visitFREEZE(SDNode *N);
467  SDValue visitBUILD_PAIR(SDNode *N);
468  SDValue visitFADD(SDNode *N);
469  SDValue visitSTRICT_FADD(SDNode *N);
470  SDValue visitFSUB(SDNode *N);
471  SDValue visitFMUL(SDNode *N);
472  SDValue visitFMA(SDNode *N);
473  SDValue visitFDIV(SDNode *N);
474  SDValue visitFREM(SDNode *N);
475  SDValue visitFSQRT(SDNode *N);
476  SDValue visitFCOPYSIGN(SDNode *N);
477  SDValue visitFPOW(SDNode *N);
478  SDValue visitSINT_TO_FP(SDNode *N);
479  SDValue visitUINT_TO_FP(SDNode *N);
480  SDValue visitFP_TO_SINT(SDNode *N);
481  SDValue visitFP_TO_UINT(SDNode *N);
482  SDValue visitFP_ROUND(SDNode *N);
483  SDValue visitFP_EXTEND(SDNode *N);
484  SDValue visitFNEG(SDNode *N);
485  SDValue visitFABS(SDNode *N);
486  SDValue visitFCEIL(SDNode *N);
487  SDValue visitFTRUNC(SDNode *N);
488  SDValue visitFFLOOR(SDNode *N);
489  SDValue visitFMinMax(SDNode *N);
490  SDValue visitBRCOND(SDNode *N);
491  SDValue visitBR_CC(SDNode *N);
492  SDValue visitLOAD(SDNode *N);
493 
494  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
495  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
496 
497  SDValue visitSTORE(SDNode *N);
498  SDValue visitLIFETIME_END(SDNode *N);
499  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
500  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
501  SDValue visitBUILD_VECTOR(SDNode *N);
502  SDValue visitCONCAT_VECTORS(SDNode *N);
503  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
504  SDValue visitVECTOR_SHUFFLE(SDNode *N);
505  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
506  SDValue visitINSERT_SUBVECTOR(SDNode *N);
507  SDValue visitMLOAD(SDNode *N);
508  SDValue visitMSTORE(SDNode *N);
509  SDValue visitMGATHER(SDNode *N);
510  SDValue visitMSCATTER(SDNode *N);
511  SDValue visitFP_TO_FP16(SDNode *N);
512  SDValue visitFP16_TO_FP(SDNode *N);
513  SDValue visitFP_TO_BF16(SDNode *N);
514  SDValue visitVECREDUCE(SDNode *N);
515  SDValue visitVPOp(SDNode *N);
516 
517  SDValue visitFADDForFMACombine(SDNode *N);
518  SDValue visitFSUBForFMACombine(SDNode *N);
519  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
520 
521  SDValue XformToShuffleWithZero(SDNode *N);
522  bool reassociationCanBreakAddressingModePattern(unsigned Opc,
523  const SDLoc &DL,
524  SDNode *N,
525  SDValue N0,
526  SDValue N1);
527  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
528  SDValue N1);
529  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
530  SDValue N1, SDNodeFlags Flags);
531 
532  SDValue visitShiftByConstant(SDNode *N);
533 
534  SDValue foldSelectOfConstants(SDNode *N);
535  SDValue foldVSelectOfConstants(SDNode *N);
536  SDValue foldBinOpIntoSelect(SDNode *BO);
537  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
538  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
539  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
540  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
541  SDValue N2, SDValue N3, ISD::CondCode CC,
542  bool NotExtCompare = false);
543  SDValue convertSelectOfFPConstantsToLoadOffset(
544  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
545  ISD::CondCode CC);
546  SDValue foldSignChangeInBitcast(SDNode *N);
547  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
548  SDValue N2, SDValue N3, ISD::CondCode CC);
549  SDValue foldSelectOfBinops(SDNode *N);
550  SDValue foldSextSetcc(SDNode *N);
551  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
552  const SDLoc &DL);
553  SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
554  SDValue unfoldMaskedMerge(SDNode *N);
555  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
556  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
557  const SDLoc &DL, bool foldBooleans);
558  SDValue rebuildSetCC(SDValue N);
559 
560  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
561  SDValue &CC, bool MatchStrict = false) const;
562  bool isOneUseSetCC(SDValue N) const;
563 
564  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
565  unsigned HiOp);
566  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
567  SDValue CombineExtLoad(SDNode *N);
568  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
569  SDValue combineRepeatedFPDivisors(SDNode *N);
570  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
571  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
572  SDValue BuildSDIV(SDNode *N);
573  SDValue BuildSDIVPow2(SDNode *N);
574  SDValue BuildUDIV(SDNode *N);
575  SDValue BuildSREMPow2(SDNode *N);
576  SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
577  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
578  SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
579  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
580  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
581  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
582  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
583  SDNodeFlags Flags, bool Reciprocal);
584  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
585  SDNodeFlags Flags, bool Reciprocal);
586  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
587  bool DemandHighBits = true);
588  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
589  SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
590  SDValue InnerPos, SDValue InnerNeg, bool HasPos,
591  unsigned PosOpcode, unsigned NegOpcode,
592  const SDLoc &DL);
593  SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
594  SDValue InnerPos, SDValue InnerNeg, bool HasPos,
595  unsigned PosOpcode, unsigned NegOpcode,
596  const SDLoc &DL);
597  SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
598  SDValue MatchLoadCombine(SDNode *N);
599  SDValue mergeTruncStores(StoreSDNode *N);
600  SDValue reduceLoadWidth(SDNode *N);
601  SDValue ReduceLoadOpStoreWidth(SDNode *N);
603  SDValue TransformFPLoadStorePair(SDNode *N);
604  SDValue convertBuildVecZextToZext(SDNode *N);
605  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
606  SDValue reduceBuildVecTruncToBitCast(SDNode *N);
607  SDValue reduceBuildVecToShuffle(SDNode *N);
608  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
609  ArrayRef<int> VectorMask, SDValue VecIn1,
610  SDValue VecIn2, unsigned LeftIdx,
611  bool DidSplitVec);
612  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
613 
614  /// Walk up chain skipping non-aliasing memory nodes,
615  /// looking for aliasing nodes and adding them to the Aliases vector.
616  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
617  SmallVectorImpl<SDValue> &Aliases);
618 
619  /// Return true if there is any possibility that the two addresses overlap.
620  bool mayAlias(SDNode *Op0, SDNode *Op1) const;
621 
622  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
623  /// chain (aliasing node.)
624  SDValue FindBetterChain(SDNode *N, SDValue Chain);
625 
626  /// Try to replace a store and any possibly adjacent stores on
627  /// consecutive chains with better chains. Return true only if St is
628  /// replaced.
629  ///
630  /// Notice that other chains may still be replaced even if the function
631  /// returns false.
632  bool findBetterNeighborChains(StoreSDNode *St);
633 
634  // Helper for findBetterNeighborChains. Walk up the store chain and add
635  // additional chained stores that do not overlap and can be parallelized.
636  bool parallelizeChainedStores(StoreSDNode *St);
637 
638  /// Holds a pointer to an LSBaseSDNode as well as information on where it
639  /// is located in a sequence of memory operations connected by a chain.
640  struct MemOpLink {
641  // Ptr to the mem node.
642  LSBaseSDNode *MemNode;
643 
644  // Offset from the base ptr.
645  int64_t OffsetFromBase;
646 
647  MemOpLink(LSBaseSDNode *N, int64_t Offset)
648  : MemNode(N), OffsetFromBase(Offset) {}
649  };
650 
651  // Classify the origin of a stored value.
652  enum class StoreSource { Unknown, Constant, Extract, Load };
653  StoreSource getStoreSource(SDValue StoreVal) {
654  switch (StoreVal.getOpcode()) {
655  case ISD::Constant:
656  case ISD::ConstantFP:
657  return StoreSource::Constant;
658  case ISD::EXTRACT_VECTOR_ELT:
659  case ISD::EXTRACT_SUBVECTOR:
660  return StoreSource::Extract;
661  case ISD::LOAD:
662  return StoreSource::Load;
663  default:
664  return StoreSource::Unknown;
665  }
666  }
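 // For example (editorial illustration, not from the original source): four
 // adjacent i8 stores whose values are ISD::Constant nodes classify as
 // StoreSource::Constant and may later be merged by mergeConsecutiveStores
 // into a single i32 store of the combined constant.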
667 
668  /// This is a helper function for visitMUL to check the profitability
669  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
670  /// MulNode is the original multiply, AddNode is (add x, c1),
671  /// and ConstNode is c2.
672  bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
673  SDValue ConstNode);
674 
675  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
676  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
677  /// the type of the loaded value to be extended.
678  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
679  EVT LoadResultTy, EVT &ExtVT);
680 
681  /// Helper function to calculate whether the given Load/Store can have its
682  /// width reduced to ExtVT.
683  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
684  EVT &MemVT, unsigned ShAmt = 0);
685 
686  /// Used by BackwardsPropagateMask to find suitable loads.
687  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
688  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
689  ConstantSDNode *Mask, SDNode *&NodeToMask);
690  /// Attempt to propagate a given AND node back to load leaves so that they
691  /// can be combined into narrow loads.
692  bool BackwardsPropagateMask(SDNode *N);
693 
694  /// Helper function for mergeConsecutiveStores which merges the component
695  /// store chains.
696  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
697  unsigned NumStores);
698 
699  /// This is a helper function for mergeConsecutiveStores. When the source
700  /// elements of the consecutive stores are all constants or all extracted
701  /// vector elements, try to merge them into one larger store introducing
702  /// bitcasts if necessary. \return True if a merged store was created.
703  bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
704  EVT MemVT, unsigned NumStores,
705  bool IsConstantSrc, bool UseVector,
706  bool UseTrunc);
707 
708  /// This is a helper function for mergeConsecutiveStores. Stores that
709  /// potentially may be merged with St are placed in StoreNodes. RootNode is
710  /// a chain predecessor to all store candidates.
711  void getStoreMergeCandidates(StoreSDNode *St,
712  SmallVectorImpl<MemOpLink> &StoreNodes,
713  SDNode *&Root);
714 
715  /// Helper function for mergeConsecutiveStores. Checks if candidate stores
716  /// have indirect dependency through their operands. RootNode is the
717  /// predecessor to all stores calculated by getStoreMergeCandidates and is
718  /// used to prune the dependency check. \return True if safe to merge.
719  bool checkMergeStoreCandidatesForDependencies(
720  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
721  SDNode *RootNode);
722 
723  /// This is a helper function for mergeConsecutiveStores. Given a list of
724  /// store candidates, find the first N that are consecutive in memory.
725  /// Returns 0 if there are not at least 2 consecutive stores to try merging.
726  unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
727  int64_t ElementSizeBytes) const;
728 
729  /// This is a helper function for mergeConsecutiveStores. It is used for
730  /// store chains that are composed entirely of constant values.
731  bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
732  unsigned NumConsecutiveStores,
733  EVT MemVT, SDNode *Root, bool AllowVectors);
734 
735  /// This is a helper function for mergeConsecutiveStores. It is used for
736  /// store chains that are composed entirely of extracted vector elements.
737  /// When extracting multiple vector elements, try to store them in one
738  /// vector store rather than a sequence of scalar stores.
739  bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
740  unsigned NumConsecutiveStores, EVT MemVT,
741  SDNode *Root);
742 
743  /// This is a helper function for mergeConsecutiveStores. It is used for
744  /// store chains that are composed entirely of loaded values.
745  bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
746  unsigned NumConsecutiveStores, EVT MemVT,
747  SDNode *Root, bool AllowVectors,
748  bool IsNonTemporalStore, bool IsNonTemporalLoad);
749 
750  /// Merge consecutive store operations into a wide store.
751  /// This optimization uses wide integers or vectors when possible.
752  /// \return true if stores were merged.
753  bool mergeConsecutiveStores(StoreSDNode *St);
754 
755  /// Try to transform a truncation where C is a constant:
756  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
757  ///
758  /// \p N needs to be a truncation and its first operand an AND. Other
759  /// requirements are checked by the function (e.g. that trunc is
760  /// single-use) and if missed an empty SDValue is returned.
761  SDValue distributeTruncateThroughAnd(SDNode *N);
762 
763  /// Helper function to determine whether the target supports the operation
764  /// given by \p Opcode for type \p VT, that is, whether the operation
765  /// is legal or custom before legalizing operations, and whether it is
766  /// legal (but not custom) after legalization.
767  bool hasOperation(unsigned Opcode, EVT VT) {
768  return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
769  }
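 // Illustrative use (editorial addition, not part of the original source):
 // a combine that wants to form a funnel shift only when the target can
 // handle it would guard the transform with
 //   if (!hasOperation(ISD::FSHL, VT))
 //     return SDValue();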
770 
771  public:
772  /// Runs the dag combiner on all nodes in the work list
773  void Run(CombineLevel AtLevel);
774 
775  SelectionDAG &getDAG() const { return DAG; }
776 
777  /// Returns a type large enough to hold any valid shift amount - before type
778  /// legalization these can be huge.
779  EVT getShiftAmountTy(EVT LHSTy) {
780  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
781  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
782  }
783 
784  /// This method returns true if we are running before type legalization or
785  /// if the specified VT is legal.
786  bool isTypeLegal(const EVT &VT) {
787  if (!LegalTypes) return true;
788  return TLI.isTypeLegal(VT);
789  }
790 
791  /// Convenience wrapper around TargetLowering::getSetCCResultType
792  EVT getSetCCResultType(EVT VT) const {
793  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
794  }
795 
796  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
797  SDValue OrigLoad, SDValue ExtLoad,
798  ISD::NodeType ExtType);
799  };
800 
801 /// This class is a DAGUpdateListener that removes any deleted
802 /// nodes from the worklist.
803 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
804  DAGCombiner &DC;
805 
806 public:
807  explicit WorklistRemover(DAGCombiner &dc)
808  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
809 
810  void NodeDeleted(SDNode *N, SDNode *E) override {
811  DC.removeFromWorklist(N);
812  }
813 };
814 
815 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
816  DAGCombiner &DC;
817 
818 public:
819  explicit WorklistInserter(DAGCombiner &dc)
820  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
821 
822  // FIXME: Ideally we could add N to the worklist, but this causes exponential
823  // compile time costs in large DAGs, e.g. Halide.
824  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
825 };
826 
827 } // end anonymous namespace
828 
829 //===----------------------------------------------------------------------===//
830 // TargetLowering::DAGCombinerInfo implementation
831 //===----------------------------------------------------------------------===//
832 
833 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
834  ((DAGCombiner*)DC)->AddToWorklist(N);
835 }
836 
837 SDValue TargetLowering::DAGCombinerInfo::
838 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
839  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
840 }
841 
842 SDValue TargetLowering::DAGCombinerInfo::
843 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
844  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
845 }
846 
847 SDValue TargetLowering::DAGCombinerInfo::
848 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
849  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
850 }
851 
852 bool TargetLowering::DAGCombinerInfo::
853 recursivelyDeleteUnusedNodes(SDNode *N) {
854  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
855 }
856 
857 void TargetLowering::DAGCombinerInfo::
858 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
859  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
860 }
861 
862 //===----------------------------------------------------------------------===//
863 // Helper Functions
864 //===----------------------------------------------------------------------===//
865 
866 void DAGCombiner::deleteAndRecombine(SDNode *N) {
867  removeFromWorklist(N);
868 
869  // If the operands of this node are only used by the node, they will now be
870  // dead. Make sure to re-visit them and recursively delete dead nodes.
871  for (const SDValue &Op : N->ops())
872  // For an operand generating multiple values, one of the values may
873  // become dead allowing further simplification (e.g. split index
874  // arithmetic from an indexed load).
875  if (Op->hasOneUse() || Op->getNumValues() > 1)
876  AddToWorklist(Op.getNode());
877 
878  DAG.DeleteNode(N);
879 }
880 
881 // APInts must be the same size for most operations; this helper
882 // function zero-extends the shorter of the pair so that they match.
883 // We provide an Offset so that we can create bitwidths that won't overflow.
884 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
885  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
886  LHS = LHS.zext(Bits);
887  RHS = RHS.zext(Bits);
888 }
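// Illustrative example (editorial addition, not part of the original source):
// given an i8 mask LHS = 0xF0 and an i16 mask RHS = 0x00FF,
// zeroExtendToMatch(LHS, RHS) widens both to 16 bits (0x00F0 and 0x00FF) so
// they can be combined with bitwise APInt operations; a non-zero Offset widens
// them further to leave headroom for shifts.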
889 
890 // Return true if this node is a setcc, or is a select_cc
891 // that selects between the target values used for true and false, making it
892 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
893 // the appropriate nodes based on the type of node we are checking. This
894 // simplifies life a bit for the callers.
895 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
896  SDValue &CC, bool MatchStrict) const {
897  if (N.getOpcode() == ISD::SETCC) {
898  LHS = N.getOperand(0);
899  RHS = N.getOperand(1);
900  CC = N.getOperand(2);
901  return true;
902  }
903 
904  if (MatchStrict &&
905  (N.getOpcode() == ISD::STRICT_FSETCC ||
906  N.getOpcode() == ISD::STRICT_FSETCCS)) {
907  LHS = N.getOperand(1);
908  RHS = N.getOperand(2);
909  CC = N.getOperand(3);
910  return true;
911  }
912 
913  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
914  !TLI.isConstFalseVal(N.getOperand(3)))
915  return false;
916 
917  if (TLI.getBooleanContents(N.getValueType()) ==
918  TargetLowering::UndefinedBooleanContent)
919  return false;
920 
921  LHS = N.getOperand(0);
922  RHS = N.getOperand(1);
923  CC = N.getOperand(4);
924  return true;
925 }
926 
927 /// Return true if this is a SetCC-equivalent operation with only one use.
928 /// If this is true, it allows the users to invert the operation for free when
929 /// it is profitable to do so.
930 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
931  SDValue N0, N1, N2;
932  if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
933  return true;
934  return false;
935 }
936 
937 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
938  if (!ScalarTy.isSimple())
939  return false;
940 
941  uint64_t MaskForTy = 0ULL;
942  switch (ScalarTy.getSimpleVT().SimpleTy) {
943  case MVT::i8:
944  MaskForTy = 0xFFULL;
945  break;
946  case MVT::i16:
947  MaskForTy = 0xFFFFULL;
948  break;
949  case MVT::i32:
950  MaskForTy = 0xFFFFFFFFULL;
951  break;
952  default:
953  return false;
954  break;
955  }
956 
957  APInt Val;
958  if (ISD::isConstantSplatVector(N, Val))
959  return Val.getLimitedValue() == MaskForTy;
960 
961  return false;
962 }
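// Illustrative example (editorial addition, not part of the original source):
// a BUILD_VECTOR or SPLAT_VECTOR whose elements are all the constant 0xFFFF
// matches ScalarTy == MVT::i16 here, i.e. it is the all-ones mask for that
// scalar type.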
963 
964 // Determines if it is a constant integer or a splat/build vector of constant
965 // integers (and undefs).
966 // Do not permit build vector implicit truncation.
967 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
968  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
969  return !(Const->isOpaque() && NoOpaques);
970  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
971  return false;
972  unsigned BitWidth = N.getScalarValueSizeInBits();
973  for (const SDValue &Op : N->op_values()) {
974  if (Op.isUndef())
975  continue;
976  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
977  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
978  (Const->isOpaque() && NoOpaques))
979  return false;
980  }
981  return true;
982 }
983 
984 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
985 // undef's.
986 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
987  if (V.getOpcode() != ISD::BUILD_VECTOR)
988  return false;
989  return isConstantOrConstantVector(V, NoOpaques) ||
990  ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
991 }
992 
993 // Determine if this is an indexed load with an opaque target constant index.
994 static bool canSplitIdx(LoadSDNode *LD) {
995  return MaySplitLoadIndex &&
996  (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
997  !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
998 }
999 
1000 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1001  const SDLoc &DL,
1002  SDNode *N,
1003  SDValue N0,
1004  SDValue N1) {
1005  // Currently this only tries to ensure we don't undo the GEP splits done by
1006  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1007  // we check if the following transformation would be problematic:
1008  // (load/store (add, (add, x, offset1), offset2)) ->
1009  // (load/store (add, x, offset1+offset2)).
1010 
1011  // (load/store (add, (add, x, y), offset2)) ->
1012  // (load/store (add, (add, x, offset2), y)).
1013 
1014  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1015  return false;
1016 
1017  auto *C2 = dyn_cast<ConstantSDNode>(N1);
1018  if (!C2)
1019  return false;
1020 
1021  const APInt &C2APIntVal = C2->getAPIntValue();
1022  if (C2APIntVal.getSignificantBits() > 64)
1023  return false;
1024 
1025  if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1026  if (N0.hasOneUse())
1027  return false;
1028 
1029  const APInt &C1APIntVal = C1->getAPIntValue();
1030  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1031  if (CombinedValueIntVal.getSignificantBits() > 64)
1032  return false;
1033  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1034 
1035  for (SDNode *Node : N->uses()) {
1036  if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1037  // Is x[offset2] already not a legal addressing mode? If so then
1038  // reassociating the constants breaks nothing (we test offset2 because
1039  // that's the one we hope to fold into the load or store).
1040  TargetLoweringBase::AddrMode AM;
1041  AM.HasBaseReg = true;
1042  AM.BaseOffs = C2APIntVal.getSExtValue();
1043  EVT VT = LoadStore->getMemoryVT();
1044  unsigned AS = LoadStore->getAddressSpace();
1045  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1046  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1047  continue;
1048 
1049  // Would x[offset1+offset2] still be a legal addressing mode?
1050  AM.BaseOffs = CombinedValue;
1051  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1052  return true;
1053  }
1054  }
1055  } else {
1056  if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1057  if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1058  return false;
1059 
1060  for (SDNode *Node : N->uses()) {
1061  auto *LoadStore = dyn_cast<MemSDNode>(Node);
1062  if (!LoadStore)
1063  return false;
1064 
1065  // Is x[offset2] a legal addressing mode? If so then
1066  // reassociating the constants breaks the address pattern.
1067  TargetLoweringBase::AddrMode AM;
1068  AM.HasBaseReg = true;
1069  AM.BaseOffs = C2APIntVal.getSExtValue();
1070  EVT VT = LoadStore->getMemoryVT();
1071  unsigned AS = LoadStore->getAddressSpace();
1072  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1073  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1074  return false;
1075  }
1076  return true;
1077  }
1078 
1079  return false;
1080 }
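// Illustrative example (editorial addition, not part of the original source):
// with N = (add (add x, 4096), 12) feeding a load, if [x + 12] is a legal
// addressing mode for the target but [x + 4108] is not, this function returns
// true and the caller avoids folding the two constants together, preserving
// the GEP split made by CodeGenPrepare.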
1081 
1082 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1083 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1084 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1085  SDValue N0, SDValue N1) {
1086  EVT VT = N0.getValueType();
1087 
1088  if (N0.getOpcode() != Opc)
1089  return SDValue();
1090 
1091  SDValue N00 = N0.getOperand(0);
1092  SDValue N01 = N0.getOperand(1);
1093 
1094  if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1095  if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1096  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1097  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1098  return DAG.getNode(Opc, DL, VT, N00, OpNode);
1099  return SDValue();
1100  }
1101  if (TLI.isReassocProfitable(DAG, N0, N1)) {
1102  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1103  // iff (op x, c1) has one use
1104  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
1105  return DAG.getNode(Opc, DL, VT, OpNode, N01);
1106  }
1107  }
1108 
1109  // Check for repeated operand logic simplifications.
1110  if (Opc == ISD::AND || Opc == ISD::OR) {
1111  // (N00 & N01) & N00 --> N00 & N01
1112  // (N00 & N01) & N01 --> N00 & N01
1113  // (N00 | N01) | N00 --> N00 | N01
1114  // (N00 | N01) | N01 --> N00 | N01
1115  if (N1 == N00 || N1 == N01)
1116  return N0;
1117  }
1118  if (Opc == ISD::XOR) {
1119  // (N00 ^ N01) ^ N00 --> N01
1120  if (N1 == N00)
1121  return N01;
1122  // (N00 ^ N01) ^ N01 --> N00
1123  if (N1 == N01)
1124  return N00;
1125  }
1126 
1127  if (TLI.isReassocProfitable(DAG, N0, N1)) {
1128  if (N1 != N01) {
1129  // Reassociate if (op N00, N1) already exists.
1130  if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1131  // If Op (Op N00, N1), N01 already exists,
1132  // we need to stop reassociating to avoid an infinite loop.
1133  if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1134  return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1135  }
1136  }
1137 
1138  if (N1 != N00) {
1139  // Reassociate if (op N01, N1) already exists.
1140  if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1141  // If Op (Op N01, N1), N00 already exists,
1142  // we need to stop reassociating to avoid an infinite loop.
1143  if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1144  return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1145  }
1146  }
1147  }
1148 
1149  return SDValue();
1150 }
1151 
1152 // Try to reassociate commutative binops.
1153 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1154  SDValue N1, SDNodeFlags Flags) {
1155  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1156 
1157  // Floating-point reassociation is not allowed without loose FP math.
1158  if (N0.getValueType().isFloatingPoint() ||
1159  N1.getValueType().isFloatingPoint())
1160  if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1161  return SDValue();
1162 
1163  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1164  return Combined;
1165  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1166  return Combined;
1167  return SDValue();
1168 }
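// Illustrative example (editorial addition, not part of the original source):
// for integer adds, reassociateOps turns (add (add x, 16), 32) into
// (add x, 48) via FoldConstantArithmetic, and roughly turns
// (add (add x, c1), y) into (add (add x, y), c1) when the target reports that
// form as profitable.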
1169 
1170 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1171  bool AddTo) {
1172  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1173  ++NodesCombined;
1174  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1175  To[0].dump(&DAG);
1176  dbgs() << " and " << NumTo - 1 << " other values\n");
1177  for (unsigned i = 0, e = NumTo; i != e; ++i)
1178  assert((!To[i].getNode() ||
1179  N->getValueType(i) == To[i].getValueType()) &&
1180  "Cannot combine value to value of different type!");
1181 
1182  WorklistRemover DeadNodes(*this);
1183  DAG.ReplaceAllUsesWith(N, To);
1184  if (AddTo) {
1185  // Push the new nodes and any users onto the worklist
1186  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1187  if (To[i].getNode())
1188  AddToWorklistWithUsers(To[i].getNode());
1189  }
1190  }
1191 
1192  // Finally, if the node is now dead, remove it from the graph. The node
1193  // may not be dead if the replacement process recursively simplified to
1194  // something else needing this node.
1195  if (N->use_empty())
1196  deleteAndRecombine(N);
1197  return SDValue(N, 0);
1198 }
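// Illustrative use (editorial addition, not part of the original source): a
// visitor that rewrites a two-result node, e.g. a load, typically does
//   return CombineTo(N, NewValue, NewChain);
// which replaces all uses of both results, queues the replacement values and
// their users for revisiting, and deletes N if it ended up dead.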
1199 
1200 void DAGCombiner::
1201 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1202  // Replace the old value with the new one.
1203  ++NodesCombined;
1204  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1205  dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1206 
1207  // Replace all uses. If any nodes become isomorphic to other nodes and
1208  // are deleted, make sure to remove them from our worklist.
1209  WorklistRemover DeadNodes(*this);
1210  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1211 
1212  // Push the new node and any (possibly new) users onto the worklist.
1213  AddToWorklistWithUsers(TLO.New.getNode());
1214 
1215  // Finally, if the node is now dead, remove it from the graph. The node
1216  // may not be dead if the replacement process recursively simplified to
1217  // something else needing this node.
1218  if (TLO.Old->use_empty())
1219  deleteAndRecombine(TLO.Old.getNode());
1220 }
1221 
1222 /// Check the specified integer node value to see if it can be simplified or if
1223 /// things it uses can be simplified by bit propagation. If so, return true.
1224 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1225  const APInt &DemandedElts,
1226  bool AssumeSingleUse) {
1227  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1228  KnownBits Known;
1229  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1230  AssumeSingleUse))
1231  return false;
1232 
1233  // Revisit the node.
1234  AddToWorklist(Op.getNode());
1235 
1236  CommitTargetLoweringOpt(TLO);
1237  return true;
1238 }
1239 
1240 /// Check the specified vector node value to see if it can be simplified or
1241 /// if things it uses can be simplified as it only uses some of the elements.
1242 /// If so, return true.
1243 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1244  const APInt &DemandedElts,
1245  bool AssumeSingleUse) {
1246  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1247  APInt KnownUndef, KnownZero;
1248  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1249  TLO, 0, AssumeSingleUse))
1250  return false;
1251 
1252  // Revisit the node.
1253  AddToWorklist(Op.getNode());
1254 
1255  CommitTargetLoweringOpt(TLO);
1256  return true;
1257 }
1258 
1259 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1260  SDLoc DL(Load);
1261  EVT VT = Load->getValueType(0);
1262  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1263 
1264  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1265  Trunc.dump(&DAG); dbgs() << '\n');
1266  WorklistRemover DeadNodes(*this);
1267  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1268  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1269  deleteAndRecombine(Load);
1270  AddToWorklist(Trunc.getNode());
1271 }
1272 
1273 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1274  Replace = false;
1275  SDLoc DL(Op);
1276  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1277  LoadSDNode *LD = cast<LoadSDNode>(Op);
1278  EVT MemVT = LD->getMemoryVT();
1279  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1280  : LD->getExtensionType();
1281  Replace = true;
1282  return DAG.getExtLoad(ExtType, DL, PVT,
1283  LD->getChain(), LD->getBasePtr(),
1284  MemVT, LD->getMemOperand());
1285  }
1286 
1287  unsigned Opc = Op.getOpcode();
1288  switch (Opc) {
1289  default: break;
1290  case ISD::AssertSext:
1291  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1292  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1293  break;
1294  case ISD::AssertZext:
1295  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1296  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1297  break;
1298  case ISD::Constant: {
1299  unsigned ExtOpc =
1300  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1301  return DAG.getNode(ExtOpc, DL, PVT, Op);
1302  }
1303  }
1304 
1305  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1306  return SDValue();
1307  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1308 }
1309 
1310 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1311  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1312  return SDValue();
1313  EVT OldVT = Op.getValueType();
1314  SDLoc DL(Op);
1315  bool Replace = false;
1316  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1317  if (!NewOp.getNode())
1318  return SDValue();
1319  AddToWorklist(NewOp.getNode());
1320 
1321  if (Replace)
1322  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1323  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1324  DAG.getValueType(OldVT));
1325 }
1326 
1327 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1328  EVT OldVT = Op.getValueType();
1329  SDLoc DL(Op);
1330  bool Replace = false;
1331  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1332  if (!NewOp.getNode())
1333  return SDValue();
1334  AddToWorklist(NewOp.getNode());
1335 
1336  if (Replace)
1337  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1338  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1339 }
1340 
1341 /// Promote the specified integer binary operation if the target indicates it is
1342 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1343 /// i32 since i16 instructions are longer.
1344 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1345  if (!LegalOperations)
1346  return SDValue();
1347 
1348  EVT VT = Op.getValueType();
1349  if (VT.isVector() || !VT.isInteger())
1350  return SDValue();
1351 
1352  // If operation type is 'undesirable', e.g. i16 on x86, consider
1353  // promoting it.
1354  unsigned Opc = Op.getOpcode();
1355  if (TLI.isTypeDesirableForOp(Opc, VT))
1356  return SDValue();
1357 
1358  EVT PVT = VT;
1359  // Consult target whether it is a good idea to promote this operation and
1360  // what's the right type to promote it to.
1361  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1362  assert(PVT != VT && "Don't know what type to promote to!");
1363 
1364  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1365 
1366  bool Replace0 = false;
1367  SDValue N0 = Op.getOperand(0);
1368  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1369 
1370  bool Replace1 = false;
1371  SDValue N1 = Op.getOperand(1);
1372  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1373  SDLoc DL(Op);
1374 
1375  SDValue RV =
1376  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1377 
1378  // We are always replacing N0/N1's use in N and only need additional
1379  // replacements if there are additional uses.
1380  // Note: We are checking uses of the *nodes* (SDNode) rather than values
1381  // (SDValue) here because the node may reference multiple values
1382  // (for example, the chain value of a load node).
1383  Replace0 &= !N0->hasOneUse();
1384  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1385 
1386  // Combine Op here so it is preserved past replacements.
1387  CombineTo(Op.getNode(), RV);
1388 
1389  // If operands have a use ordering, make sure we deal with
1390  // predecessor first.
1391  if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1392  std::swap(N0, N1);
1393  std::swap(NN0, NN1);
1394  }
1395 
1396  if (Replace0) {
1397  AddToWorklist(NN0.getNode());
1398  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1399  }
1400  if (Replace1) {
1401  AddToWorklist(NN1.getNode());
1402  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1403  }
1404  return Op;
1405  }
1406  return SDValue();
1407 }
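// Illustrative example (editorial addition, not part of the original source):
// on a target where i16 is undesirable and i32 is preferred, (add i16:a,
// i16:b) is rewritten by PromoteIntBinOp as
// (trunc i16 (add i32 (anyext a), (anyext b))), matching the
// TRUNCATE-of-promoted-op node built above.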
1408 
1409 /// Promote the specified integer shift operation if the target indicates it is
1410 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1411 /// i32 since i16 instructions are longer.
1412 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1413  if (!LegalOperations)
1414  return SDValue();
1415 
1416  EVT VT = Op.getValueType();
1417  if (VT.isVector() || !VT.isInteger())
1418  return SDValue();
1419 
1420  // If operation type is 'undesirable', e.g. i16 on x86, consider
1421  // promoting it.
1422  unsigned Opc = Op.getOpcode();
1423  if (TLI.isTypeDesirableForOp(Opc, VT))
1424  return SDValue();
1425 
1426  EVT PVT = VT;
1427  // Consult target whether it is a good idea to promote this operation and
1428  // what's the right type to promote it to.
1429  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1430  assert(PVT != VT && "Don't know what type to promote to!");
1431 
1432  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1433 
1434  bool Replace = false;
1435  SDValue N0 = Op.getOperand(0);
1436  if (Opc == ISD::SRA)
1437  N0 = SExtPromoteOperand(N0, PVT);
1438  else if (Opc == ISD::SRL)
1439  N0 = ZExtPromoteOperand(N0, PVT);
1440  else
1441  N0 = PromoteOperand(N0, PVT, Replace);
1442 
1443  if (!N0.getNode())
1444  return SDValue();
1445 
1446  SDLoc DL(Op);
1447  SDValue N1 = Op.getOperand(1);
1448  SDValue RV =
1449  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1450 
1451  if (Replace)
1452  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1453 
1454  // Deal with Op being deleted.
1455  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1456  return RV;
1457  }
1458  return SDValue();
1459 }
1460 
1461 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1462  if (!LegalOperations)
1463  return SDValue();
1464 
1465  EVT VT = Op.getValueType();
1466  if (VT.isVector() || !VT.isInteger())
1467  return SDValue();
1468 
1469  // If operation type is 'undesirable', e.g. i16 on x86, consider
1470  // promoting it.
1471  unsigned Opc = Op.getOpcode();
1472  if (TLI.isTypeDesirableForOp(Opc, VT))
1473  return SDValue();
1474 
1475  EVT PVT = VT;
1476  // Consult target whether it is a good idea to promote this operation and
1477  // what's the right type to promote it to.
1478  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1479  assert(PVT != VT && "Don't know what type to promote to!");
1480  // fold (aext (aext x)) -> (aext x)
1481  // fold (aext (zext x)) -> (zext x)
1482  // fold (aext (sext x)) -> (sext x)
1483  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1484  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1485  }
1486  return SDValue();
1487 }
1488 
1489 bool DAGCombiner::PromoteLoad(SDValue Op) {
1490  if (!LegalOperations)
1491  return false;
1492 
1493  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1494  return false;
1495 
1496  EVT VT = Op.getValueType();
1497  if (VT.isVector() || !VT.isInteger())
1498  return false;
1499 
1500  // If operation type is 'undesirable', e.g. i16 on x86, consider
1501  // promoting it.
1502  unsigned Opc = Op.getOpcode();
1503  if (TLI.isTypeDesirableForOp(Opc, VT))
1504  return false;
1505 
1506  EVT PVT = VT;
1507  // Consult target whether it is a good idea to promote this operation and
1508  // what's the right type to promote it to.
1509  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1510  assert(PVT != VT && "Don't know what type to promote to!");
1511 
1512  SDLoc DL(Op);
1513  SDNode *N = Op.getNode();
1514  LoadSDNode *LD = cast<LoadSDNode>(N);
1515  EVT MemVT = LD->getMemoryVT();
1516  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1517  : LD->getExtensionType();
1518  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1519  LD->getChain(), LD->getBasePtr(),
1520  MemVT, LD->getMemOperand());
1521  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1522 
1523  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1524  Result.dump(&DAG); dbgs() << '\n');
1525  WorklistRemover DeadNodes(*this);
1526  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1527  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1528  deleteAndRecombine(N);
1529  AddToWorklist(Result.getNode());
1530  return true;
1531  }
1532  return false;
1533 }
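// A sketch of the replacement above for a plain (non-extending) i16 load that
// is promoted to i32; the narrow memory access is kept, only the value type of
// the node changes:
//   Before:  t1: i16,ch = load<(load 2 from %p)> t0, %p
//   After:   t2: i32,ch = load<(load 2 from %p), anyext from i16> t0, %p
//            t3: i16 = truncate t2
// Users of the old loaded value are rewired to t3, users of the old chain to
// the chain result of t2.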
1534 
1535 /// Recursively delete a node which has no uses and any operands for
1536 /// which it is the only use.
1537 ///
1538 /// Note that this both deletes the nodes and removes them from the worklist.
1539 /// It also adds any nodes that have had a user deleted to the worklist, as they
1540 /// may now have only one use and be subject to other combines.
1541 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1542  if (!N->use_empty())
1543  return false;
1544 
1545  SmallSetVector<SDNode *, 16> Nodes;
1546  Nodes.insert(N);
1547  do {
1548  N = Nodes.pop_back_val();
1549  if (!N)
1550  continue;
1551 
1552  if (N->use_empty()) {
1553  for (const SDValue &ChildN : N->op_values())
1554  Nodes.insert(ChildN.getNode());
1555 
1556  removeFromWorklist(N);
1557  DAG.DeleteNode(N);
1558  } else {
1559  AddToWorklist(N);
1560  }
1561  } while (!Nodes.empty());
1562  return true;
1563 }
1564 
1565 //===----------------------------------------------------------------------===//
1566 // Main DAG Combiner implementation
1567 //===----------------------------------------------------------------------===//
1568 
1569 void DAGCombiner::Run(CombineLevel AtLevel) {
1570  // Set the instance variables so that the various visit routines may use them.
1571  Level = AtLevel;
1572  LegalDAG = Level >= AfterLegalizeDAG;
1573  LegalOperations = Level >= AfterLegalizeVectorOps;
1574  LegalTypes = Level >= AfterLegalizeTypes;
1575 
1576  WorklistInserter AddNodes(*this);
1577 
1578  // Add all the dag nodes to the worklist.
1579  for (SDNode &Node : DAG.allnodes())
1580  AddToWorklist(&Node);
1581 
1582  // Create a dummy node (which is not added to allnodes), that adds a reference
1583  // to the root node, preventing it from being deleted, and tracking any
1584  // changes of the root.
1585  HandleSDNode Dummy(DAG.getRoot());
1586 
1587  // While we have a valid worklist entry node, try to combine it.
1588  while (SDNode *N = getNextWorklistEntry()) {
1589  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1590  // N is deleted from the DAG, since they too may now be dead or may have a
1591  // reduced number of uses, allowing other xforms.
1592  if (recursivelyDeleteUnusedNodes(N))
1593  continue;
1594 
1595  WorklistRemover DeadNodes(*this);
1596 
1597  // If this combine is running after legalizing the DAG, re-legalize any
1598  // nodes pulled off the worklist.
1599  if (LegalDAG) {
1600  SmallSetVector<SDNode *, 16> UpdatedNodes;
1601  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1602 
1603  for (SDNode *LN : UpdatedNodes)
1604  AddToWorklistWithUsers(LN);
1605 
1606  if (!NIsValid)
1607  continue;
1608  }
1609 
1610  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1611 
1612  // Add any operands of the new node which have not yet been combined to the
1613  // worklist as well. Because the worklist uniques things already, this
1614  // won't repeatedly process the same operand.
1615  CombinedNodes.insert(N);
1616  for (const SDValue &ChildN : N->op_values())
1617  if (!CombinedNodes.count(ChildN.getNode()))
1618  AddToWorklist(ChildN.getNode());
1619 
1620  SDValue RV = combine(N);
1621 
1622  if (!RV.getNode())
1623  continue;
1624 
1625  ++NodesCombined;
1626 
1627  // If we get back the same node we passed in, rather than a new node or
1628  // zero, we know that the node must have defined multiple values and
1629  // CombineTo was used. Since CombineTo takes care of the worklist
1630  // mechanics for us, we have no work to do in this case.
1631  if (RV.getNode() == N)
1632  continue;
1633 
1634  assert(N->getOpcode() != ISD::DELETED_NODE &&
1635  RV.getOpcode() != ISD::DELETED_NODE &&
1636  "Node was deleted but visit returned new node!");
1637 
1638  LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1639 
1640  if (N->getNumValues() == RV->getNumValues())
1641  DAG.ReplaceAllUsesWith(N, RV.getNode());
1642  else {
1643  assert(N->getValueType(0) == RV.getValueType() &&
1644  N->getNumValues() == 1 && "Type mismatch");
1645  DAG.ReplaceAllUsesWith(N, &RV);
1646  }
1647 
1648  // Push the new node and any users onto the worklist. Omit this if the
1649  // new node is the EntryToken (e.g. if a store managed to get optimized
1650  // out), because re-visiting the EntryToken and its users will not uncover
1651  // any additional opportunities, but there may be a large number of such
1652  // users, potentially causing compile time explosion.
1653  if (RV.getOpcode() != ISD::EntryToken) {
1654  AddToWorklist(RV.getNode());
1655  AddUsersToWorklist(RV.getNode());
1656  }
1657 
1658  // Finally, if the node is now dead, remove it from the graph. The node
1659  // may not be dead if the replacement process recursively simplified to
1660  // something else needing this node. This will also take care of adding any
1661  // operands which have lost a user to the worklist.
1662  recursivelyDeleteUnusedNodes(N);
1663  }
1664 
1665  // If the root changed (e.g. it was a dead load), update the root.
1666  DAG.setRoot(Dummy.getValue());
1667  DAG.RemoveDeadNodes();
1668 }
1669 
1670 SDValue DAGCombiner::visit(SDNode *N) {
1671  switch (N->getOpcode()) {
1672  default: break;
1673  case ISD::TokenFactor: return visitTokenFactor(N);
1674  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1675  case ISD::ADD: return visitADD(N);
1676  case ISD::SUB: return visitSUB(N);
1677  case ISD::SADDSAT:
1678  case ISD::UADDSAT: return visitADDSAT(N);
1679  case ISD::SSUBSAT:
1680  case ISD::USUBSAT: return visitSUBSAT(N);
1681  case ISD::ADDC: return visitADDC(N);
1682  case ISD::SADDO:
1683  case ISD::UADDO: return visitADDO(N);
1684  case ISD::SUBC: return visitSUBC(N);
1685  case ISD::SSUBO:
1686  case ISD::USUBO: return visitSUBO(N);
1687  case ISD::ADDE: return visitADDE(N);
1688  case ISD::ADDCARRY: return visitADDCARRY(N);
1689  case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1690  case ISD::SUBE: return visitSUBE(N);
1691  case ISD::SUBCARRY: return visitSUBCARRY(N);
1692  case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1693  case ISD::SMULFIX:
1694  case ISD::SMULFIXSAT:
1695  case ISD::UMULFIX:
1696  case ISD::UMULFIXSAT: return visitMULFIX(N);
1697  case ISD::MUL: return visitMUL(N);
1698  case ISD::SDIV: return visitSDIV(N);
1699  case ISD::UDIV: return visitUDIV(N);
1700  case ISD::SREM:
1701  case ISD::UREM: return visitREM(N);
1702  case ISD::MULHU: return visitMULHU(N);
1703  case ISD::MULHS: return visitMULHS(N);
1704  case ISD::AVGFLOORS:
1705  case ISD::AVGFLOORU:
1706  case ISD::AVGCEILS:
1707  case ISD::AVGCEILU: return visitAVG(N);
1708  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1709  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1710  case ISD::SMULO:
1711  case ISD::UMULO: return visitMULO(N);
1712  case ISD::SMIN:
1713  case ISD::SMAX:
1714  case ISD::UMIN:
1715  case ISD::UMAX: return visitIMINMAX(N);
1716  case ISD::AND: return visitAND(N);
1717  case ISD::OR: return visitOR(N);
1718  case ISD::XOR: return visitXOR(N);
1719  case ISD::SHL: return visitSHL(N);
1720  case ISD::SRA: return visitSRA(N);
1721  case ISD::SRL: return visitSRL(N);
1722  case ISD::ROTR:
1723  case ISD::ROTL: return visitRotate(N);
1724  case ISD::FSHL:
1725  case ISD::FSHR: return visitFunnelShift(N);
1726  case ISD::SSHLSAT:
1727  case ISD::USHLSAT: return visitSHLSAT(N);
1728  case ISD::ABS: return visitABS(N);
1729  case ISD::BSWAP: return visitBSWAP(N);
1730  case ISD::BITREVERSE: return visitBITREVERSE(N);
1731  case ISD::CTLZ: return visitCTLZ(N);
1732  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1733  case ISD::CTTZ: return visitCTTZ(N);
1734  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1735  case ISD::CTPOP: return visitCTPOP(N);
1736  case ISD::SELECT: return visitSELECT(N);
1737  case ISD::VSELECT: return visitVSELECT(N);
1738  case ISD::SELECT_CC: return visitSELECT_CC(N);
1739  case ISD::SETCC: return visitSETCC(N);
1740  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1741  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1742  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1743  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1744  case ISD::AssertSext:
1745  case ISD::AssertZext: return visitAssertExt(N);
1746  case ISD::AssertAlign: return visitAssertAlign(N);
1747  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1748  case ISD::SIGN_EXTEND_VECTOR_INREG:
1749  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1750  case ISD::TRUNCATE: return visitTRUNCATE(N);
1751  case ISD::BITCAST: return visitBITCAST(N);
1752  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1753  case ISD::FADD: return visitFADD(N);
1754  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1755  case ISD::FSUB: return visitFSUB(N);
1756  case ISD::FMUL: return visitFMUL(N);
1757  case ISD::FMA: return visitFMA(N);
1758  case ISD::FDIV: return visitFDIV(N);
1759  case ISD::FREM: return visitFREM(N);
1760  case ISD::FSQRT: return visitFSQRT(N);
1761  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1762  case ISD::FPOW: return visitFPOW(N);
1763  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1764  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1765  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1766  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1767  case ISD::FP_ROUND: return visitFP_ROUND(N);
1768  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1769  case ISD::FNEG: return visitFNEG(N);
1770  case ISD::FABS: return visitFABS(N);
1771  case ISD::FFLOOR: return visitFFLOOR(N);
1772  case ISD::FMINNUM:
1773  case ISD::FMAXNUM:
1774  case ISD::FMINIMUM:
1775  case ISD::FMAXIMUM: return visitFMinMax(N);
1776  case ISD::FCEIL: return visitFCEIL(N);
1777  case ISD::FTRUNC: return visitFTRUNC(N);
1778  case ISD::BRCOND: return visitBRCOND(N);
1779  case ISD::BR_CC: return visitBR_CC(N);
1780  case ISD::LOAD: return visitLOAD(N);
1781  case ISD::STORE: return visitSTORE(N);
1782  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1783  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1784  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1785  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1786  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1787  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1788  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1789  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1790  case ISD::MGATHER: return visitMGATHER(N);
1791  case ISD::MLOAD: return visitMLOAD(N);
1792  case ISD::MSCATTER: return visitMSCATTER(N);
1793  case ISD::MSTORE: return visitMSTORE(N);
1794  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1795  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1796  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1797  case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1798  case ISD::FREEZE: return visitFREEZE(N);
1799  case ISD::VECREDUCE_FADD:
1800  case ISD::VECREDUCE_FMUL:
1801  case ISD::VECREDUCE_ADD:
1802  case ISD::VECREDUCE_MUL:
1803  case ISD::VECREDUCE_AND:
1804  case ISD::VECREDUCE_OR:
1805  case ISD::VECREDUCE_XOR:
1806  case ISD::VECREDUCE_SMAX:
1807  case ISD::VECREDUCE_SMIN:
1808  case ISD::VECREDUCE_UMAX:
1809  case ISD::VECREDUCE_UMIN:
1810  case ISD::VECREDUCE_FMAX:
1811  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1812 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1813 #include "llvm/IR/VPIntrinsics.def"
1814  return visitVPOp(N);
1815  }
1816  return SDValue();
1817 }
1818 
1819 SDValue DAGCombiner::combine(SDNode *N) {
1820  SDValue RV;
1821  if (!DisableGenericCombines)
1822  RV = visit(N);
1823 
1824  // If nothing happened, try a target-specific DAG combine.
1825  if (!RV.getNode()) {
1826  assert(N->getOpcode() != ISD::DELETED_NODE &&
1827  "Node was deleted but visit returned NULL!");
1828 
1829  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1830  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1831 
1832  // Expose the DAG combiner to the target combiner impls.
1833  TargetLowering::DAGCombinerInfo
1834  DagCombineInfo(DAG, Level, false, this);
1835 
1836  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1837  }
1838  }
1839 
1840  // If nothing happened still, try promoting the operation.
1841  if (!RV.getNode()) {
1842  switch (N->getOpcode()) {
1843  default: break;
1844  case ISD::ADD:
1845  case ISD::SUB:
1846  case ISD::MUL:
1847  case ISD::AND:
1848  case ISD::OR:
1849  case ISD::XOR:
1850  RV = PromoteIntBinOp(SDValue(N, 0));
1851  break;
1852  case ISD::SHL:
1853  case ISD::SRA:
1854  case ISD::SRL:
1855  RV = PromoteIntShiftOp(SDValue(N, 0));
1856  break;
1857  case ISD::SIGN_EXTEND:
1858  case ISD::ZERO_EXTEND:
1859  case ISD::ANY_EXTEND:
1860  RV = PromoteExtend(SDValue(N, 0));
1861  break;
1862  case ISD::LOAD:
1863  if (PromoteLoad(SDValue(N, 0)))
1864  RV = SDValue(N, 0);
1865  break;
1866  }
1867  }
1868 
1869  // If N is a commutative binary node, try to eliminate it if the commuted
1870  // version is already present in the DAG.
1871  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1872  N->getNumValues() == 1) {
1873  SDValue N0 = N->getOperand(0);
1874  SDValue N1 = N->getOperand(1);
1875 
1876  // Constant operands are canonicalized to RHS.
1877  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1878  SDValue Ops[] = {N1, N0};
1879  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1880  N->getFlags());
1881  if (CSENode)
1882  return SDValue(CSENode, 0);
1883  }
1884  }
1885 
1886  return RV;
1887 }
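// The TLI.PerformDAGCombine call above is the hook through which a backend
// contributes its own folds. A minimal sketch of such an override follows; the
// class name MyTargetLowering and the particular fold are hypothetical, while
// the hook signature and the DAGCombinerInfo members are the real
// TargetLowering interface.
SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  // Only fire once operation legalization has run.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // Hypothetical fold: rewrite (add x, x) into (shl x, 1).
  if (N->getOpcode() == ISD::ADD && N->getOperand(0) == N->getOperand(1)) {
    SDLoc DL(N);
    EVT VT = N->getValueType(0);
    return DCI.DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DCI.DAG.getConstant(1, DL, VT));
  }

  // Returning an empty SDValue tells DAGCombiner that nothing was combined.
  return SDValue();
}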
1888 
1889 /// Given a node, return its input chain if it has one, otherwise return a null
1890 /// sd operand.
1891 static SDValue getInputChainForNode(SDNode *N) {
1892  if (unsigned NumOps = N->getNumOperands()) {
1893  if (N->getOperand(0).getValueType() == MVT::Other)
1894  return N->getOperand(0);
1895  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1896  return N->getOperand(NumOps-1);
1897  for (unsigned i = 1; i < NumOps-1; ++i)
1898  if (N->getOperand(i).getValueType() == MVT::Other)
1899  return N->getOperand(i);
1900  }
1901  return SDValue();
1902 }
1903 
1904 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1905  // If N has two operands, where one has an input chain equal to the other,
1906  // the 'other' chain is redundant.
1907  if (N->getNumOperands() == 2) {
1908  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1909  return N->getOperand(0);
1910  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1911  return N->getOperand(1);
1912  }
1913 
1914  // Don't simplify token factors if optnone.
1915  if (OptLevel == CodeGenOpt::None)
1916  return SDValue();
1917 
1918  // Don't simplify the token factor if the node itself has too many operands.
1919  if (N->getNumOperands() > TokenFactorInlineLimit)
1920  return SDValue();
1921 
1922  // If the sole user is a token factor, we should make sure we have a
1923  // chance to merge them together. This prevents TF chains from inhibiting
1924  // optimizations.
1925  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1926  AddToWorklist(*(N->use_begin()));
1927 
1928  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1929  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1930  SmallPtrSet<SDNode*, 16> SeenOps;
1931  bool Changed = false; // If we should replace this token factor.
1932 
1933  // Start out with this token factor.
1934  TFs.push_back(N);
1935 
1936  // Iterate through token factors. The TFs list grows when new token factors are
1937  // encountered.
1938  for (unsigned i = 0; i < TFs.size(); ++i) {
1939  // Limit number of nodes to inline, to avoid quadratic compile times.
1940  // We have to add the outstanding Token Factors to Ops, otherwise we might
1941  // drop Ops from the resulting Token Factors.
1942  if (Ops.size() > TokenFactorInlineLimit) {
1943  for (unsigned j = i; j < TFs.size(); j++)
1944  Ops.emplace_back(TFs[j], 0);
1945  // Drop unprocessed Token Factors from TFs, so we do not add them to the
1946  // combiner worklist later.
1947  TFs.resize(i);
1948  break;
1949  }
1950 
1951  SDNode *TF = TFs[i];
1952  // Check each of the operands.
1953  for (const SDValue &Op : TF->op_values()) {
1954  switch (Op.getOpcode()) {
1955  case ISD::EntryToken:
1956  // Entry tokens don't need to be added to the list. They are
1957  // redundant.
1958  Changed = true;
1959  break;
1960 
1961  case ISD::TokenFactor:
1962  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1963  // Queue up for processing.
1964  TFs.push_back(Op.getNode());
1965  Changed = true;
1966  break;
1967  }
1968  LLVM_FALLTHROUGH;
1969 
1970  default:
1971  // Only add if it isn't already in the list.
1972  if (SeenOps.insert(Op.getNode()).second)
1973  Ops.push_back(Op);
1974  else
1975  Changed = true;
1976  break;
1977  }
1978  }
1979  }
1980 
1981  // Re-visit inlined Token Factors, to clean them up in case they have been
1982  // removed. Skip the first Token Factor, as this is the current node.
1983  for (unsigned i = 1, e = TFs.size(); i < e; i++)
1984  AddToWorklist(TFs[i]);
1985 
1986  // Remove Nodes that are chained to another node in the list. Do so
1987  // by walking up chains breadth-first, stopping when we've seen
1988  // another operand. In general we must climb to the EntryNode, but we can exit
1989  // early if we find all remaining work is associated with just one operand as
1990  // no further pruning is possible.
1991 
1992  // List of nodes to search through and original Ops from which they originate.
1993  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1994  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1995  SmallPtrSet<SDNode *, 16> SeenChains;
1996  bool DidPruneOps = false;
1997 
1998  unsigned NumLeftToConsider = 0;
1999  for (const SDValue &Op : Ops) {
2000  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2001  OpWorkCount.push_back(1);
2002  }
2003 
2004  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2005  // If this is an Op, we can remove the op from the list. Re-mark any
2006  // search associated with it as coming from the current OpNumber.
2007  if (SeenOps.contains(Op)) {
2008  Changed = true;
2009  DidPruneOps = true;
2010  unsigned OrigOpNumber = 0;
2011  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2012  OrigOpNumber++;
2013  assert((OrigOpNumber != Ops.size()) &&
2014  "expected to find TokenFactor Operand");
2015  // Re-mark worklist from OrigOpNumber to OpNumber
2016  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2017  if (Worklist[i].second == OrigOpNumber) {
2018  Worklist[i].second = OpNumber;
2019  }
2020  }
2021  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2022  OpWorkCount[OrigOpNumber] = 0;
2023  NumLeftToConsider--;
2024  }
2025  // Add if it's a new chain
2026  if (SeenChains.insert(Op).second) {
2027  OpWorkCount[OpNumber]++;
2028  Worklist.push_back(std::make_pair(Op, OpNumber));
2029  }
2030  };
2031 
2032  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2033  // We need to consider at least 2 Ops to prune.
2034  if (NumLeftToConsider <= 1)
2035  break;
2036  auto CurNode = Worklist[i].first;
2037  auto CurOpNumber = Worklist[i].second;
2038  assert((OpWorkCount[CurOpNumber] > 0) &&
2039  "Node should not appear in worklist");
2040  switch (CurNode->getOpcode()) {
2041  case ISD::EntryToken:
2042  // Hitting EntryToken is the only way for the search to terminate
2043  // without hitting another operand's search.
2044  // Prevent us from marking this
2045  // operand as considered.
2046  NumLeftToConsider++;
2047  break;
2048  case ISD::TokenFactor:
2049  for (const SDValue &Op : CurNode->op_values())
2050  AddToWorklist(i, Op.getNode(), CurOpNumber);
2051  break;
2052  case ISD::LIFETIME_START:
2053  case ISD::LIFETIME_END:
2054  case ISD::CopyFromReg:
2055  case ISD::CopyToReg:
2056  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2057  break;
2058  default:
2059  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2060  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2061  break;
2062  }
2063  OpWorkCount[CurOpNumber]--;
2064  if (OpWorkCount[CurOpNumber] == 0)
2065  NumLeftToConsider--;
2066  }
2067 
2068  // If we've changed things around then replace token factor.
2069  if (Changed) {
2070  SDValue Result;
2071  if (Ops.empty()) {
2072  // The entry token is the only possible outcome.
2073  Result = DAG.getEntryNode();
2074  } else {
2075  if (DidPruneOps) {
2076  SmallVector<SDValue, 8> PrunedOps;
2077  //
2078  for (const SDValue &Op : Ops) {
2079  if (SeenChains.count(Op.getNode()) == 0)
2080  PrunedOps.push_back(Op);
2081  }
2082  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2083  } else {
2084  Result = DAG.getTokenFactor(SDLoc(N), Ops);
2085  }
2086  }
2087  return Result;
2088  }
2089  return SDValue();
2090 }
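// For illustration, the flattening above turns a pair of single-use token
// factors such as
//   t5: ch = TokenFactor t1, t2
//   t7: ch = TokenFactor t5, t3
// into a single node
//   t8: ch = TokenFactor t1, t2, t3
// and the pruning step then drops any operand that is already reachable
// through another operand's chain (found by the breadth-first walk above).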
2091 
2092 /// MERGE_VALUES can always be eliminated.
2093 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2094  WorklistRemover DeadNodes(*this);
2095  // Replacing results may cause a different MERGE_VALUES to suddenly
2096  // be CSE'd with N, and carry its uses with it. Iterate until no
2097  // uses remain, to ensure that the node can be safely deleted.
2098  // First add the users of this node to the work list so that they
2099  // can be tried again once they have new operands.
2100  AddUsersToWorklist(N);
2101  do {
2102  // Do as a single replacement to avoid rewalking use lists.
2103  SmallVector<SDValue, 8> Ops;
2104  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2105  Ops.push_back(N->getOperand(i));
2106  DAG.ReplaceAllUsesWith(N, Ops.data());
2107  } while (!N->use_empty());
2108  deleteAndRecombine(N);
2109  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2110 }
2111 
2112 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2113 /// ConstantSDNode pointer else nullptr.
2114 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2115  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2116  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2117 }
2118 
2119 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2120 /// and that N may be folded in the load / store addressing mode.
2121 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2122  const TargetLowering &TLI) {
2123  EVT VT;
2124  unsigned AS;
2125 
2126  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2127  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2128  return false;
2129  VT = LD->getMemoryVT();
2130  AS = LD->getAddressSpace();
2131  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2132  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2133  return false;
2134  VT = ST->getMemoryVT();
2135  AS = ST->getAddressSpace();
2136  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2137  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2138  return false;
2139  VT = LD->getMemoryVT();
2140  AS = LD->getAddressSpace();
2141  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2142  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2143  return false;
2144  VT = ST->getMemoryVT();
2145  AS = ST->getAddressSpace();
2146  } else {
2147  return false;
2148  }
2149 
2150  TargetLowering::AddrMode AM;
2151  if (N->getOpcode() == ISD::ADD) {
2152  AM.HasBaseReg = true;
2153  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2154  if (Offset)
2155  // [reg +/- imm]
2156  AM.BaseOffs = Offset->getSExtValue();
2157  else
2158  // [reg +/- reg]
2159  AM.Scale = 1;
2160  } else if (N->getOpcode() == ISD::SUB) {
2161  AM.HasBaseReg = true;
2162  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2163  if (Offset)
2164  // [reg +/- imm]
2165  AM.BaseOffs = -Offset->getSExtValue();
2166  else
2167  // [reg +/- reg]
2168  AM.Scale = 1;
2169  } else {
2170  return false;
2171  }
2172 
2173  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2174  VT.getTypeForEVT(*DAG.getContext()), AS);
2175 }
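// A concrete reading of the AddrMode query above, for a pointer computation N
// feeding an unindexed load or store:
//   N = (add %base, 8)    ->  AM = {HasBaseReg, BaseOffs = 8}    i.e. [reg + 8]
//   N = (sub %base, 8)    ->  AM = {HasBaseReg, BaseOffs = -8}   i.e. [reg - 8]
//   N = (add %base, %idx) ->  AM = {HasBaseReg, Scale = 1}       i.e. [reg + reg]
// TLI.isLegalAddressingMode then decides whether the target's load/store forms
// can fold that computation for the given memory type and address space.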
2176 
2177 /// This inverts a canonicalization in IR that replaces a variable select arm
2178 /// with an identity constant. Codegen improves if we re-use the variable
2179 /// operand rather than load a constant. This can also be converted into a
2180 /// masked vector operation if the target supports it.
2181 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2182  bool ShouldCommuteOperands) {
2183  // Match a select as operand 1. The identity constant that we are looking for
2184  // is only valid as operand 1 of a non-commutative binop.
2185  SDValue N0 = N->getOperand(0);
2186  SDValue N1 = N->getOperand(1);
2187  if (ShouldCommuteOperands)
2188  std::swap(N0, N1);
2189 
2190  // TODO: Should this apply to scalar select too?
2191  if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT)
2192  return SDValue();
2193 
2194  unsigned Opcode = N->getOpcode();
2195  EVT VT = N->getValueType(0);
2196  SDValue Cond = N1.getOperand(0);
2197  SDValue TVal = N1.getOperand(1);
2198  SDValue FVal = N1.getOperand(2);
2199 
2200  // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
2201  // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
2202  // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
2203  auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
2204  if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) {
2205  switch (Opcode) {
2206  case ISD::FADD: // X + -0.0 --> X
2207  return C->isZero() && C->isNegative();
2208  case ISD::FSUB: // X - 0.0 --> X
2209  return C->isZero() && !C->isNegative();
2210  case ISD::FMUL: // X * 1.0 --> X
2211  case ISD::FDIV: // X / 1.0 --> X
2212  return C->isExactlyValue(1.0);
2213  }
2214  }
2215  if (ConstantSDNode *C = isConstOrConstSplat(V)) {
2216  switch (Opcode) {
2217  case ISD::ADD: // X + 0 --> X
2218  case ISD::SUB: // X - 0 --> X
2219  case ISD::SHL: // X << 0 --> X
2220  case ISD::SRA: // X s>> 0 --> X
2221  case ISD::SRL: // X u>> 0 --> X
2222  return C->isZero();
2223  case ISD::MUL: // X * 1 --> X
2224  return C->isOne();
2225  }
2226  }
2227  return false;
2228  };
2229 
2230  // This transform increases uses of N0, so freeze it to be safe.
2231  // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2232  if (isIdentityConstantForOpcode(Opcode, TVal)) {
2233  SDValue F0 = DAG.getFreeze(N0);
2234  SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2235  return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2236  }
2237  // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2238  if (isIdentityConstantForOpcode(Opcode, FVal)) {
2239  SDValue F0 = DAG.getFreeze(N0);
2240  SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2241  return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2242  }
2243 
2244  return SDValue();
2245 }
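// A worked instance of the transform above, using FADD, whose identity value
// on the select arm is -0.0:
//   fadd X, (vselect Cond, splat(-0.0), Y)
//     --> vselect Cond, freeze(X), (fadd freeze(X), Y)
// Lanes where Cond is true pass X through unchanged and only the remaining
// lanes pay for the fadd; X is frozen because its number of uses grows.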
2246 
2247 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2248  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2249  "Unexpected binary operator");
2250 
2251  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2252  auto BinOpcode = BO->getOpcode();
2253  EVT VT = BO->getValueType(0);
2254  if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2255  if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2256  return Sel;
2257 
2258  if (TLI.isCommutativeBinOp(BO->getOpcode()))
2259  if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2260  return Sel;
2261  }
2262 
2263  // Don't do this unless the old select is going away. We want to eliminate the
2264  // binary operator, not replace a binop with a select.
2265  // TODO: Handle ISD::SELECT_CC.
2266  unsigned SelOpNo = 0;
2267  SDValue Sel = BO->getOperand(0);
2268  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2269  SelOpNo = 1;
2270  Sel = BO->getOperand(1);
2271  }
2272 
2273  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2274  return SDValue();
2275 
2276  SDValue CT = Sel.getOperand(1);
2277  if (!isConstantOrConstantVector(CT, true) &&
2278  !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2279  return SDValue();
2280 
2281  SDValue CF = Sel.getOperand(2);
2282  if (!isConstantOrConstantVector(CF, true) &&
2283  !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2284  return SDValue();
2285 
2286  // Bail out if any constants are opaque because we can't constant fold those.
2287  // The exception is "and" and "or" with either 0 or -1 in which case we can
2288  // propagate non constant operands into select. I.e.:
2289  // and (select Cond, 0, -1), X --> select Cond, 0, X
2290  // or X, (select Cond, -1, 0) --> select Cond, -1, X
2291  bool CanFoldNonConst =
2292  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2293  ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2294  (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2295 
2296  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2297  if (!CanFoldNonConst &&
2298  !isConstantOrConstantVector(CBO, true) &&
2299  !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2300  return SDValue();
2301 
2302  // We have a select-of-constants followed by a binary operator with a
2303  // constant. Eliminate the binop by pulling the constant math into the select.
2304  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2305  SDLoc DL(Sel);
2306  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2307  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2308  if (!CanFoldNonConst && !NewCT.isUndef() &&
2309  !isConstantOrConstantVector(NewCT, true) &&
2310  !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2311  return SDValue();
2312 
2313  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2314  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2315  if (!CanFoldNonConst && !NewCF.isUndef() &&
2316  !isConstantOrConstantVector(NewCF, true) &&
2317  !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2318  return SDValue();
2319 
2320  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2321  SelectOp->setFlags(BO->getFlags());
2322  return SelectOp;
2323 }
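// Examples of the select/binop folds above:
//   add (select Cond, 16, 32), 4  -->  select Cond, 20, 36
//   and (select Cond, 0, -1), X   -->  select Cond, 0, X
// The second form is the special case where a non-constant operand may be
// propagated into the select because AND with 0/-1 needs no folding.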
2324 
2325 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2326  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2327  "Expecting add or sub");
2328 
2329  // Match a constant operand and a zext operand for the math instruction:
2330  // add Z, C
2331  // sub C, Z
2332  bool IsAdd = N->getOpcode() == ISD::ADD;
2333  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2334  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2335  auto *CN = dyn_cast<ConstantSDNode>(C);
2336  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2337  return SDValue();
2338 
2339  // Match the zext operand as a setcc of a boolean.
2340  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2341  Z.getOperand(0).getValueType() != MVT::i1)
2342  return SDValue();
2343 
2344  // Match the compare as: setcc (X & 1), 0, eq.
2345  SDValue SetCC = Z.getOperand(0);
2346  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2347  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2348  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2349  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2350  return SDValue();
2351 
2352  // We are adding/subtracting a constant and an inverted low bit. Turn that
2353  // into a subtract/add of the low bit with incremented/decremented constant:
2354  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2355  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2356  EVT VT = C.getValueType();
2357  SDLoc DL(N);
2358  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2359  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2360  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2361  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2362 }
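// Plugging a concrete constant (C = 7) into the rewrite above:
//   add (zext i1 (seteq (and X, 1), 0)), 7  -->  sub 8, (zext (and X, 1))
//   sub 7, (zext i1 (seteq (and X, 1), 0))  -->  add 6, (zext (and X, 1))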
2363 
2364 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2365 /// a shift and add with a different constant.
2366 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2367  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2368  "Expecting add or sub");
2369 
2370  // We need a constant operand for the add/sub, and the other operand is a
2371  // logical shift right: add (srl), C or sub C, (srl).
2372  bool IsAdd = N->getOpcode() == ISD::ADD;
2373  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2374  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2375  if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2376  ShiftOp.getOpcode() != ISD::SRL)
2377  return SDValue();
2378 
2379  // The shift must be of a 'not' value.
2380  SDValue Not = ShiftOp.getOperand(0);
2381  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2382  return SDValue();
2383 
2384  // The shift must be moving the sign bit to the least-significant-bit.
2385  EVT VT = ShiftOp.getValueType();
2386  SDValue ShAmt = ShiftOp.getOperand(1);
2387  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2388  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2389  return SDValue();
2390 
2391  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2392  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2393  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2394  SDLoc DL(N);
2395  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2396  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2397  if (SDValue NewC =
2398  DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2399  {ConstantOp, DAG.getConstant(1, DL, VT)}))
2400  return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2401  return SDValue();
2402 }
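// With i32 values and C = 7, the rewrite above reads:
//   add (srl (not X), 31), 7  -->  add (sra X, 31), 8
//   sub 7, (srl (not X), 31)  -->  add (srl X, 31), 6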
2403 
2404 static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
2405  unsigned Opcode = V.getOpcode();
2406  if (Opcode == ISD::OR)
2407  return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
2408  if (Opcode == ISD::XOR)
2409  return isMinSignedConstant(V.getOperand(1));
2410  return false;
2411 }
2412 
2413 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2414 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2415 /// are no common bits set in the operands).
2416 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2417  SDValue N0 = N->getOperand(0);
2418  SDValue N1 = N->getOperand(1);
2419  EVT VT = N0.getValueType();
2420  SDLoc DL(N);
2421 
2422  // fold (add x, undef) -> undef
2423  if (N0.isUndef())
2424  return N0;
2425  if (N1.isUndef())
2426  return N1;
2427 
2428  // fold (add c1, c2) -> c1+c2
2429  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2430  return C;
2431 
2432  // canonicalize constant to RHS
2433  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2434  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2435  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2436 
2437  // fold vector ops
2438  if (VT.isVector()) {
2439  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2440  return FoldedVOp;
2441 
2442  // fold (add x, 0) -> x, vector edition
2443  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2444  return N0;
2445  }
2446 
2447  // fold (add x, 0) -> x
2448  if (isNullConstant(N1))
2449  return N0;
2450 
2451  if (N0.getOpcode() == ISD::SUB) {
2452  SDValue N00 = N0.getOperand(0);
2453  SDValue N01 = N0.getOperand(1);
2454 
2455  // fold ((A-c1)+c2) -> (A+(c2-c1))
2456  if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2457  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2458 
2459  // fold ((c1-A)+c2) -> (c1+c2)-A
2460  if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2461  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2462  }
2463 
2464  // add (sext i1 X), 1 -> zext (not i1 X)
2465  // We don't transform this pattern:
2466  // add (zext i1 X), -1 -> sext (not i1 X)
2467  // because most (?) targets generate better code for the zext form.
2468  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2469  isOneOrOneSplat(N1)) {
2470  SDValue X = N0.getOperand(0);
2471  if ((!LegalOperations ||
2472  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2473  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2474  X.getScalarValueSizeInBits() == 1) {
2475  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2476  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2477  }
2478  }
2479 
2480  // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2481  // iff (or x, c0) is equivalent to (add x, c0).
2482  // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2483  // iff (xor x, c0) is equivalent to (add x, c0).
2484  if (isADDLike(N0, DAG)) {
2485  SDValue N01 = N0.getOperand(1);
2486  if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2487  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2488  }
2489 
2490  if (SDValue NewSel = foldBinOpIntoSelect(N))
2491  return NewSel;
2492 
2493  // reassociate add
2494  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2495  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2496  return RADD;
2497 
2498  // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2499  // equivalent to (add x, c).
2500  // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2501  // equivalent to (add x, c).
2502  auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2503  if (isADDLike(N0, DAG) && N0.hasOneUse() &&
2504  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2505  return DAG.getNode(ISD::ADD, DL, VT,
2506  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2507  N0.getOperand(1));
2508  }
2509  return SDValue();
2510  };
2511  if (SDValue Add = ReassociateAddOr(N0, N1))
2512  return Add;
2513  if (SDValue Add = ReassociateAddOr(N1, N0))
2514  return Add;
2515  }
2516  // fold ((0-A) + B) -> B-A
2517  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2518  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2519 
2520  // fold (A + (0-B)) -> A-B
2521  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2522  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2523 
2524  // fold (A+(B-A)) -> B
2525  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2526  return N1.getOperand(0);
2527 
2528  // fold ((B-A)+A) -> B
2529  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2530  return N0.getOperand(0);
2531 
2532  // fold ((A-B)+(C-A)) -> (C-B)
2533  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2534  N0.getOperand(0) == N1.getOperand(1))
2535  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2536  N0.getOperand(1));
2537 
2538  // fold ((A-B)+(B-C)) -> (A-C)
2539  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2540  N0.getOperand(1) == N1.getOperand(0))
2541  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2542  N1.getOperand(1));
2543 
2544  // fold (A+(B-(A+C))) to (B-C)
2545  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2546  N0 == N1.getOperand(1).getOperand(0))
2547  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2548  N1.getOperand(1).getOperand(1));
2549 
2550  // fold (A+(B-(C+A))) to (B-C)
2551  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2552  N0 == N1.getOperand(1).getOperand(1))
2553  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2554  N1.getOperand(1).getOperand(0));
2555 
2556  // fold (A+((B-A)+or-C)) to (B+or-C)
2557  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2558  N1.getOperand(0).getOpcode() == ISD::SUB &&
2559  N0 == N1.getOperand(0).getOperand(1))
2560  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2561  N1.getOperand(1));
2562 
2563  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2564  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2565  N0->hasOneUse() && N1->hasOneUse()) {
2566  SDValue N00 = N0.getOperand(0);
2567  SDValue N01 = N0.getOperand(1);
2568  SDValue N10 = N1.getOperand(0);
2569  SDValue N11 = N1.getOperand(1);
2570 
2571  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2572  return DAG.getNode(ISD::SUB, DL, VT,
2573  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2574  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2575  }
2576 
2577  // fold (add (umax X, C), -C) --> (usubsat X, C)
2578  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2579  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2580  return (!Max && !Op) ||
2581  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2582  };
2583  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2584  /*AllowUndefs*/ true))
2585  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2586  N0.getOperand(1));
2587  }
2588 
2589  if (SimplifyDemandedBits(SDValue(N, 0)))
2590  return SDValue(N, 0);
2591 
2592  if (isOneOrOneSplat(N1)) {
2593  // fold (add (xor a, -1), 1) -> (sub 0, a)
2594  if (isBitwiseNot(N0))
2595  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2596  N0.getOperand(0));
2597 
2598  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2599  if (N0.getOpcode() == ISD::ADD) {
2600  SDValue A, Xor;
2601 
2602  if (isBitwiseNot(N0.getOperand(0))) {
2603  A = N0.getOperand(1);
2604  Xor = N0.getOperand(0);
2605  } else if (isBitwiseNot(N0.getOperand(1))) {
2606  A = N0.getOperand(0);
2607  Xor = N0.getOperand(1);
2608  }
2609 
2610  if (Xor)
2611  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2612  }
2613 
2614  // Look for:
2615  // add (add x, y), 1
2616  // And if the target does not like this form then turn into:
2617  // sub y, (xor x, -1)
2618  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2619  N0.hasOneUse()) {
2620  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2621  DAG.getAllOnesConstant(DL, VT));
2622  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2623  }
2624  }
2625 
2626  // (x - y) + -1 -> add (xor y, -1), x
2627  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2628  isAllOnesOrAllOnesSplat(N1)) {
2629  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2630  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2631  }
2632 
2633  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2634  return Combined;
2635 
2636  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2637  return Combined;
2638 
2639  return SDValue();
2640 }
2641 
2642 SDValue DAGCombiner::visitADD(SDNode *N) {
2643  SDValue N0 = N->getOperand(0);
2644  SDValue N1 = N->getOperand(1);
2645  EVT VT = N0.getValueType();
2646  SDLoc DL(N);
2647 
2648  if (SDValue Combined = visitADDLike(N))
2649  return Combined;
2650 
2651  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2652  return V;
2653 
2654  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2655  return V;
2656 
2657  // fold (a+b) -> (a|b) iff a and b share no bits.
2658  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2659  DAG.haveNoCommonBitsSet(N0, N1))
2660  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2661 
2662  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2663  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2664  const APInt &C0 = N0->getConstantOperandAPInt(0);
2665  const APInt &C1 = N1->getConstantOperandAPInt(0);
2666  return DAG.getVScale(DL, VT, C0 + C1);
2667  }
2668 
2669  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2670  if ((N0.getOpcode() == ISD::ADD) &&
2671  (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2672  (N1.getOpcode() == ISD::VSCALE)) {
2673  const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2674  const APInt &VS1 = N1->getConstantOperandAPInt(0);
2675  SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2676  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2677  }
2678 
2679  // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2680  if (N0.getOpcode() == ISD::STEP_VECTOR &&
2681  N1.getOpcode() == ISD::STEP_VECTOR) {
2682  const APInt &C0 = N0->getConstantOperandAPInt(0);
2683  const APInt &C1 = N1->getConstantOperandAPInt(0);
2684  APInt NewStep = C0 + C1;
2685  return DAG.getStepVector(DL, VT, NewStep);
2686  }
2687 
2688  // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2689  if ((N0.getOpcode() == ISD::ADD) &&
2690  (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2691  (N1.getOpcode() == ISD::STEP_VECTOR)) {
2692  const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2693  const APInt &SV1 = N1->getConstantOperandAPInt(0);
2694  APInt NewStep = SV0 + SV1;
2695  SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2696  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2697  }
2698 
2699  return SDValue();
2700 }
2701 
2702 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2703  unsigned Opcode = N->getOpcode();
2704  SDValue N0 = N->getOperand(0);
2705  SDValue N1 = N->getOperand(1);
2706  EVT VT = N0.getValueType();
2707  SDLoc DL(N);
2708 
2709  // fold (add_sat x, undef) -> -1
2710  if (N0.isUndef() || N1.isUndef())
2711  return DAG.getAllOnesConstant(DL, VT);
2712 
2713  // fold (add_sat c1, c2) -> c3
2714  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2715  return C;
2716 
2717  // canonicalize constant to RHS
2718  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2719  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2720  return DAG.getNode(Opcode, DL, VT, N1, N0);
2721 
2722  // fold vector ops
2723  if (VT.isVector()) {
2724  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2725  return FoldedVOp;
2726 
2727  // fold (add_sat x, 0) -> x, vector edition
2728  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2729  return N0;
2730  }
2731 
2732  // fold (add_sat x, 0) -> x
2733  if (isNullConstant(N1))
2734  return N0;
2735 
2736  // If it cannot overflow, transform into an add.
2737  if (Opcode == ISD::UADDSAT)
2738  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2739  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2740 
2741  return SDValue();
2742 }
2743 
2744 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2745  bool Masked = false;
2746 
2747  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2748  while (true) {
2749  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2750  V = V.getOperand(0);
2751  continue;
2752  }
2753 
2754  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2755  Masked = true;
2756  V = V.getOperand(0);
2757  continue;
2758  }
2759 
2760  break;
2761  }
2762 
2763  // If this is not a carry, return.
2764  if (V.getResNo() != 1)
2765  return SDValue();
2766 
2767  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2768  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2769  return SDValue();
2770 
2771  EVT VT = V->getValueType(0);
2772  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2773  return SDValue();
2774 
2775  // If the result is masked, then no matter what kind of bool it is we can
2776  // return. If it isn't, then we need to make sure the bool type is either 0 or
2777  // 1 and not other values.
2778  if (Masked ||
2779  TLI.getBooleanContents(V.getValueType()) ==
2780  TargetLowering::ZeroOrOneBooleanContent)
2781  return V;
2782 
2783  return SDValue();
2784 }
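// For instance, legalization may have wrapped a carry bit like this, and the
// peeling above recovers the underlying flag value:
//   (and (zext i32 (uaddo i8 A, B):1), 1)  -->  (uaddo i8 A, B):1
// The AND with 1 marks the value as already masked, so it is accepted even if
// the target's boolean contents are not known to be 0/1.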
2785 
2786 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2787 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2788 /// the opcode and bypass the mask operation.
2789 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2790  SelectionDAG &DAG, const SDLoc &DL) {
2791  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2792  return SDValue();
2793 
2794  EVT VT = N0.getValueType();
2795  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2796  return SDValue();
2797 
2798  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2799  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2800  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2801 }
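// Concretely, if X is known to be all sign bits (0 or -1, e.g. an AssertSext
// to i1), then (and X, 1) is 0 or 1 and:
//   add N0, (and X, 1)  ==  sub N0, X    // adding 0/1 equals subtracting 0/-1
//   sub N0, (and X, 1)  ==  add N0, X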
2802 
2803 /// Helper for doing combines based on N0 and N1 being added to each other.
2804 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2805  SDNode *LocReference) {
2806  EVT VT = N0.getValueType();
2807  SDLoc DL(LocReference);
2808 
2809  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2810  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2811  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2812  return DAG.getNode(ISD::SUB, DL, VT, N0,
2813  DAG.getNode(ISD::SHL, DL, VT,
2814  N1.getOperand(0).getOperand(1),
2815  N1.getOperand(1)));
2816 
2817  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2818  return V;
2819 
2820  // Look for:
2821  // add (add x, 1), y
2822  // And if the target does not like this form then turn into:
2823  // sub y, (xor x, -1)
2824  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2825  N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
2826  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2827  DAG.getAllOnesConstant(DL, VT));
2828  return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2829  }
2830 
2831  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
2832  // Hoist one-use subtraction by non-opaque constant:
2833  // (x - C) + y -> (x + y) - C
2834  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2835  if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2836  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2837  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2838  }
2839  // Hoist one-use subtraction from non-opaque constant:
2840  // (C - x) + y -> (y - x) + C
2841  if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2842  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2843  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2844  }
2845  }
2846 
2847  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2848  // rather than 'add 0/-1' (the zext should get folded).
2849  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2850  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2851  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2852  TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2853  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2854  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2855  }
2856 
2857  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2858  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2859  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2860  if (TN->getVT() == MVT::i1) {
2861  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2862  DAG.getConstant(1, DL, VT));
2863  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2864  }
2865  }
2866 
2867  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2868  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2869  N1.getResNo() == 0)
2870  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2871  N0, N1.getOperand(0), N1.getOperand(2));
2872 
2873  // (add X, Carry) -> (addcarry X, 0, Carry)
2874  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2875  if (SDValue Carry = getAsCarry(TLI, N1))
2876  return DAG.getNode(ISD::ADDCARRY, DL,
2877  DAG.getVTList(VT, Carry.getValueType()), N0,
2878  DAG.getConstant(0, DL, VT), Carry);
2879 
2880  return SDValue();
2881 }
2882 
2883 SDValue DAGCombiner::visitADDC(SDNode *N) {
2884  SDValue N0 = N->getOperand(0);
2885  SDValue N1 = N->getOperand(1);
2886  EVT VT = N0.getValueType();
2887  SDLoc DL(N);
2888 
2889  // If the flag result is dead, turn this into an ADD.
2890  if (!N->hasAnyUseOfValue(1))
2891  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2892  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2893 
2894  // canonicalize constant to RHS.
2895  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2896  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2897  if (N0C && !N1C)
2898  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2899 
2900  // fold (addc x, 0) -> x + no carry out
2901  if (isNullConstant(N1))
2902  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2903  DL, MVT::Glue));
2904 
2905  // If it cannot overflow, transform into an add.
2906  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2907  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2908  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2909 
2910  return SDValue();
2911 }
2912 
2913 /**
2914  * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2915  * then the flip also occurs if computing the inverse is the same cost.
2916  * This function returns an empty SDValue in case it cannot flip the boolean
2917  * without increasing the cost of the computation. If you want to flip a boolean
2918  * no matter what, use DAG.getLogicalNOT.
2919  */
2920 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2921  const TargetLowering &TLI,
2922  bool Force) {
2923  if (Force && isa<ConstantSDNode>(V))
2924  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2925 
2926  if (V.getOpcode() != ISD::XOR)
2927  return SDValue();
2928 
2929  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2930  if (!Const)
2931  return SDValue();
2932 
2933  EVT VT = V.getValueType();
2934 
2935  bool IsFlip = false;
2936  switch(TLI.getBooleanContents(VT)) {
2937  case TargetLowering::ZeroOrOneBooleanContent:
2938  IsFlip = Const->isOne();
2939  break;
2940  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2941  IsFlip = Const->isAllOnes();
2942  break;
2943  case TargetLowering::UndefinedBooleanContent:
2944  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2945  break;
2946  }
2947 
2948  if (IsFlip)
2949  return V.getOperand(0);
2950  if (Force)
2951  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2952  return SDValue();
2953 }
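// For example, with booleans represented as 0/1 (ZeroOrOneBooleanContent):
//   V = (xor (setcc a, b, cc), 1)  -->  returns (setcc a, b, cc)
// telling the caller that V is the logical negation of the returned value.
// With Force set, a getLogicalNOT node is built when no cheap flip is found.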
2954 
2955 SDValue DAGCombiner::visitADDO(SDNode *N) {
2956  SDValue N0 = N->getOperand(0);
2957  SDValue N1 = N->getOperand(1);
2958  EVT VT = N0.getValueType();
2959  bool IsSigned = (ISD::SADDO == N->getOpcode());
2960 
2961  EVT CarryVT = N->getValueType(1);
2962  SDLoc DL(N);
2963 
2964  // If the flag result is dead, turn this into an ADD.
2965  if (!N->hasAnyUseOfValue(1))
2966  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2967  DAG.getUNDEF(CarryVT));
2968 
2969  // canonicalize constant to RHS.
2970  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2971  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2972  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2973 
2974  // fold (addo x, 0) -> x + no carry out
2975  if (isNullOrNullSplat(N1))
2976  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2977 
2978  if (!IsSigned) {
2979  // If it cannot overflow, transform into an add.
2980  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2981  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2982  DAG.getConstant(0, DL, CarryVT));
2983 
2984  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2985  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2986  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2987  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2988  return CombineTo(
2989  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2990  }
2991 
2992  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2993  return Combined;
2994 
2995  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2996  return Combined;
2997  }
2998 
2999  return SDValue();
3000 }
3001 
3002 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3003  EVT VT = N0.getValueType();
3004  if (VT.isVector())
3005  return SDValue();
3006 
3007  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
3008  // If Y + 1 cannot overflow.
3009  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
3010  SDValue Y = N1.getOperand(0);
3011  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3012  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
3013  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
3014  N1.getOperand(2));
3015  }
3016 
3017  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
3018  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
3019  if (SDValue Carry = getAsCarry(TLI, N1))
3020  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
3021  DAG.getConstant(0, SDLoc(N), VT), Carry);
3022 
3023  return SDValue();
3024 }
3025 
3026 SDValue DAGCombiner::visitADDE(SDNode *N) {
3027  SDValue N0 = N->getOperand(0);
3028  SDValue N1 = N->getOperand(1);
3029  SDValue CarryIn = N->getOperand(2);
3030 
3031  // canonicalize constant to RHS
3032  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3033  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3034  if (N0C && !N1C)
3035  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3036  N1, N0, CarryIn);
3037 
3038  // fold (adde x, y, false) -> (addc x, y)
3039  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3040  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3041 
3042  return SDValue();
3043 }
3044 
3045 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
3046  SDValue N0 = N->getOperand(0);
3047  SDValue N1 = N->getOperand(1);
3048  SDValue CarryIn = N->getOperand(2);
3049  SDLoc DL(N);
3050 
3051  // canonicalize constant to RHS
3052  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3053  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3054  if (N0C && !N1C)
3055  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
3056 
3057  // fold (addcarry x, y, false) -> (uaddo x, y)
3058  if (isNullConstant(CarryIn)) {
3059  if (!LegalOperations ||
3060  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3061  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3062  }
3063 
3064  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3065  if (isNullConstant(N0) && isNullConstant(N1)) {
3066  EVT VT = N0.getValueType();
3067  EVT CarryVT = CarryIn.getValueType();
3068  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3069  AddToWorklist(CarryExt.getNode());
3070  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3071  DAG.getConstant(1, DL, VT)),
3072  DAG.getConstant(0, DL, CarryVT));
3073  }
3074 
3075  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
3076  return Combined;
3077 
3078  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
3079  return Combined;
3080 
3081  return SDValue();
3082 }
3083 
3084 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3085  SDValue N0 = N->getOperand(0);
3086  SDValue N1 = N->getOperand(1);
3087  SDValue CarryIn = N->getOperand(2);
3088  SDLoc DL(N);
3089 
3090  // canonicalize constant to RHS
3091  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3092  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3093  if (N0C && !N1C)
3094  return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3095 
3096  // fold (saddo_carry x, y, false) -> (saddo x, y)
3097  if (isNullConstant(CarryIn)) {
3098  if (!LegalOperations ||
3099  TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3100  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3101  }
3102 
3103  return SDValue();
3104 }
3105 
3106 /**
3107  * If we are facing some sort of diamond carry propagation pattern, try to
3108  * break it up to generate something like:
3109  * (addcarry X, 0, (addcarry A, B, Z):Carry)
3110  *
3111  * The end result is usually an increase in the number of operations required, but
3112  * because the carry is now linearized, other transforms can kick in and optimize the DAG.
3113  *
3114  * Patterns typically look something like
3115  *                (uaddo A, B)
3116  *                 /       \
3117  *              Carry      Sum
3118  *                |          \
3119  *                |     (addcarry *, 0, Z)
3120  *                |         /
3121  *                 \    Carry
3122  *                  |   /
3123  *               (addcarry X, *, *)
3124  *
3125  * But numerous variations exist. Our goal is to identify A, B, X and Z and
3126  * produce a combine with a single path for carry propagation.
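 *
 * For example (roughly), a node of the form
 *   (addcarry X, (uaddo A, B):Carry, (addcarry (uaddo A, B):Sum, 0, Z):Carry)
 * is rewritten to
 *   (addcarry X, 0, (addcarry A, B, Z):Carry)
 * so that the carry uses form a single linear chain.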
3127  */
3128 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3129  SDValue X, SDValue Carry0, SDValue Carry1,
3130  SDNode *N) {
3131  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3132  return SDValue();
3133  if (Carry1.getOpcode() != ISD::UADDO)
3134  return SDValue();
3135 
3136  SDValue Z;
3137 
3138  /**
3139  * First look for a suitable Z. It will present itself in the form of
3140  * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3141  */
3142  if (Carry0.getOpcode() == ISD::ADDCARRY &&
3143  isNullConstant(Carry0.getOperand(1))) {
3144  Z = Carry0.getOperand(2);
3145  } else if (Carry0.getOpcode() == ISD::UADDO &&
3146  isOneConstant(Carry0.getOperand(1))) {
3147  EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3148  Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3149  } else {
3150  // We couldn't find a suitable Z.
3151  return SDValue();
3152  }
3153 
3154 
3155  auto cancelDiamond = [&](SDValue A,SDValue B) {
3156  SDLoc DL(N);
3157  SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3158  Combiner.AddToWorklist(NewY.getNode());
3159  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3160  DAG.getConstant(0, DL, X.getValueType()),
3161  NewY.getValue(1));
3162  };
3163 
3164  /**
3165  * (uaddo A, B)
3166  * |
3167  * Sum
3168  * |
3169  * (addcarry *, 0, Z)
3170  */
3171  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3172  return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3173  }
3174 
3175  /**
3176  * (addcarry A, 0, Z)
3177  * |
3178  * Sum
3179  * |
3180  * (uaddo *, B)
3181  */
3182  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3183  return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3184  }
3185 
3186  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3187  return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3188  }
3189 
3190  return SDValue();
3191 }
3192 
3193 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3194 // match patterns like:
3195 //
3196 //           (uaddo A, B)            CarryIn
3197 //            |        \                |
3198 //            |         \               |
3199 //       PartialSum   PartialCarryOutX  /
3200 //            |           |            /
3201 //            |       ____|___________/
3202 //            |      /    |
3203 //       (uaddo *, *)     \________
3204 //            |  \                 \
3205 //            |   \                 |
3206 //            |    PartialCarryOutY |
3207 //            |         \           |
3208 //            |          \          /
3209 //       AddCarrySum      |  ______/
3210 //                        | /
3211 //          CarryOut = (or *, *)
3212 //
3213 // And generate ADDCARRY (or SUBCARRY) with two result values:
3214 //
3215 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3216 //
3217 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3218 // a single path for carry/borrow out propagation:
3219 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3220  SDValue N0, SDValue N1, SDNode *N) {
3221  SDValue Carry0 = getAsCarry(TLI, N0);
3222  if (!Carry0)
3223  return SDValue();
3224  SDValue Carry1 = getAsCarry(TLI, N1);
3225  if (!Carry1)
3226  return SDValue();
3227 
3228  unsigned Opcode = Carry0.getOpcode();
3229  if (Opcode != Carry1.getOpcode())
3230  return SDValue();
3231  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3232  return SDValue();
3233 
3234  // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3235  // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3236  if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3237  std::swap(Carry0, Carry1);
3238 
3239  // Check if the nodes are connected in the expected way.
3240  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3241  Carry1.getOperand(1) != Carry0.getValue(0))
3242  return SDValue();
3243 
3244  // The carry in value must be on the righthand side for subtraction.
3245  unsigned CarryInOperandNum =
3246  Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3247  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3248  return SDValue();
3249  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3250 
3251  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3252  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3253  return SDValue();
3254 
3255  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3256  // TODO: make getAsCarry() aware of how partial carries are merged.
3257  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3258  return SDValue();
3259  CarryIn = CarryIn.getOperand(0);
3260  if (CarryIn.getValueType() != MVT::i1)
3261  return SDValue();
3262 
3263  SDLoc DL(N);
3264  SDValue Merged =
3265  DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3266  Carry0.getOperand(1), CarryIn);
3267 
3268  // Note that because we have proven that the result of the UADDO/USUBO of
3269  // A and B feeds into the UADDO/USUBO that consumes the carry/borrow in, it
3270  // follows that if the first UADDO/USUBO overflows, the second UADDO/USUBO
3271  // cannot. For example, consider 8-bit numbers where 0xFF is the
3272  // maximum value.
3273  //
3274  // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3275  // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3276  //
3277  // This is important because it means that OR and XOR can be used to merge
3278  // carry flags; and that AND can return a constant zero.
3279  //
3280  // TODO: match other operations that can merge flags (ADD, etc)
3281  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3282  if (N->getOpcode() == ISD::AND)
3283  return DAG.getConstant(0, DL, MVT::i1);
3284  return Merged.getValue(1);
3285 }
3286 
3287 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3288  SDNode *N) {
3289  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3290  if (isBitwiseNot(N0))
3291  if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3292  SDLoc DL(N);
3293  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3294  N0.getOperand(0), NotC);
3295  return CombineTo(
3296  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3297  }
3298 
3299  // Iff the flag result is dead:
3300  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3301  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3302  // or the dependency between the instructions.
3303  if ((N0.getOpcode() == ISD::ADD ||
3304  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3305  N0.getValue(1) != CarryIn)) &&
3306  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3307  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3308  N0.getOperand(0), N0.getOperand(1), CarryIn);
3309 
3310  /**
3311  * When one of the addcarry arguments is itself a carry, we may be facing
3312  * a diamond carry propagation, in which case we try to transform the DAG
3313  * to ensure linear carry propagation if that is possible.
3314  */
3315  if (auto Y = getAsCarry(TLI, N1)) {
3316  // Because both are carries, Y and Z can be swapped.
3317  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3318  return R;
3319  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3320  return R;
3321  }
3322 
3323  return SDValue();
3324 }
3325 
3326 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3327 // clamp/truncation if necessary.
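// For example, with SrcVT = i32 and DstVT = i16, if the upper 16 bits of LHS
// are known to be zero then
//   usubsat.i32(LHS, RHS) == usubsat.i16(trunc LHS, trunc(umin(RHS, 0xFFFF)))
// because clamping RHS to the i16 saturation limit preserves the result.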
3328 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3329  SDValue RHS, SelectionDAG &DAG,
3330  const SDLoc &DL) {
3331  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3332  "Illegal truncation");
3333 
3334  if (DstVT == SrcVT)
3335  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3336 
3337  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3338  // clamping RHS.
3339  APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3340  DstVT.getScalarSizeInBits());
3341  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3342  return SDValue();
3343 
3344  SDValue SatLimit =
3345  DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3346  DstVT.getScalarSizeInBits()),
3347  DL, SrcVT);
3348  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3349  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3350  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3351  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3352 }
3353 
3354 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3355 // usubsat(a,b), optionally as a truncated type.
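// For example, umax(a,b) - b == usubsat(a,b): when a >= b the result is a - b,
// otherwise umax(a,b) == b and the difference is 0, matching the saturating
// subtraction. The a - umin(a,b) form follows symmetrically.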
3356 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3357  if (N->getOpcode() != ISD::SUB ||
3358  !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3359  return SDValue();
3360 
3361  EVT SubVT = N->getValueType(0);
3362  SDValue Op0 = N->getOperand(0);
3363  SDValue Op1 = N->getOperand(1);
3364 
3365  // Try to find umax(a,b) - b or a - umin(a,b) patterns
3366  // that may be converted to usubsat(a,b).
3367  if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3368  SDValue MaxLHS = Op0.getOperand(0);
3369  SDValue MaxRHS = Op0.getOperand(1);
3370  if (MaxLHS == Op1)
3371  return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3372  if (MaxRHS == Op1)
3373  return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3374  }
3375 
3376  if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3377  SDValue MinLHS = Op1.getOperand(0);
3378  SDValue MinRHS = Op1.getOperand(1);
3379  if (MinLHS == Op0)
3380  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3381  if (MinRHS == Op0)
3382  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3383  }
3384 
3385  // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3386  if (Op1.getOpcode() == ISD::TRUNCATE &&
3387  Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3388  Op1.getOperand(0).hasOneUse()) {
3389  SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3390  SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3391  if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3392  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3393  DAG, SDLoc(N));
3394  if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3395  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3396  DAG, SDLoc(N));
3397  }
3398 
3399  return SDValue();
3400 }
3401 
3402 // Since it may not be valid to emit a fold to zero for vector initializers,
3403 // check if we can before folding.
3404 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3405  SelectionDAG &DAG, bool LegalOperations) {
3406  if (!VT.isVector())
3407  return DAG.getConstant(0, DL, VT);
3408  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3409  return DAG.getConstant(0, DL, VT);
3410  return SDValue();
3411 }
3412 
3413 SDValue DAGCombiner::visitSUB(SDNode *N) {
3414  SDValue N0 = N->getOperand(0);
3415  SDValue N1 = N->getOperand(1);
3416  EVT VT = N0.getValueType();
3417  SDLoc DL(N);
3418 
3419  auto PeekThroughFreeze = [](SDValue N) {
3420  if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3421  return N->getOperand(0);
3422  return N;
3423  };
3424 
3425  // fold (sub x, x) -> 0
3426  // FIXME: Refactor this and xor and other similar operations together.
3427  if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3428  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3429 
3430  // fold (sub c1, c2) -> c3
3431  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3432  return C;
3433 
3434  // fold vector ops
3435  if (VT.isVector()) {
3436  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3437  return FoldedVOp;
3438 
3439  // fold (sub x, 0) -> x, vector edition
3440  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3441  return N0;
3442  }
3443 
3444  if (SDValue NewSel = foldBinOpIntoSelect(N))
3445  return NewSel;
3446 
3446 
3447  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3448 
3449  // fold (sub x, c) -> (add x, -c)
3450  if (N1C) {
3451  return DAG.getNode(ISD::ADD, DL, VT, N0,
3452  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3453  }
3454 
3455  if (isNullOrNullSplat(N0)) {
3456  unsigned BitWidth = VT.getScalarSizeInBits();
3457  // Right-shifting everything out but the sign bit followed by negation is
3458  // the same as flipping arithmetic/logical shift type without the negation:
3459  // -(X >>u 31) -> (X >>s 31)
3460  // -(X >>s 31) -> (X >>u 31)
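      // For example, for i32: (X >>u 31) is 0 or 1, so 0 - (X >>u 31) is 0 or
      // -1, which is exactly (X >>s 31); the opposite direction works the same
      // way, so the negation can be dropped entirely.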
3461  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3462  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3463  if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3464  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3465  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3466  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3467  }
3468  }
3469 
3470  // 0 - X --> 0 if the sub is NUW.
3471  if (N->getFlags().hasNoUnsignedWrap())
3472  return N0;
3473 
3474  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3475  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3476  // N1 must be 0 because negating the minimum signed value is undefined.
3477  if (N->getFlags().hasNoSignedWrap())
3478  return N0;
3479 
3480  // 0 - X --> X if X is 0 or the minimum signed value.
3481  return N1;
3482  }
3483 
3484  // Convert 0 - abs(x).
3485  if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3486  !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3487  if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3488  return Result;
3489 
3490  // Fold neg(splat(neg(x))) -> splat(x)
3491  if (VT.isVector()) {
3492  SDValue N1S = DAG.getSplatValue(N1, true);
3493  if (N1S && N1S.getOpcode() == ISD::SUB &&
3494  isNullConstant(N1S.getOperand(0))) {
3495  if (VT.isScalableVector())
3496  return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3497  return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3498  }
3499  }
3500  }
3501 
3502  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3503  if (isAllOnesOrAllOnesSplat(N0))
3504  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3505 
3506  // fold (A - (0-B)) -> A+B
3507  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3508  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3509 
3510  // fold A-(A-B) -> B
3511  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3512  return N1.getOperand(1);
3513 
3514  // fold (A+B)-A -> B
3515  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3516  return N0.getOperand(1);
3517 
3518  // fold (A+B)-B -> A
3519  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3520  return N0.getOperand(0);
3521 
3522  // fold (A+C1)-C2 -> A+(C1-C2)
3523  if (N0.getOpcode() == ISD::ADD) {
3524  SDValue N01 = N0.getOperand(1);
3525  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3526  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3527  }
3528 
3529  // fold C2-(A+C1) -> (C2-C1)-A
3530  if (N1.getOpcode() == ISD::ADD) {
3531  SDValue N11 = N1.getOperand(1);
3532  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3533  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3534  }
3535 
3536  // fold (A-C1)-C2 -> A-(C1+C2)
3537  if (N0.getOpcode() == ISD::SUB) {
3538  SDValue N01 = N0.getOperand(1);
3539  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3540  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3541  }
3542 
3543  // fold (c1-A)-c2 -> (c1-c2)-A
3544  if (N0.getOpcode() == ISD::SUB) {
3545  SDValue N00 = N0.getOperand(0);
3546  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3547  return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3548  }
3549 
3550  // fold ((A+(B+or-C))-B) -> A+or-C
3551  if (N0.getOpcode() == ISD::ADD &&
3552  (N0.getOperand(1).getOpcode() == ISD::SUB ||
3553  N0.getOperand(1).getOpcode() == ISD::ADD) &&
3554  N0.getOperand(1).getOperand(0) == N1)
3555  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3556  N0.getOperand(1).getOperand(1));
3557 
3558  // fold ((A+(C+B))-B) -> A+C
3559  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3560  N0.getOperand(1).getOperand(1) == N1)
3561  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3562  N0.getOperand(1).getOperand(0));
3563 
3564  // fold ((A-(B-C))-C) -> A-B
3565  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3566  N0.getOperand(1).getOperand(1) == N1)
3567  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3568  N0.getOperand(1).getOperand(0));
3569 
3570  // fold (A-(B-C)) -> A+(C-B)
3571  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3572  return DAG.getNode(ISD::ADD, DL, VT, N0,
3573  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3574  N1.getOperand(0)));
3575 
3576  // A - (A & B) -> A & (~B)
3577  if (N1.getOpcode() == ISD::AND) {
3578  SDValue A = N1.getOperand(0);
3579  SDValue B = N1.getOperand(1);
3580  if (A != N0)
3581  std::swap(A, B);
3582  if (A == N0 &&
3583  (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3584  SDValue InvB =
3585  DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3586  return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3587  }
3588  }
3589 
3590  // fold (X - (-Y * Z)) -> (X + (Y * Z))
3591  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3592  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3593  isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3594  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3595  N1.getOperand(0).getOperand(1),
3596  N1.getOperand(1));
3597  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3598  }
3599  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3600  isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3601  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3602  N1.getOperand(0),
3603  N1.getOperand(1).getOperand(1));
3604  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3605  }
3606  }
3607 
3608  // If either operand of a sub is undef, the result is undef
3609  if (N0.isUndef())
3610  return N0;
3611  if (N1.isUndef())
3612  return N1;
3613 
3614  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3615  return V;
3616 
3617  if (SDValue V = foldAddSubOfSignBit(N, DAG))
3618  return V;
3619 
3620  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3621  return V;
3622 
3623  if (SDValue V = foldSubToUSubSat(VT, N))
3624  return V;
3625 
3626  // (x - y) - 1 -> add (xor y, -1), x
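  // This holds because ~y == -y - 1 in two's complement, so
  // x + ~y == x - y - 1 == (x - y) - 1.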
3627  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3628  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3629  DAG.getAllOnesConstant(DL, VT));
3630  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3631  }
3632 
3633  // Look for:
3634  // sub y, (xor x, -1)
3635  // And if the target does not like this form then turn into:
3636  // add (add x, y), 1
3637  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3638  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3639  return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3640  }
3641 
3642  // Hoist one-use addition by non-opaque constant:
3643  // (x + C) - y -> (x - y) + C
3644  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3645  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3646  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3647  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3648  }
3649  // y - (x + C) -> (y - x) - C
3650  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3651  isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3652  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3653  return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3654  }
3655  // (x - C) - y -> (x - y) - C
3656  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3657  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3658  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3659  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3660  return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3661  }
3662  // (C - x) - y -> C - (x + y)
3663  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3664  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3665  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3666  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3667  }
3668 
3669  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3670  // rather than 'sub 0/1' (the sext should get folded).
3671  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3672  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3673  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3674  TLI.getBooleanContents(VT) ==
3675  TargetLowering::ZeroOrNegativeOneBooleanContent) {
3676  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3677  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3678  }
3679 
3680  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3681  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3682  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3683  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3684  SDValue S0 = N1.getOperand(0);
3685  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3686  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3687  if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3688  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3689  }
3690  }
3691 
3692  // If the relocation model supports it, consider symbol offsets.
3693  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3694  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3695  // fold (sub Sym, c) -> Sym-c
3696  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3697  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3698  GA->getOffset() -
3699  (uint64_t)N1C->getSExtValue());
3700  // fold (sub Sym+c1, Sym+c2) -> c1-c2
3701  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3702  if (GA->getGlobal() == GB->getGlobal())
3703  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3704  DL, VT);
3705  }
3706 
3707  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3708  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3709  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3710  if (TN->getVT() == MVT::i1) {
3711  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3712  DAG.getConstant(1, DL, VT));
3713  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3714  }
3715  }
3716 
3717  // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3718  if (N1.getOpcode() == ISD::VSCALE) {
3719  const APInt &IntVal = N1.getConstantOperandAPInt(0);
3720  return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3721  }
3722 
3723  // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3724  if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3725  APInt NewStep = -N1.getConstantOperandAPInt(0);
3726  return DAG.getNode(ISD::ADD, DL, VT, N0,
3727  DAG.getStepVector(DL, VT, NewStep));
3728  }
3729 
3730  // Prefer an add for more folding potential and possibly better codegen:
3731  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3732  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3733  SDValue ShAmt = N1.getOperand(1);
3734  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3735  if (ShAmtC &&
3736  ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3737  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3738  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3739  }
3740  }
3741 
3742  // As with the previous fold, prefer add for more folding potential.
3743  // Subtracting SMIN/0 is the same as adding SMIN/0:
3744  // N0 - (X << BW-1) --> N0 + (X << BW-1)
3745  if (N1.getOpcode() == ISD::SHL) {
3746  ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
3747  if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
3748  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
3749  }
3750 
3751  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3752  // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3753  if (SDValue Carry = getAsCarry(TLI, N0)) {
3754  SDValue X = N1;
3755  SDValue Zero = DAG.getConstant(0, DL, VT);
3756  SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3757  return DAG.getNode(ISD::ADDCARRY, DL,
3758  DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3759  Carry);
3760  }
3761  }
3762 
3763  return SDValue();
3764 }
3765 
3766 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3767  SDValue N0 = N->getOperand(0);
3768  SDValue N1 = N->getOperand(1);
3769  EVT VT = N0.getValueType();
3770  SDLoc DL(N);
3771 
3772  // fold (sub_sat x, undef) -> 0
3773  if (N0.isUndef() || N1.isUndef())
3774  return DAG.getConstant(0, DL, VT);
3775 
3776  // fold (sub_sat x, x) -> 0
3777  if (N0 == N1)
3778  return DAG.getConstant(0, DL, VT);
3779 
3780  // fold (sub_sat c1, c2) -> c3
3781  if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3782  return C;
3783 
3784  // fold vector ops
3785  if (VT.isVector()) {
3786  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3787  return FoldedVOp;
3788 
3789  // fold (sub_sat x, 0) -> x, vector edition
3790  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3791  return N0;
3792  }
3793 
3794  // fold (sub_sat x, 0) -> x
3795  if (isNullConstant(N1))
3796  return N0;
3797 
3798  return SDValue();
3799 }
3800 
3801 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3802  SDValue N0 = N->getOperand(0);
3803  SDValue N1 = N->getOperand(1);
3804  EVT VT = N0.getValueType();
3805  SDLoc DL(N);
3806 
3807  // If the flag result is dead, turn this into a SUB.
3808  if (!N->hasAnyUseOfValue(1))
3809  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3810  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3811 
3812  // fold (subc x, x) -> 0 + no borrow
3813  if (N0 == N1)
3814  return CombineTo(N, DAG.getConstant(0, DL, VT),
3815  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3816 
3817  // fold (subc x, 0) -> x + no borrow
3818  if (isNullConstant(N1))
3819  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3820 
3821  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3822  if (isAllOnesConstant(N0))
3823  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3824  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3825 
3826  return SDValue();
3827 }
3828 
3829 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3830  SDValue N0 = N->getOperand(0);
3831  SDValue N1 = N->getOperand(1);
3832  EVT VT = N0.getValueType();
3833  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3834 
3835  EVT CarryVT = N->getValueType(1);
3836  SDLoc DL(N);
3837 
3838  // If the flag result is dead, turn this into a SUB.
3839  if (!N->hasAnyUseOfValue(1))
3840  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3841  DAG.getUNDEF(CarryVT));
3842 
3843  // fold (subo x, x) -> 0 + no borrow
3844  if (N0 == N1)
3845  return CombineTo(N, DAG.getConstant(0, DL, VT),
3846  DAG.getConstant(0, DL, CarryVT));
3847 
3848  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3849 
3850  // fold (subo x, c) -> (addo x, -c)
3851  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3852  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3853  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3854  }
3855 
3856  // fold (subo x, 0) -> x + no borrow
3857  if (isNullOrNullSplat(N1))
3858  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3859 
3860  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3861  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3862  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3863  DAG.getConstant(0, DL, CarryVT));
3864 
3865  return SDValue();
3866 }
3867 
3868 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3869  SDValue N0 = N->getOperand(0);
3870  SDValue N1 = N->getOperand(1);
3871  SDValue CarryIn = N->getOperand(2);
3872 
3873  // fold (sube x, y, false) -> (subc x, y)
3874  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3875  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3876 
3877  return SDValue();
3878 }
3879 
3880 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3881  SDValue N0 = N->getOperand(0);
3882  SDValue N1 = N->getOperand(1);
3883  SDValue CarryIn = N->getOperand(2);
3884 
3885  // fold (subcarry x, y, false) -> (usubo x, y)
3886  if (isNullConstant(CarryIn)) {
3887  if (!LegalOperations ||
3888  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3889  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3890  }
3891 
3892  return SDValue();
3893 }
3894 
3895 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3896  SDValue N0 = N->getOperand(0);
3897  SDValue N1 = N->getOperand(1);
3898  SDValue CarryIn = N->getOperand(2);
3899 
3900  // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3901  if (isNullConstant(CarryIn)) {
3902  if (!LegalOperations ||
3903  TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3904  return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3905  }
3906 
3907  return SDValue();
3908 }
3909 
3910 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3911 // UMULFIXSAT here.
3912 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3913  SDValue N0 = N->getOperand(0);
3914  SDValue N1 = N->getOperand(1);
3915  SDValue Scale = N->getOperand(2);
3916  EVT VT = N0.getValueType();
3917 
3918  // fold (mulfix x, undef, scale) -> 0
3919  if (N0.isUndef() || N1.isUndef())
3920  return DAG.getConstant(0, SDLoc(N), VT);
3921 
3922  // Canonicalize constant to RHS (vector doesn't have to splat)
3923  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3924  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3925  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3926 
3927  // fold (mulfix x, 0, scale) -> 0
3928  if (isNullConstant(N1))
3929  return DAG.getConstant(0, SDLoc(N), VT);
3930 
3931  return SDValue();
3932 }
3933 
3934 SDValue DAGCombiner::visitMUL(SDNode *N) {
3935  SDValue N0 = N->getOperand(0);
3936  SDValue N1 = N->getOperand(1);
3937  EVT VT = N0.getValueType();
3938  SDLoc DL(N);
3939 
3940  // fold (mul x, undef) -> 0
3941  if (N0.isUndef() || N1.isUndef())
3942  return DAG.getConstant(0, DL, VT);
3943 
3944  // fold (mul c1, c2) -> c1*c2
3945  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
3946  return C;
3947 
3948  // canonicalize constant to RHS (vector doesn't have to splat)
3949  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3950  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3951  return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
3952 
3953  bool N1IsConst = false;
3954  bool N1IsOpaqueConst = false;
3955  APInt ConstValue1;
3956 
3957  // fold vector ops
3958  if (VT.isVector()) {
3959  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3960  return FoldedVOp;
3961 
3962  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3963  assert((!N1IsConst ||
3964  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3965  "Splat APInt should be element width");
3966  } else {
3967  N1IsConst = isa<ConstantSDNode>(N1);
3968  if (N1IsConst) {
3969  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3970  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3971  }
3972  }
3973 
3974  // fold (mul x, 0) -> 0
3975  if (N1IsConst && ConstValue1.isZero())
3976  return N1;
3977 
3978  // fold (mul x, 1) -> x
3979  if (N1IsConst && ConstValue1.isOne())
3980  return N0;
3981 
3982  if (SDValue NewSel = foldBinOpIntoSelect(N))
3983  return NewSel;
3984 
3985  // fold (mul x, -1) -> 0-x
3986  if (N1IsConst && ConstValue1.isAllOnes())
3987  return DAG.getNode(ISD::SUB, DL, VT,
3988  DAG.getConstant(0, DL, VT), N0);
3989 
3990  // fold (mul x, (1 << c)) -> x << c
3991  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3992  DAG.isKnownToBeAPowerOfTwo(N1) &&
3993  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3994  SDValue LogBase2 = BuildLogBase2(N1, DL);
3995  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3996  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3997  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3998  }
3999 
4000  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4001  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4002  unsigned Log2Val = (-ConstValue1).logBase2();
4003  // FIXME: If the input is something that is easily negated (e.g. a
4004  // single-use add), we should put the negate there.
4005  return DAG.getNode(ISD::SUB, DL, VT,
4006  DAG.getConstant(0, DL, VT),
4007  DAG.getNode(ISD::SHL, DL, VT, N0,
4008  DAG.getConstant(Log2Val, DL,
4009  getShiftAmountTy(N0.getValueType()))));
4010  }
4011 
4012  // Try to transform:
4013  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4014  // mul x, (2^N + 1) --> add (shl x, N), x
4015  // mul x, (2^N - 1) --> sub (shl x, N), x
4016  // Examples: x * 33 --> (x << 5) + x
4017  // x * 15 --> (x << 4) - x
4018  // x * -33 --> -((x << 5) + x)
4019  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4020  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4021  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4022  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4023  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4024  // x * 0xf800 --> (x << 16) - (x << 11)
4025  // x * -0x8800 --> -((x << 15) + (x << 11))
4026  // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4027  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4028  // TODO: We could handle more general decomposition of any constant by
4029  // having the target set a limit on number of ops and making a
4030  // callback to determine that sequence (similar to sqrt expansion).
4031  unsigned MathOp = ISD::DELETED_NODE;
4032  APInt MulC = ConstValue1.abs();
4033  // The constant `2` should be treated as (2^0 + 1).
4034  unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
4035  MulC.lshrInPlace(TZeros);
4036  if ((MulC - 1).isPowerOf2())
4037  MathOp = ISD::ADD;
4038  else if ((MulC + 1).isPowerOf2())
4039  MathOp = ISD::SUB;
4040 
4041  if (MathOp != ISD::DELETED_NODE) {
4042  unsigned ShAmt =
4043  MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4044  ShAmt += TZeros;
4045  assert(ShAmt < VT.getScalarSizeInBits() &&
4046  "multiply-by-constant generated out of bounds shift");
4047  SDValue Shl =
4048  DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4049  SDValue R =
4050  TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4051  DAG.getNode(ISD::SHL, DL, VT, N0,
4052  DAG.getConstant(TZeros, DL, VT)))
4053  : DAG.getNode(MathOp, DL, VT, Shl, N0);
4054  if (ConstValue1.isNegative())
4055  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
4056  return R;
4057  }
4058  }
4059 
4060  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4061  if (N0.getOpcode() == ISD::SHL) {
4062  SDValue N01 = N0.getOperand(1);
4063  if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4064  return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4065  }
4066 
4067  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4068  // use.
4069  {
4070  SDValue Sh, Y;
4071 
4072  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4073  if (N0.getOpcode() == ISD::SHL &&
4074  isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4075  Sh = N0; Y = N1;
4076  } else if (N1.getOpcode() == ISD::SHL &&
4077  isConstantOrConstantVector(N1.getOperand(1)) &&
4078  N1->hasOneUse()) {
4079  Sh = N1; Y = N0;
4080  }
4081 
4082  if (Sh.getNode()) {
4083  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4084  return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4085  }
4086  }
4087 
4088  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4089  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4090  N0.getOpcode() == ISD::ADD &&
4091  DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4092  isMulAddWithConstProfitable(N, N0, N1))
4093  return DAG.getNode(
4094  ISD::ADD, DL, VT,
4095  DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4096  DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4097 
4098  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4099  if (N0.getOpcode() == ISD::VSCALE)
4100  if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
4101  const APInt &C0 = N0.getConstantOperandAPInt(0);
4102  const APInt &C1 = NC1->getAPIntValue();
4103  return DAG.getVScale(DL, VT, C0 * C1);
4104  }
4105 
4106  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4107  APInt MulVal;
4108  if (N0.getOpcode() == ISD::STEP_VECTOR)
4109  if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4110  const APInt &C0 = N0.getConstantOperandAPInt(0);
4111  APInt NewStep = C0 * MulVal;
4112  return DAG.getStepVector(DL, VT, NewStep);
4113  }
4114 
4115  // Fold (mul x, 0/undef) -> 0 and
4116  // (mul x, 1) -> x
4117  // into and(x, mask).
4118  // We can replace vectors with '0' and '1' factors with a clearing mask.
4119  if (VT.isFixedLengthVector()) {
4120  unsigned NumElts = VT.getVectorNumElements();
4121  SmallBitVector ClearMask;
4122  ClearMask.reserve(NumElts);
4123  auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4124  if (!V || V->isZero()) {
4125  ClearMask.push_back(true);
4126  return true;
4127  }
4128  ClearMask.push_back(false);
4129  return V->isOne();
4130  };
4131  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4132  ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4133  assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4134  EVT LegalSVT = N1.getOperand(0).getValueType();
4135  SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4136  SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4137  SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4138  for (unsigned I = 0; I != NumElts; ++I)
4139  if (ClearMask[I])
4140  Mask[I] = Zero;
4141  return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4142  }
4143  }
4144 
4145  // reassociate mul
4146  if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4147  return RMUL;
4148 
4149  return SDValue();
4150 }
4151 
4152 /// Return true if divmod libcall is available.
4153 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4154  const TargetLowering &TLI) {
4155  RTLIB::Libcall LC;
4156  EVT NodeType = Node->getValueType(0);
4157  if (!NodeType.isSimple())
4158  return false;
4159  switch (NodeType.getSimpleVT().SimpleTy) {
4160  default: return false; // No libcall for vector types.
4161  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4162  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4163  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4164  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4165  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4166  }
4167 
4168  return TLI.getLibcallName(LC) != nullptr;
4169 }
4170 
4171 /// Issue divrem if both quotient and remainder are needed.
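/// For example, when a block computes both a / b and a % b with the same
/// operands, the two nodes are replaced by a single (sdivrem a, b) (or
/// udivrem), and each user is rewired to the quotient or remainder result.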
4172 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4173  if (Node->use_empty())
4174  return SDValue(); // This is a dead node, leave it alone.
4175 
4176  unsigned Opcode = Node->getOpcode();
4177  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4178  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4179 
4180  // DivMod lib calls can still work on non-legal types if using lib-calls.
4181  EVT VT = Node->getValueType(0);
4182  if (VT.isVector() || !VT.isInteger())
4183  return SDValue();
4184 
4185  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4186  return SDValue();
4187 
4188  // If DIVREM is going to get expanded into a libcall,
4189  // but there is no libcall available, then don't combine.
4190  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4191  !isDivRemLibcallAvailable(Node, isSigned, TLI))
4192  return SDValue();
4193 
4194  // If div is legal, it's better to do the normal expansion
4195  unsigned OtherOpcode = 0;
4196  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4197  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4198  if (TLI.isOperationLegalOrCustom(Opcode, VT))
4199  return SDValue();
4200  } else {
4201  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4202  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4203  return SDValue();
4204  }
4205 
4206  SDValue Op0 = Node->getOperand(0);
4207  SDValue Op1 = Node->getOperand(1);
4208  SDValue combined;
4209  for (SDNode *User : Op0->uses()) {
4210  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4211  User->use_empty())
4212  continue;
4213  // Convert the other matching node(s), too;
4214  // otherwise, the DIVREM may get target-legalized into something
4215  // target-specific that we won't be able to recognize.
4216  unsigned UserOpc = User->getOpcode();
4217  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4218  User->getOperand(0) == Op0 &&
4219  User->getOperand(1) == Op1) {
4220  if (!combined) {
4221  if (UserOpc == OtherOpcode) {
4222  SDVTList VTs = DAG.getVTList(VT, VT);
4223  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4224  } else if (UserOpc == DivRemOpc) {
4225  combined = SDValue(User, 0);
4226  } else {
4227  assert(UserOpc == Opcode);
4228  continue;
4229  }
4230  }
4231  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4232  CombineTo(User, combined);
4233  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4234  CombineTo(User, combined.getValue(1));
4235  }
4236  }
4237  return combined;
4238 }
4239 
4240 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4241  SDValue N0 = N->getOperand(0);
4242  SDValue N1 = N->getOperand(1);
4243  EVT VT = N->getValueType(0);
4244  SDLoc DL(N);
4245 
4246  unsigned Opc = N->getOpcode();
4247  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4248  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4249 
4250  // X / undef -> undef
4251  // X % undef -> undef
4252  // X / 0 -> undef
4253  // X % 0 -> undef
4254  // NOTE: This includes vectors where any divisor element is zero/undef.
4255  if (DAG.isUndef(Opc, {N0, N1}))
4256  return DAG.getUNDEF(VT);
4257 
4258  // undef / X -> 0
4259  // undef % X -> 0
4260  if (N0.isUndef())
4261  return DAG.getConstant(0, DL, VT);
4262 
4263  // 0 / X -> 0
4264  // 0 % X -> 0
4265  ConstantSDNode *N0C = isConstOrConstSplat(N0);
4266  if (N0C && N0C->isZero())
4267  return N0;
4268 
4269  // X / X -> 1
4270  // X % X -> 0
4271  if (N0 == N1)
4272  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4273 
4274  // X / 1 -> X
4275  // X % 1 -> 0
4276  // If this is a boolean op (single-bit element type), we can't have
4277  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4278  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4279  // it's a 1.
4280  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4281  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4282 
4283  return SDValue();
4284 }
4285 
4286 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4287  SDValue N0 = N->getOperand(0);
4288  SDValue N1 = N->getOperand(1);
4289  EVT VT = N->getValueType(0);
4290  EVT CCVT = getSetCCResultType(VT);
4291  SDLoc DL(N);
4292 
4293  // fold (sdiv c1, c2) -> c1/c2
4294  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4295  return C;
4296 
4297  // fold vector ops
4298  if (VT.isVector())
4299  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4300  return FoldedVOp;
4301 
4302  // fold (sdiv X, -1) -> 0-X
4303  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4304  if (N1C && N1C->isAllOnes())
4305  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4306 
4307  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4308  if (N1C && N1C->getAPIntValue().isMinSignedValue())
4309  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4310  DAG.getConstant(1, DL, VT),
4311  DAG.getConstant(0, DL, VT));
4312 
4313  if (SDValue V = simplifyDivRem(N, DAG))
4314  return V;
4315 
4316  if (SDValue NewSel = foldBinOpIntoSelect(N))
4317  return NewSel;
4318 
4319  // If we know the sign bits of both operands are zero, strength reduce to a
4320  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4321  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4322  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4323 
4324  if (SDValue V = visitSDIVLike(N0, N1, N)) {
4325  // If the corresponding remainder node exists, update its users with
4326  // (Dividend - (Quotient * Divisor)).
4327  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4328  { N0, N1 })) {
4329  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4330  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4331  AddToWorklist(Mul.getNode());
4332  AddToWorklist(Sub.getNode());
4333  CombineTo(RemNode, Sub);
4334  }
4335  return V;
4336  }
4337 
4338  // sdiv, srem -> sdivrem
4339  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4340  // true. Otherwise, we break the simplification logic in visitREM().
4341  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4342  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4343  if (SDValue DivRem = useDivRem(N))
4344  return DivRem;
4345 
4346  return SDValue();
4347 }
4348 
4349 static bool isDivisorPowerOfTwo(SDValue Divisor) {
4350  // Helper for determining whether a value is a power-2 constant scalar or a
4351  // vector of such elements.
4352  auto IsPowerOfTwo = [](ConstantSDNode *C) {
4353  if (C->isZero() || C->isOpaque())
4354  return false;
4355  if (C->getAPIntValue().isPowerOf2())
4356  return true;
4357  if (C->getAPIntValue().isNegatedPowerOf2())
4358  return true;
4359  return false;
4360  };
4361 
4362  return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4363 }
4364 
4365 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4366  SDLoc DL(N);
4367  EVT VT = N->getValueType(0);
4368  EVT CCVT = getSetCCResultType(VT);
4369  unsigned BitWidth = VT.getScalarSizeInBits();
4370 
4371  // fold (sdiv X, pow2) -> simple ops after legalize
4372  // FIXME: We check for the exact bit here because the generic lowering gives
4373  // better results in that case. The target-specific lowering should learn how
4374  // to handle exact sdivs efficiently.
4375  if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4376  // Target-specific implementation of sdiv x, pow2.
4377  if (SDValue Res = BuildSDIVPow2(N))
4378  return Res;
4379 
4380  // Create constants that are functions of the shift amount value.
4381  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4382  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4383  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4384  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4385  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4386  if (!isConstantOrConstantVector(Inexact))
4387  return SDValue();
4388 
4389  // Splat the sign bit into the register
4390  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4391  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4392  AddToWorklist(Sign.getNode());
4393 
4394  // Add (N0 < 0) ? abs2 - 1 : 0;
4395  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4396  AddToWorklist(Srl.getNode());
4397  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4398  AddToWorklist(Add.getNode());
4399  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4400  AddToWorklist(Sra.getNode());
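      // For example, for (sdiv i32 X, 8): C1 == 3 and Inexact == 29, so the
      // result is computed as (X + ((X >>s 31) >>u 29)) >>s 3, i.e. X is
      // biased by 7 only when negative so that the shift rounds toward zero.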
4401 
4402  <