DAGCombiner.cpp
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/Constant.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CodeGen.h"
60 #include "llvm/Support/Compiler.h"
61 #include "llvm/Support/Debug.h"
63 #include "llvm/Support/KnownBits.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <functional>
73 #include <iterator>
74 #include <string>
75 #include <tuple>
76 #include <utility>
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "dagcombine"
81 
82 STATISTIC(NodesCombined , "Number of dag nodes combined");
83 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87 STATISTIC(SlicedLoads, "Number of loads sliced");
88 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89 
90 static cl::opt<bool>
91 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92  cl::desc("Enable DAG combiner's use of IR alias analysis"));
93 
94 static cl::opt<bool>
95 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96  cl::desc("Enable DAG combiner's use of TBAA"));
97 
98 #ifndef NDEBUG
99 static cl::opt<std::string>
100 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101  cl::desc("Only use DAG-combiner alias analysis in this"
102  " function"));
103 #endif
104 
105 /// Hidden option to stress test load slicing, i.e., when this option
106 /// is enabled, load slicing bypasses most of its profitability guards.
107 static cl::opt<bool>
108 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109  cl::desc("Bypass the profitability model of load slicing"),
110  cl::init(false));
111 
112 static cl::opt<bool>
113  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114  cl::desc("DAG combiner may split indexing from loads"));
115 
116 static cl::opt<bool>
117  EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118  cl::desc("DAG combiner enable merging multiple stores "
119  "into a wider store"));
120 
122  "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123  cl::desc("Limit the number of operands to inline for Token Factors"));
124 
126  "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127  cl::desc("Limit the number of times for the same StoreNode and RootNode "
128  "to bail out in store merging dependence check"));
129 
131  "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132  cl::desc("DAG cominber enable reducing the width of load/op/store "
133  "sequence"));
134 
136  "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137  cl::desc("DAG cominber enable load/<replace bytes>/store with "
138  "a narrower store"));
139 
140 namespace {
141 
142  class DAGCombiner {
143  SelectionDAG &DAG;
144  const TargetLowering &TLI;
145  const SelectionDAGTargetInfo *STI;
146  CombineLevel Level;
147  CodeGenOpt::Level OptLevel;
148  bool LegalDAG = false;
149  bool LegalOperations = false;
150  bool LegalTypes = false;
151  bool ForCodeSize;
152  bool DisableGenericCombines;
153 
154  /// Worklist of all of the nodes that need to be simplified.
155  ///
156  /// This must behave as a stack -- new nodes to process are pushed onto the
157  /// back and when processing we pop off of the back.
158  ///
159  /// The worklist will not contain duplicates but may contain null entries
160  /// due to nodes being deleted from the underlying DAG.
161  SmallVector<SDNode *, 64> Worklist;
162 
163  /// Mapping from an SDNode to its position on the worklist.
164  ///
165  /// This is used to find and remove nodes from the worklist (by nulling
166  /// them) when they are deleted from the underlying DAG. It relies on
167  /// stable indices of nodes within the worklist.
168  DenseMap<SDNode *, unsigned> WorklistMap;
169  /// This records all nodes attempted to be added to the worklist since we
170  /// last considered a new worklist entry. Because we do not add duplicate
171  /// nodes to the worklist, this is different from the tail of the worklist.
172  SmallSetVector<SDNode *, 32> PruningList;
173 
174  /// Set of nodes which have been combined (at least once).
175  ///
176  /// This is used to allow us to reliably add any operands of a DAG node
177  /// which have not yet been combined to the worklist.
178  SmallPtrSet<SDNode *, 32> CombinedNodes;
179 
180  /// Map from candidate StoreNode to the pair of RootNode and count.
181  /// The count is used to track how many times we have seen the StoreNode
182  /// with the same RootNode bail out in dependence check. If we have seen
183  /// the bail out for the same pair many times over a limit, we won't
184  /// consider the StoreNode with the same RootNode as store merging
185  /// candidate again.
186  DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
187 
188  // AA - Used for DAG load/store alias analysis.
189  AliasAnalysis *AA;
190 
191  /// When an instruction is simplified, add all users of the instruction to
192  /// the work lists because they might get more simplified now.
193  void AddUsersToWorklist(SDNode *N) {
194  for (SDNode *Node : N->uses())
195  AddToWorklist(Node);
196  }
197 
198  /// Convenient shorthand to add a node and all of its users to the worklist.
199  void AddToWorklistWithUsers(SDNode *N) {
200  AddUsersToWorklist(N);
201  AddToWorklist(N);
202  }
203 
204  // Prune potentially dangling nodes. This is called after
205  // any visit to a node, but should also be called during a visit after any
206  // failed combine which may have created a DAG node.
207  void clearAddedDanglingWorklistEntries() {
208  // Check any nodes added to the worklist to see if they are prunable.
209  while (!PruningList.empty()) {
210  auto *N = PruningList.pop_back_val();
211  if (N->use_empty())
212  recursivelyDeleteUnusedNodes(N);
213  }
214  }
215 
216  SDNode *getNextWorklistEntry() {
217  // Before we do any work, remove nodes that are not in use.
218  clearAddedDanglingWorklistEntries();
219  SDNode *N = nullptr;
220  // The Worklist holds the SDNodes in order, but it may contain null
221  // entries.
222  while (!N && !Worklist.empty()) {
223  N = Worklist.pop_back_val();
224  }
225 
226  if (N) {
227  bool GoodWorklistEntry = WorklistMap.erase(N);
228  (void)GoodWorklistEntry;
229  assert(GoodWorklistEntry &&
230  "Found a worklist entry without a corresponding map entry!");
231  }
232  return N;
233  }
234 
235  /// Call the node-specific routine that folds each particular type of node.
236  SDValue visit(SDNode *N);
237 
238  public:
239  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240  : DAG(D), TLI(D.getTargetLoweringInfo()),
241  STI(D.getSubtarget().getSelectionDAGInfo()),
242  Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243  ForCodeSize = DAG.shouldOptForSize();
244  DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
245 
246  MaximumLegalStoreInBits = 0;
247  // We use the minimum store size here, since that's all we can guarantee
248  // for the scalable vector types.
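 // (For example, a <vscale x 4 x i32> store is only guaranteed to be at
 // least 4 x 32 = 128 bits wide, so 128 is the value recorded for it.)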
249  for (MVT VT : MVT::all_valuetypes())
250  if (EVT(VT).isSimple() && VT != MVT::Other &&
251  TLI.isTypeLegal(EVT(VT)) &&
252  VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253  MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254  }
255 
256  void ConsiderForPruning(SDNode *N) {
257  // Mark this for potential pruning.
258  PruningList.insert(N);
259  }
260 
261  /// Add to the worklist making sure its instance is at the back (next to be
262  /// processed.)
263  void AddToWorklist(SDNode *N) {
264  assert(N->getOpcode() != ISD::DELETED_NODE &&
265  "Deleted Node added to Worklist");
266 
267  // Skip handle nodes as they can't usefully be combined and confuse the
268  // zero-use deletion strategy.
269  if (N->getOpcode() == ISD::HANDLENODE)
270  return;
271 
272  ConsiderForPruning(N);
273 
274  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275  Worklist.push_back(N);
276  }
277 
278  /// Remove all instances of N from the worklist.
279  void removeFromWorklist(SDNode *N) {
280  CombinedNodes.erase(N);
281  PruningList.remove(N);
282  StoreRootCountMap.erase(N);
283 
284  auto It = WorklistMap.find(N);
285  if (It == WorklistMap.end())
286  return; // Not in the worklist.
287 
288  // Null out the entry rather than erasing it to avoid a linear operation.
289  Worklist[It->second] = nullptr;
290  WorklistMap.erase(It);
291  }
292 
293  void deleteAndRecombine(SDNode *N);
294  bool recursivelyDeleteUnusedNodes(SDNode *N);
295 
296  /// Replaces all uses of the results of one DAG node with new values.
297  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298  bool AddTo = true);
299 
300  /// Replaces all uses of the results of one DAG node with new values.
301  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302  return CombineTo(N, &Res, 1, AddTo);
303  }
304 
305  /// Replaces all uses of the results of one DAG node with new values.
306  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307  bool AddTo = true) {
308  SDValue To[] = { Res0, Res1 };
309  return CombineTo(N, To, 2, AddTo);
310  }
311 
312  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313 
314  private:
315  unsigned MaximumLegalStoreInBits;
316 
317  /// Check the specified integer node value to see if it can be simplified or
318  /// if things it uses can be simplified by bit propagation.
319  /// If so, return true.
320  bool SimplifyDemandedBits(SDValue Op) {
321  unsigned BitWidth = Op.getScalarValueSizeInBits();
322  APInt DemandedBits = APInt::getAllOnes(BitWidth);
323  return SimplifyDemandedBits(Op, DemandedBits);
324  }
325 
326  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328  KnownBits Known;
329  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330  return false;
331 
332  // Revisit the node.
333  AddToWorklist(Op.getNode());
334 
335  CommitTargetLoweringOpt(TLO);
336  return true;
337  }
338 
339  /// Check the specified vector node value to see if it can be simplified or
340  /// if things it uses can be simplified as it only uses some of the
341  /// elements. If so, return true.
342  bool SimplifyDemandedVectorElts(SDValue Op) {
343  // TODO: For now just pretend it cannot be simplified.
344  if (Op.getValueType().isScalableVector())
345  return false;
346 
347  unsigned NumElts = Op.getValueType().getVectorNumElements();
348  APInt DemandedElts = APInt::getAllOnes(NumElts);
349  return SimplifyDemandedVectorElts(Op, DemandedElts);
350  }
351 
352  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353  const APInt &DemandedElts,
354  bool AssumeSingleUse = false);
355  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356  bool AssumeSingleUse = false);
357 
358  bool CombineToPreIndexedLoadStore(SDNode *N);
359  bool CombineToPostIndexedLoadStore(SDNode *N);
360  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361  bool SliceUpLoad(SDNode *N);
362 
363  // Scalars have size 0 to distinguish from singleton vectors.
364  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
367 
368  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369  /// load.
370  ///
371  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373  /// \param EltNo index of the vector element to load.
374  /// \param OriginalLoad load that EVE came from to be replaced.
375  /// \returns EVE on success SDValue() on failure.
376  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377  SDValue EltNo,
378  LoadSDNode *OriginalLoad);
379  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
380  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383  SDValue PromoteIntBinOp(SDValue Op);
384  SDValue PromoteIntShiftOp(SDValue Op);
385  SDValue PromoteExtend(SDValue Op);
386  bool PromoteLoad(SDValue Op);
387 
388  /// Call the node-specific routine that knows how to fold each
389  /// particular type of node. If that doesn't do anything, try the
390  /// target-specific DAG combines.
391  SDValue combine(SDNode *N);
392 
393  // Visitation implementation - Implement dag node combining for different
394  // node types. The semantics are as follows:
395  // Return Value:
396  // SDValue.getNode() == 0 - No change was made
397  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
398  // otherwise - N should be replaced by the returned Operand.
399  //
400  SDValue visitTokenFactor(SDNode *N);
401  SDValue visitMERGE_VALUES(SDNode *N);
402  SDValue visitADD(SDNode *N);
403  SDValue visitADDLike(SDNode *N);
404  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405  SDValue visitSUB(SDNode *N);
406  SDValue visitADDSAT(SDNode *N);
407  SDValue visitSUBSAT(SDNode *N);
408  SDValue visitADDC(SDNode *N);
409  SDValue visitADDO(SDNode *N);
410  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411  SDValue visitSUBC(SDNode *N);
412  SDValue visitSUBO(SDNode *N);
413  SDValue visitADDE(SDNode *N);
414  SDValue visitADDCARRY(SDNode *N);
415  SDValue visitSADDO_CARRY(SDNode *N);
416  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417  SDValue visitSUBE(SDNode *N);
418  SDValue visitSUBCARRY(SDNode *N);
419  SDValue visitSSUBO_CARRY(SDNode *N);
420  SDValue visitMUL(SDNode *N);
421  SDValue visitMULFIX(SDNode *N);
422  SDValue useDivRem(SDNode *N);
423  SDValue visitSDIV(SDNode *N);
424  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425  SDValue visitUDIV(SDNode *N);
426  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427  SDValue visitREM(SDNode *N);
428  SDValue visitMULHU(SDNode *N);
429  SDValue visitMULHS(SDNode *N);
430  SDValue visitSMUL_LOHI(SDNode *N);
431  SDValue visitUMUL_LOHI(SDNode *N);
432  SDValue visitMULO(SDNode *N);
433  SDValue visitIMINMAX(SDNode *N);
434  SDValue visitAND(SDNode *N);
435  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436  SDValue visitOR(SDNode *N);
437  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438  SDValue visitXOR(SDNode *N);
439  SDValue SimplifyVBinOp(SDNode *N);
440  SDValue visitSHL(SDNode *N);
441  SDValue visitSRA(SDNode *N);
442  SDValue visitSRL(SDNode *N);
443  SDValue visitFunnelShift(SDNode *N);
444  SDValue visitRotate(SDNode *N);
445  SDValue visitABS(SDNode *N);
446  SDValue visitBSWAP(SDNode *N);
447  SDValue visitBITREVERSE(SDNode *N);
448  SDValue visitCTLZ(SDNode *N);
449  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450  SDValue visitCTTZ(SDNode *N);
451  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452  SDValue visitCTPOP(SDNode *N);
453  SDValue visitSELECT(SDNode *N);
454  SDValue visitVSELECT(SDNode *N);
455  SDValue visitSELECT_CC(SDNode *N);
456  SDValue visitSETCC(SDNode *N);
457  SDValue visitSETCCCARRY(SDNode *N);
458  SDValue visitSIGN_EXTEND(SDNode *N);
459  SDValue visitZERO_EXTEND(SDNode *N);
460  SDValue visitANY_EXTEND(SDNode *N);
461  SDValue visitAssertExt(SDNode *N);
462  SDValue visitAssertAlign(SDNode *N);
463  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464  SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
465  SDValue visitTRUNCATE(SDNode *N);
466  SDValue visitBITCAST(SDNode *N);
467  SDValue visitFREEZE(SDNode *N);
468  SDValue visitBUILD_PAIR(SDNode *N);
469  SDValue visitFADD(SDNode *N);
470  SDValue visitSTRICT_FADD(SDNode *N);
471  SDValue visitFSUB(SDNode *N);
472  SDValue visitFMUL(SDNode *N);
473  SDValue visitFMA(SDNode *N);
474  SDValue visitFDIV(SDNode *N);
475  SDValue visitFREM(SDNode *N);
476  SDValue visitFSQRT(SDNode *N);
477  SDValue visitFCOPYSIGN(SDNode *N);
478  SDValue visitFPOW(SDNode *N);
479  SDValue visitSINT_TO_FP(SDNode *N);
480  SDValue visitUINT_TO_FP(SDNode *N);
481  SDValue visitFP_TO_SINT(SDNode *N);
482  SDValue visitFP_TO_UINT(SDNode *N);
483  SDValue visitFP_ROUND(SDNode *N);
484  SDValue visitFP_EXTEND(SDNode *N);
485  SDValue visitFNEG(SDNode *N);
486  SDValue visitFABS(SDNode *N);
487  SDValue visitFCEIL(SDNode *N);
488  SDValue visitFTRUNC(SDNode *N);
489  SDValue visitFFLOOR(SDNode *N);
490  SDValue visitFMINNUM(SDNode *N);
491  SDValue visitFMAXNUM(SDNode *N);
492  SDValue visitFMINIMUM(SDNode *N);
493  SDValue visitFMAXIMUM(SDNode *N);
494  SDValue visitBRCOND(SDNode *N);
495  SDValue visitBR_CC(SDNode *N);
496  SDValue visitLOAD(SDNode *N);
497 
498  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
499  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
500 
501  SDValue visitSTORE(SDNode *N);
502  SDValue visitLIFETIME_END(SDNode *N);
503  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
504  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
505  SDValue visitBUILD_VECTOR(SDNode *N);
506  SDValue visitCONCAT_VECTORS(SDNode *N);
507  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
508  SDValue visitVECTOR_SHUFFLE(SDNode *N);
509  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
510  SDValue visitINSERT_SUBVECTOR(SDNode *N);
511  SDValue visitMLOAD(SDNode *N);
512  SDValue visitMSTORE(SDNode *N);
513  SDValue visitMGATHER(SDNode *N);
514  SDValue visitMSCATTER(SDNode *N);
515  SDValue visitFP_TO_FP16(SDNode *N);
516  SDValue visitFP16_TO_FP(SDNode *N);
517  SDValue visitVECREDUCE(SDNode *N);
518 
519  SDValue visitFADDForFMACombine(SDNode *N);
520  SDValue visitFSUBForFMACombine(SDNode *N);
521  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
522 
523  SDValue XformToShuffleWithZero(SDNode *N);
524  bool reassociationCanBreakAddressingModePattern(unsigned Opc,
525  const SDLoc &DL, SDValue N0,
526  SDValue N1);
527  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
528  SDValue N1);
529  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
530  SDValue N1, SDNodeFlags Flags);
531 
532  SDValue visitShiftByConstant(SDNode *N);
533 
534  SDValue foldSelectOfConstants(SDNode *N);
535  SDValue foldVSelectOfConstants(SDNode *N);
536  SDValue foldBinOpIntoSelect(SDNode *BO);
537  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
538  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
539  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
540  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
541  SDValue N2, SDValue N3, ISD::CondCode CC,
542  bool NotExtCompare = false);
543  SDValue convertSelectOfFPConstantsToLoadOffset(
544  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
545  ISD::CondCode CC);
546  SDValue foldSignChangeInBitcast(SDNode *N);
547  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
548  SDValue N2, SDValue N3, ISD::CondCode CC);
549  SDValue foldSelectOfBinops(SDNode *N);
550  SDValue foldSextSetcc(SDNode *N);
551  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
552  const SDLoc &DL);
553  SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
554  SDValue unfoldMaskedMerge(SDNode *N);
555  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
556  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
557  const SDLoc &DL, bool foldBooleans);
558  SDValue rebuildSetCC(SDValue N);
559 
560  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
561  SDValue &CC, bool MatchStrict = false) const;
562  bool isOneUseSetCC(SDValue N) const;
563 
564  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
565  unsigned HiOp);
566  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
567  SDValue CombineExtLoad(SDNode *N);
568  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
569  SDValue combineRepeatedFPDivisors(SDNode *N);
570  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
571  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
572  SDValue BuildSDIV(SDNode *N);
573  SDValue BuildSDIVPow2(SDNode *N);
574  SDValue BuildUDIV(SDNode *N);
575  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
576  SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
577  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
578  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
579  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
580  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
581  SDNodeFlags Flags, bool Reciprocal);
582  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
583  SDNodeFlags Flags, bool Reciprocal);
584  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
585  bool DemandHighBits = true);
586  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
587  SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
588  SDValue InnerPos, SDValue InnerNeg,
589  unsigned PosOpcode, unsigned NegOpcode,
590  const SDLoc &DL);
591  SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
592  SDValue InnerPos, SDValue InnerNeg,
593  unsigned PosOpcode, unsigned NegOpcode,
594  const SDLoc &DL);
595  SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
596  SDValue MatchLoadCombine(SDNode *N);
597  SDValue mergeTruncStores(StoreSDNode *N);
598  SDValue ReduceLoadWidth(SDNode *N);
599  SDValue ReduceLoadOpStoreWidth(SDNode *N);
601  SDValue TransformFPLoadStorePair(SDNode *N);
602  SDValue convertBuildVecZextToZext(SDNode *N);
603  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
604  SDValue reduceBuildVecTruncToBitCast(SDNode *N);
605  SDValue reduceBuildVecToShuffle(SDNode *N);
606  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
607  ArrayRef<int> VectorMask, SDValue VecIn1,
608  SDValue VecIn2, unsigned LeftIdx,
609  bool DidSplitVec);
610  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
611 
612  /// Walk up chain skipping non-aliasing memory nodes,
613  /// looking for aliasing nodes and adding them to the Aliases vector.
614  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
615  SmallVectorImpl<SDValue> &Aliases);
616 
617  /// Return true if there is any possibility that the two addresses overlap.
618  bool isAlias(SDNode *Op0, SDNode *Op1) const;
619 
620  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
621  /// chain (aliasing node.)
622  SDValue FindBetterChain(SDNode *N, SDValue Chain);
623 
624  /// Try to replace a store and any possibly adjacent stores on
625  /// consecutive chains with better chains. Return true only if St is
626  /// replaced.
627  ///
628  /// Notice that other chains may still be replaced even if the function
629  /// returns false.
630  bool findBetterNeighborChains(StoreSDNode *St);
631 
632  // Helper for findBetterNeighborChains. Walk up the store chain and add
633  // additional chained stores that do not overlap and can be parallelized.
634  bool parallelizeChainedStores(StoreSDNode *St);
635 
636  /// Holds a pointer to an LSBaseSDNode as well as information on where it
637  /// is located in a sequence of memory operations connected by a chain.
638  struct MemOpLink {
639  // Ptr to the mem node.
640  LSBaseSDNode *MemNode;
641 
642  // Offset from the base ptr.
643  int64_t OffsetFromBase;
644 
645  MemOpLink(LSBaseSDNode *N, int64_t Offset)
646  : MemNode(N), OffsetFromBase(Offset) {}
647  };
648 
649  // Classify the origin of a stored value.
650  enum class StoreSource { Unknown, Constant, Extract, Load };
651  StoreSource getStoreSource(SDValue StoreVal) {
652  switch (StoreVal.getOpcode()) {
653  case ISD::Constant:
654  case ISD::ConstantFP:
655  return StoreSource::Constant;
656  case ISD::EXTRACT_VECTOR_ELT:
657  case ISD::EXTRACT_SUBVECTOR:
658  return StoreSource::Extract;
659  case ISD::LOAD:
660  return StoreSource::Load;
661  default:
662  return StoreSource::Unknown;
663  }
664  }
665 
666  /// This is a helper function for visitMUL to check the profitability
667  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
668  /// MulNode is the original multiply, AddNode is (add x, c1),
669  /// and ConstNode is c2.
670  bool isMulAddWithConstProfitable(SDNode *MulNode,
671  SDValue &AddNode,
672  SDValue &ConstNode);
673 
674  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
675  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
676  /// the type of the loaded value to be extended.
677  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
678  EVT LoadResultTy, EVT &ExtVT);
679 
680  /// Helper function to calculate whether the given Load/Store can have its
681  /// width reduced to ExtVT.
682  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
683  EVT &MemVT, unsigned ShAmt = 0);
684 
685  /// Used by BackwardsPropagateMask to find suitable loads.
686  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
687  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
688  ConstantSDNode *Mask, SDNode *&NodeToMask);
689  /// Attempt to propagate a given AND node back to load leaves so that they
690  /// can be combined into narrow loads.
691  bool BackwardsPropagateMask(SDNode *N);
692 
693  /// Helper function for mergeConsecutiveStores which merges the component
694  /// store chains.
695  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
696  unsigned NumStores);
697 
698  /// This is a helper function for mergeConsecutiveStores. When the source
699  /// elements of the consecutive stores are all constants or all extracted
700  /// vector elements, try to merge them into one larger store introducing
701  /// bitcasts if necessary. \return True if a merged store was created.
702  bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
703  EVT MemVT, unsigned NumStores,
704  bool IsConstantSrc, bool UseVector,
705  bool UseTrunc);
706 
707  /// This is a helper function for mergeConsecutiveStores. Stores that
708  /// potentially may be merged with St are placed in StoreNodes. RootNode is
709  /// a chain predecessor to all store candidates.
710  void getStoreMergeCandidates(StoreSDNode *St,
711  SmallVectorImpl<MemOpLink> &StoreNodes,
712  SDNode *&Root);
713 
714  /// Helper function for mergeConsecutiveStores. Checks if candidate stores
715  /// have indirect dependency through their operands. RootNode is the
716  /// predecessor to all stores calculated by getStoreMergeCandidates and is
717  /// used to prune the dependency check. \return True if safe to merge.
718  bool checkMergeStoreCandidatesForDependencies(
719  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
720  SDNode *RootNode);
721 
722  /// This is a helper function for mergeConsecutiveStores. Given a list of
723  /// store candidates, find the first N that are consecutive in memory.
724  /// Returns 0 if there are not at least 2 consecutive stores to try merging.
725  unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
726  int64_t ElementSizeBytes) const;
727 
728  /// This is a helper function for mergeConsecutiveStores. It is used for
729  /// store chains that are composed entirely of constant values.
730  bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
731  unsigned NumConsecutiveStores,
732  EVT MemVT, SDNode *Root, bool AllowVectors);
733 
734  /// This is a helper function for mergeConsecutiveStores. It is used for
735  /// store chains that are composed entirely of extracted vector elements.
736  /// When extracting multiple vector elements, try to store them in one
737  /// vector store rather than a sequence of scalar stores.
738  bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
739  unsigned NumConsecutiveStores, EVT MemVT,
740  SDNode *Root);
741 
742  /// This is a helper function for mergeConsecutiveStores. It is used for
743  /// store chains that are composed entirely of loaded values.
744  bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
745  unsigned NumConsecutiveStores, EVT MemVT,
746  SDNode *Root, bool AllowVectors,
747  bool IsNonTemporalStore, bool IsNonTemporalLoad);
748 
749  /// Merge consecutive store operations into a wide store.
750  /// This optimization uses wide integers or vectors when possible.
751  /// \return true if stores were merged.
752  bool mergeConsecutiveStores(StoreSDNode *St);
753 
754  /// Try to transform a truncation where C is a constant:
755  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
756  ///
757  /// \p N needs to be a truncation and its first operand an AND. Other
758  /// requirements are checked by the function (e.g. that trunc is
759  /// single-use) and if missed an empty SDValue is returned.
760  SDValue distributeTruncateThroughAnd(SDNode *N);
761 
762  /// Helper function to determine whether the target supports operation
763  /// given by \p Opcode for type \p VT, that is, whether the operation
764  /// is legal or custom before legalizing operations, and whether it is
765  /// legal (but not custom) after legalization.
766  bool hasOperation(unsigned Opcode, EVT VT) {
767  return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
768  }
769 
770  public:
771  /// Runs the dag combiner on all nodes in the work list
772  void Run(CombineLevel AtLevel);
773 
774  SelectionDAG &getDAG() const { return DAG; }
775 
776  /// Returns a type large enough to hold any valid shift amount - before type
777  /// legalization these can be huge.
778  EVT getShiftAmountTy(EVT LHSTy) {
779  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
780  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
781  }
782 
783  /// This method returns true if we are running before type legalization or
784  /// if the specified VT is legal.
785  bool isTypeLegal(const EVT &VT) {
786  if (!LegalTypes) return true;
787  return TLI.isTypeLegal(VT);
788  }
789 
790  /// Convenience wrapper around TargetLowering::getSetCCResultType
791  EVT getSetCCResultType(EVT VT) const {
792  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
793  }
794 
795  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
796  SDValue OrigLoad, SDValue ExtLoad,
797  ISD::NodeType ExtType);
798  };
799 
800 /// This class is a DAGUpdateListener that removes any deleted
801 /// nodes from the worklist.
802 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
803  DAGCombiner &DC;
804 
805 public:
806  explicit WorklistRemover(DAGCombiner &dc)
807  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
808 
809  void NodeDeleted(SDNode *N, SDNode *E) override {
810  DC.removeFromWorklist(N);
811  }
812 };
813 
814 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
815  DAGCombiner &DC;
816 
817 public:
818  explicit WorklistInserter(DAGCombiner &dc)
819  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
820 
821  // FIXME: Ideally we could add N to the worklist, but this causes exponential
822  // compile time costs in large DAGs, e.g. Halide.
823  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
824 };
825 
826 } // end anonymous namespace
827 
828 //===----------------------------------------------------------------------===//
829 // TargetLowering::DAGCombinerInfo implementation
830 //===----------------------------------------------------------------------===//
831 
832 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
833  ((DAGCombiner*)DC)->AddToWorklist(N);
834 }
835 
836 SDValue TargetLowering::DAGCombinerInfo::
837 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
838  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
839 }
840 
841 SDValue TargetLowering::DAGCombinerInfo::
842 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
843  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
844 }
845 
846 SDValue TargetLowering::DAGCombinerInfo::
847 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
848  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
849 }
850 
851 bool TargetLowering::DAGCombinerInfo::
852 recursivelyDeleteUnusedNodes(SDNode *N) {
853  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
854 }
855 
856 void TargetLowering::DAGCombinerInfo::
857 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
858  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
859 }
860 
861 //===----------------------------------------------------------------------===//
862 // Helper Functions
863 //===----------------------------------------------------------------------===//
864 
865 void DAGCombiner::deleteAndRecombine(SDNode *N) {
866  removeFromWorklist(N);
867 
868  // If the operands of this node are only used by the node, they will now be
869  // dead. Make sure to re-visit them and recursively delete dead nodes.
870  for (const SDValue &Op : N->ops())
871  // For an operand generating multiple values, one of the values may
872  // become dead allowing further simplification (e.g. split index
873  // arithmetic from an indexed load).
874  if (Op->hasOneUse() || Op->getNumValues() > 1)
875  AddToWorklist(Op.getNode());
876 
877  DAG.DeleteNode(N);
878 }
879 
880 // APInts must be the same size for most operations; this helper
881 // function zero-extends the shorter of the pair so that they match.
882 // We provide an Offset so that we can create bitwidths that won't overflow.
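// For example, with Offset = 1, an i8 LHS and an i16 RHS are both widened to
// 17 bits (1 + max(8, 16)).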
883 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
884  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
885  LHS = LHS.zextOrSelf(Bits);
886  RHS = RHS.zextOrSelf(Bits);
887 }
888 
889 // Return true if this node is a setcc, or is a select_cc
890 // that selects between the target values used for true and false, making it
891 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
892 // the appropriate nodes based on the type of node we are checking. This
893 // simplifies life a bit for the callers.
894 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
895  SDValue &CC, bool MatchStrict) const {
896  if (N.getOpcode() == ISD::SETCC) {
897  LHS = N.getOperand(0);
898  RHS = N.getOperand(1);
899  CC = N.getOperand(2);
900  return true;
901  }
902 
903  if (MatchStrict &&
904  (N.getOpcode() == ISD::STRICT_FSETCC ||
905  N.getOpcode() == ISD::STRICT_FSETCCS)) {
906  LHS = N.getOperand(1);
907  RHS = N.getOperand(2);
908  CC = N.getOperand(3);
909  return true;
910  }
911 
912  if (N.getOpcode() != ISD::SELECT_CC ||
913  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
914  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
915  return false;
916 
917  if (TLI.getBooleanContents(N.getValueType()) ==
918  TargetLowering::UndefinedBooleanContent)
919  return false;
920 
921  LHS = N.getOperand(0);
922  RHS = N.getOperand(1);
923  CC = N.getOperand(4);
924  return true;
925 }
926 
927 /// Return true if this is a SetCC-equivalent operation with only one use.
928 /// If this is true, it allows the users to invert the operation for free when
929 /// it is profitable to do so.
930 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
931  SDValue N0, N1, N2;
932  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
933  return true;
934  return false;
935 }
936 
937 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
938  if (!ScalarTy.isSimple())
939  return false;
940 
941  uint64_t MaskForTy = 0ULL;
942  switch (ScalarTy.getSimpleVT().SimpleTy) {
943  case MVT::i8:
944  MaskForTy = 0xFFULL;
945  break;
946  case MVT::i16:
947  MaskForTy = 0xFFFFULL;
948  break;
949  case MVT::i32:
950  MaskForTy = 0xFFFFFFFFULL;
951  break;
952  default:
953  return false;
954  break;
955  }
956 
957  APInt Val;
958  if (ISD::isConstantSplatVector(N, Val))
959  return Val.getLimitedValue() == MaskForTy;
960 
961  return false;
962 }
963 
964 // Determines if it is a constant integer or a splat/build vector of constant
965 // integers (and undefs).
966 // Do not permit build vector implicit truncation.
967 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
968  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
969  return !(Const->isOpaque() && NoOpaques);
970  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
971  return false;
972  unsigned BitWidth = N.getScalarValueSizeInBits();
973  for (const SDValue &Op : N->op_values()) {
974  if (Op.isUndef())
975  continue;
976  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
977  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
978  (Const->isOpaque() && NoOpaques))
979  return false;
980  }
981  return true;
982 }
983 
984 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
985 // with undefs.
986 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
987  if (V.getOpcode() != ISD::BUILD_VECTOR)
988  return false;
989  return isConstantOrConstantVector(V, NoOpaques) ||
990  ISD::isBuildVectorOfConstantSDNodes(V.getNode());
991 }
992 
993 // Determine whether this indexed load's index can be split off (i.e. it is not an opaque target constant).
994 static bool canSplitIdx(LoadSDNode *LD) {
995  return MaySplitLoadIndex &&
996  (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
997  !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
998 }
999 
1000 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1001  const SDLoc &DL,
1002  SDValue N0,
1003  SDValue N1) {
1004  // Currently this only tries to ensure we don't undo the GEP splits done by
1005  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1006  // we check if the following transformation would be problematic:
1007  // (load/store (add, (add, x, offset1), offset2)) ->
1008  // (load/store (add, x, offset1+offset2)).
1009 
1010  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1011  return false;
1012 
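  // If the inner add is only used by this add, no load or store addresses it
  // directly, so folding the two constants together cannot break an existing
  // addressing-mode match.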
1013  if (N0.hasOneUse())
1014  return false;
1015 
1016  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1017  auto *C2 = dyn_cast<ConstantSDNode>(N1);
1018  if (!C1 || !C2)
1019  return false;
1020 
1021  const APInt &C1APIntVal = C1->getAPIntValue();
1022  const APInt &C2APIntVal = C2->getAPIntValue();
1023  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1024  return false;
1025 
1026  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1027  if (CombinedValueIntVal.getBitWidth() > 64)
1028  return false;
1029  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1030 
1031  for (SDNode *Node : N0->uses()) {
1032  auto LoadStore = dyn_cast<MemSDNode>(Node);
1033  if (LoadStore) {
1034  // Is x[offset2] already not a legal addressing mode? If so then
1035  // reassociating the constants breaks nothing (we test offset2 because
1036  // that's the one we hope to fold into the load or store).
1037  TargetLowering::AddrMode AM;
1038  AM.HasBaseReg = true;
1039  AM.BaseOffs = C2APIntVal.getSExtValue();
1040  EVT VT = LoadStore->getMemoryVT();
1041  unsigned AS = LoadStore->getAddressSpace();
1042  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1043  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1044  continue;
1045 
1046  // Would x[offset1+offset2] still be a legal addressing mode?
1047  AM.BaseOffs = CombinedValue;
1048  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1049  return true;
1050  }
1051  }
1052 
1053  return false;
1054 }
1055 
1056 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1057 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1058 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1059  SDValue N0, SDValue N1) {
1060  EVT VT = N0.getValueType();
1061 
1062  if (N0.getOpcode() != Opc)
1063  return SDValue();
1064 
1065  if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1066  if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1067  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1068  if (SDValue OpNode =
1069  DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1070  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1071  return SDValue();
1072  }
1073  if (N0.hasOneUse()) {
1074  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1075  // iff (op x, c1) has one use
1076  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1077  if (!OpNode.getNode())
1078  return SDValue();
1079  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1080  }
1081  }
1082  return SDValue();
1083 }
1084 
1085 // Try to reassociate commutative binops.
1086 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1087  SDValue N1, SDNodeFlags Flags) {
1088  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1089 
1090  // Floating-point reassociation is not allowed without loose FP math.
1091  if (N0.getValueType().isFloatingPoint() ||
1092  N1.getValueType().isFloatingPoint())
1093  if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1094  return SDValue();
1095 
1096  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1097  return Combined;
1098  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1099  return Combined;
1100  return SDValue();
1101 }
1102 
1103 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1104  bool AddTo) {
1105  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1106  ++NodesCombined;
1107  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1108  To[0].getNode()->dump(&DAG);
1109  dbgs() << " and " << NumTo - 1 << " other values\n");
1110  for (unsigned i = 0, e = NumTo; i != e; ++i)
1111  assert((!To[i].getNode() ||
1112  N->getValueType(i) == To[i].getValueType()) &&
1113  "Cannot combine value to value of different type!");
1114 
1115  WorklistRemover DeadNodes(*this);
1116  DAG.ReplaceAllUsesWith(N, To);
1117  if (AddTo) {
1118  // Push the new nodes and any users onto the worklist
1119  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1120  if (To[i].getNode()) {
1121  AddToWorklist(To[i].getNode());
1122  AddUsersToWorklist(To[i].getNode());
1123  }
1124  }
1125  }
1126 
1127  // Finally, if the node is now dead, remove it from the graph. The node
1128  // may not be dead if the replacement process recursively simplified to
1129  // something else needing this node.
1130  if (N->use_empty())
1131  deleteAndRecombine(N);
1132  return SDValue(N, 0);
1133 }
1134 
1135 void DAGCombiner::
1136 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1137  // Replace the old value with the new one.
1138  ++NodesCombined;
1139  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1140  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1141  dbgs() << '\n');
1142 
1143  // Replace all uses. If any nodes become isomorphic to other nodes and
1144  // are deleted, make sure to remove them from our worklist.
1145  WorklistRemover DeadNodes(*this);
1146  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1147 
1148  // Push the new node and any (possibly new) users onto the worklist.
1149  AddToWorklistWithUsers(TLO.New.getNode());
1150 
1151  // Finally, if the node is now dead, remove it from the graph. The node
1152  // may not be dead if the replacement process recursively simplified to
1153  // something else needing this node.
1154  if (TLO.Old.getNode()->use_empty())
1155  deleteAndRecombine(TLO.Old.getNode());
1156 }
1157 
1158 /// Check the specified integer node value to see if it can be simplified or if
1159 /// things it uses can be simplified by bit propagation. If so, return true.
1160 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1161  const APInt &DemandedElts,
1162  bool AssumeSingleUse) {
1163  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1164  KnownBits Known;
1165  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1166  AssumeSingleUse))
1167  return false;
1168 
1169  // Revisit the node.
1170  AddToWorklist(Op.getNode());
1171 
1172  CommitTargetLoweringOpt(TLO);
1173  return true;
1174 }
1175 
1176 /// Check the specified vector node value to see if it can be simplified or
1177 /// if things it uses can be simplified as it only uses some of the elements.
1178 /// If so, return true.
1179 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1180  const APInt &DemandedElts,
1181  bool AssumeSingleUse) {
1182  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1183  APInt KnownUndef, KnownZero;
1184  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1185  TLO, 0, AssumeSingleUse))
1186  return false;
1187 
1188  // Revisit the node.
1189  AddToWorklist(Op.getNode());
1190 
1191  CommitTargetLoweringOpt(TLO);
1192  return true;
1193 }
1194 
1195 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1196  SDLoc DL(Load);
1197  EVT VT = Load->getValueType(0);
1198  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1199 
1200  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1201  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1202  WorklistRemover DeadNodes(*this);
1203  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1204  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1205  deleteAndRecombine(Load);
1206  AddToWorklist(Trunc.getNode());
1207 }
1208 
1209 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1210  Replace = false;
1211  SDLoc DL(Op);
1212  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1213  LoadSDNode *LD = cast<LoadSDNode>(Op);
1214  EVT MemVT = LD->getMemoryVT();
1215  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1216  : LD->getExtensionType();
1217  Replace = true;
1218  return DAG.getExtLoad(ExtType, DL, PVT,
1219  LD->getChain(), LD->getBasePtr(),
1220  MemVT, LD->getMemOperand());
1221  }
1222 
1223  unsigned Opc = Op.getOpcode();
1224  switch (Opc) {
1225  default: break;
1226  case ISD::AssertSext:
1227  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1228  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1229  break;
1230  case ISD::AssertZext:
1231  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1232  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1233  break;
1234  case ISD::Constant: {
1235  unsigned ExtOpc =
1236  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1237  return DAG.getNode(ExtOpc, DL, PVT, Op);
1238  }
1239  }
1240 
1241  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1242  return SDValue();
1243  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1244 }
1245 
1246 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1247  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1248  return SDValue();
1249  EVT OldVT = Op.getValueType();
1250  SDLoc DL(Op);
1251  bool Replace = false;
1252  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1253  if (!NewOp.getNode())
1254  return SDValue();
1255  AddToWorklist(NewOp.getNode());
1256 
1257  if (Replace)
1258  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1259  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1260  DAG.getValueType(OldVT));
1261 }
1262 
1263 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1264  EVT OldVT = Op.getValueType();
1265  SDLoc DL(Op);
1266  bool Replace = false;
1267  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1268  if (!NewOp.getNode())
1269  return SDValue();
1270  AddToWorklist(NewOp.getNode());
1271 
1272  if (Replace)
1273  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1274  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1275 }
1276 
1277 /// Promote the specified integer binary operation if the target indicates it is
1278 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1279 /// i32 since i16 instructions are longer.
1280 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1281  if (!LegalOperations)
1282  return SDValue();
1283 
1284  EVT VT = Op.getValueType();
1285  if (VT.isVector() || !VT.isInteger())
1286  return SDValue();
1287 
1288  // If operation type is 'undesirable', e.g. i16 on x86, consider
1289  // promoting it.
1290  unsigned Opc = Op.getOpcode();
1291  if (TLI.isTypeDesirableForOp(Opc, VT))
1292  return SDValue();
1293 
1294  EVT PVT = VT;
1295  // Consult target whether it is a good idea to promote this operation and
1296  // what's the right type to promote it to.
1297  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1298  assert(PVT != VT && "Don't know what type to promote to!");
1299 
1300  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1301 
1302  bool Replace0 = false;
1303  SDValue N0 = Op.getOperand(0);
1304  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1305 
1306  bool Replace1 = false;
1307  SDValue N1 = Op.getOperand(1);
1308  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1309  SDLoc DL(Op);
1310 
1311  SDValue RV =
1312  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1313 
1314  // We are always replacing N0/N1's use in N and only need additional
1315  // replacements if there are additional uses.
1316  // Note: We are checking uses of the *nodes* (SDNode) rather than values
1317  // (SDValue) here because the node may reference multiple values
1318  // (for example, the chain value of a load node).
1319  Replace0 &= !N0->hasOneUse();
1320  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1321 
1322  // Combine Op here so it is preserved past replacements.
1323  CombineTo(Op.getNode(), RV);
1324 
1325  // If operands have a use ordering, make sure we deal with
1326  // predecessor first.
1327  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1328  std::swap(N0, N1);
1329  std::swap(NN0, NN1);
1330  }
1331 
1332  if (Replace0) {
1333  AddToWorklist(NN0.getNode());
1334  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1335  }
1336  if (Replace1) {
1337  AddToWorklist(NN1.getNode());
1338  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1339  }
1340  return Op;
1341  }
1342  return SDValue();
1343 }
1344 
1345 /// Promote the specified integer shift operation if the target indicates it is
1346 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1347 /// i32 since i16 instructions are longer.
1348 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1349  if (!LegalOperations)
1350  return SDValue();
1351 
1352  EVT VT = Op.getValueType();
1353  if (VT.isVector() || !VT.isInteger())
1354  return SDValue();
1355 
1356  // If operation type is 'undesirable', e.g. i16 on x86, consider
1357  // promoting it.
1358  unsigned Opc = Op.getOpcode();
1359  if (TLI.isTypeDesirableForOp(Opc, VT))
1360  return SDValue();
1361 
1362  EVT PVT = VT;
1363  // Consult target whether it is a good idea to promote this operation and
1364  // what's the right type to promote it to.
1365  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1366  assert(PVT != VT && "Don't know what type to promote to!");
1367 
1368  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1369 
1370  bool Replace = false;
1371  SDValue N0 = Op.getOperand(0);
1372  SDValue N1 = Op.getOperand(1);
1373  if (Opc == ISD::SRA)
1374  N0 = SExtPromoteOperand(N0, PVT);
1375  else if (Opc == ISD::SRL)
1376  N0 = ZExtPromoteOperand(N0, PVT);
1377  else
1378  N0 = PromoteOperand(N0, PVT, Replace);
1379 
1380  if (!N0.getNode())
1381  return SDValue();
1382 
1383  SDLoc DL(Op);
1384  SDValue RV =
1385  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1386 
1387  if (Replace)
1388  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1389 
1390  // Deal with Op being deleted.
1391  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1392  return RV;
1393  }
1394  return SDValue();
1395 }
1396 
1397 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1398  if (!LegalOperations)
1399  return SDValue();
1400 
1401  EVT VT = Op.getValueType();
1402  if (VT.isVector() || !VT.isInteger())
1403  return SDValue();
1404 
1405  // If operation type is 'undesirable', e.g. i16 on x86, consider
1406  // promoting it.
1407  unsigned Opc = Op.getOpcode();
1408  if (TLI.isTypeDesirableForOp(Opc, VT))
1409  return SDValue();
1410 
1411  EVT PVT = VT;
1412  // Consult target whether it is a good idea to promote this operation and
1413  // what's the right type to promote it to.
1414  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1415  assert(PVT != VT && "Don't know what type to promote to!");
1416  // fold (aext (aext x)) -> (aext x)
1417  // fold (aext (zext x)) -> (zext x)
1418  // fold (aext (sext x)) -> (sext x)
1419  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1420  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1421  }
1422  return SDValue();
1423 }
1424 
1425 bool DAGCombiner::PromoteLoad(SDValue Op) {
1426  if (!LegalOperations)
1427  return false;
1428 
1429  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1430  return false;
1431 
1432  EVT VT = Op.getValueType();
1433  if (VT.isVector() || !VT.isInteger())
1434  return false;
1435 
1436  // If operation type is 'undesirable', e.g. i16 on x86, consider
1437  // promoting it.
1438  unsigned Opc = Op.getOpcode();
1439  if (TLI.isTypeDesirableForOp(Opc, VT))
1440  return false;
1441 
1442  EVT PVT = VT;
1443  // Consult target whether it is a good idea to promote this operation and
1444  // what's the right type to promote it to.
1445  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1446  assert(PVT != VT && "Don't know what type to promote to!");
1447 
1448  SDLoc DL(Op);
1449  SDNode *N = Op.getNode();
1450  LoadSDNode *LD = cast<LoadSDNode>(N);
1451  EVT MemVT = LD->getMemoryVT();
1452  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1453  : LD->getExtensionType();
1454  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1455  LD->getChain(), LD->getBasePtr(),
1456  MemVT, LD->getMemOperand());
1457  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1458 
1459  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1460  Result.getNode()->dump(&DAG); dbgs() << '\n');
1461  WorklistRemover DeadNodes(*this);
1462  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1463  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1464  deleteAndRecombine(N);
1465  AddToWorklist(Result.getNode());
1466  return true;
1467  }
1468  return false;
1469 }
1470 
1471 /// Recursively delete a node which has no uses and any operands for
1472 /// which it is the only use.
1473 ///
1474 /// Note that this both deletes the nodes and removes them from the worklist.
1475 /// It also adds any nodes that have had a user deleted to the worklist, as
1476 /// they may now have only one use and be subject to other combines.
1477 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1478  if (!N->use_empty())
1479  return false;
1480 
1481  SmallSetVector<SDNode *, 16> Nodes;
1482  Nodes.insert(N);
1483  do {
1484  N = Nodes.pop_back_val();
1485  if (!N)
1486  continue;
1487 
1488  if (N->use_empty()) {
1489  for (const SDValue &ChildN : N->op_values())
1490  Nodes.insert(ChildN.getNode());
1491 
1492  removeFromWorklist(N);
1493  DAG.DeleteNode(N);
1494  } else {
1495  AddToWorklist(N);
1496  }
1497  } while (!Nodes.empty());
1498  return true;
1499 }
1500 
1501 //===----------------------------------------------------------------------===//
1502 // Main DAG Combiner implementation
1503 //===----------------------------------------------------------------------===//
1504 
1505 void DAGCombiner::Run(CombineLevel AtLevel) {
1506  // Set the instance variables so that the various visit routines may use them.
1507  Level = AtLevel;
1508  LegalDAG = Level >= AfterLegalizeDAG;
1509  LegalOperations = Level >= AfterLegalizeVectorOps;
1510  LegalTypes = Level >= AfterLegalizeTypes;
1511 
1512  WorklistInserter AddNodes(*this);
1513 
1514  // Add all the dag nodes to the worklist.
1515  for (SDNode &Node : DAG.allnodes())
1516  AddToWorklist(&Node);
1517 
1518  // Create a dummy node (which is not added to allnodes), that adds a reference
1519  // to the root node, preventing it from being deleted, and tracking any
1520  // changes of the root.
1521  HandleSDNode Dummy(DAG.getRoot());
1522 
1523  // While we have a valid worklist entry node, try to combine it.
1524  while (SDNode *N = getNextWorklistEntry()) {
1525  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1526  // N is deleted from the DAG, since they too may now be dead or may have a
1527  // reduced number of uses, allowing other xforms.
1528  if (recursivelyDeleteUnusedNodes(N))
1529  continue;
1530 
1531  WorklistRemover DeadNodes(*this);
1532 
1533  // If this combine is running after legalizing the DAG, re-legalize any
1534  // nodes pulled off the worklist.
1535  if (LegalDAG) {
1536  SmallSetVector<SDNode *, 16> UpdatedNodes;
1537  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1538 
1539  for (SDNode *LN : UpdatedNodes)
1540  AddToWorklistWithUsers(LN);
1541 
1542  if (!NIsValid)
1543  continue;
1544  }
1545 
1546  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1547 
1548  // Add any operands of the new node which have not yet been combined to the
1549  // worklist as well. Because the worklist uniques things already, this
1550  // won't repeatedly process the same operand.
1551  CombinedNodes.insert(N);
1552  for (const SDValue &ChildN : N->op_values())
1553  if (!CombinedNodes.count(ChildN.getNode()))
1554  AddToWorklist(ChildN.getNode());
1555 
1556  SDValue RV = combine(N);
1557 
1558  if (!RV.getNode())
1559  continue;
1560 
1561  ++NodesCombined;
1562 
1563  // If we get back the same node we passed in, rather than a new node or
1564  // zero, we know that the node must have defined multiple values and
1565  // CombineTo was used. Since CombineTo takes care of the worklist
1566  // mechanics for us, we have no work to do in this case.
1567  if (RV.getNode() == N)
1568  continue;
1569 
1570  assert(N->getOpcode() != ISD::DELETED_NODE &&
1571  RV.getOpcode() != ISD::DELETED_NODE &&
1572  "Node was deleted but visit returned new node!");
1573 
1574  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1575 
1576  if (N->getNumValues() == RV.getNode()->getNumValues())
1577  DAG.ReplaceAllUsesWith(N, RV.getNode());
1578  else {
1579  assert(N->getValueType(0) == RV.getValueType() &&
1580  N->getNumValues() == 1 && "Type mismatch");
1581  DAG.ReplaceAllUsesWith(N, &RV);
1582  }
1583 
1584  // Push the new node and any users onto the worklist. Omit this if the
1585  // new node is the EntryToken (e.g. if a store managed to get optimized
1586  // out), because re-visiting the EntryToken and its users will not uncover
1587  // any additional opportunities, but there may be a large number of such
1588  // users, potentially causing compile time explosion.
1589  if (RV.getOpcode() != ISD::EntryToken) {
1590  AddToWorklist(RV.getNode());
1591  AddUsersToWorklist(RV.getNode());
1592  }
1593 
1594  // Finally, if the node is now dead, remove it from the graph. The node
1595  // may not be dead if the replacement process recursively simplified to
1596  // something else needing this node. This will also take care of adding any
1597  // operands which have lost a user to the worklist.
1598  recursivelyDeleteUnusedNodes(N);
1599  }
1600 
1601  // If the root changed (e.g. it was a dead load), update the root.
1602  DAG.setRoot(Dummy.getValue());
1603  DAG.RemoveDeadNodes();
1604 }
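
The loop above is a worklist-driven fixpoint: a node is revisited whenever a combine may have exposed new opportunities in it, its operands, or its users. The following standalone sketch (not part of this file; ToyNode, toyCombine and runToFixpoint are illustrative names only) shows the same pattern on a toy node type.

#include <unordered_set>
#include <vector>

// Toy stand-ins for SDNode and a combine step, for illustration only.
struct ToyNode {
  int Value;
  std::vector<ToyNode *> Users;
};

// A toy "combine": halve even values; returns true if the node changed.
static bool toyCombine(ToyNode *N) {
  if (N->Value == 0 || N->Value % 2 != 0)
    return false;
  N->Value /= 2;
  return true;
}

// Worklist-fixpoint driver, analogous in shape to DAGCombiner::Run above.
static void runToFixpoint(std::vector<ToyNode *> Worklist) {
  std::unordered_set<ToyNode *> InList(Worklist.begin(), Worklist.end());
  while (!Worklist.empty()) {
    ToyNode *N = Worklist.back();
    Worklist.pop_back();
    InList.erase(N);
    if (!toyCombine(N))
      continue;
    // Like AddToWorklist/AddUsersToWorklist: revisit N and anything using it,
    // keeping the worklist unique.
    if (InList.insert(N).second)
      Worklist.push_back(N);
    for (ToyNode *U : N->Users)
      if (InList.insert(U).second)
        Worklist.push_back(U);
  }
}

int main() {
  ToyNode A{8, {}};
  ToyNode B{12, {&A}};
  runToFixpoint({&A, &B});
  return (A.Value == 1 && B.Value == 3) ? 0 : 1; // both reach their fixpoint
}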
1605 
1606 SDValue DAGCombiner::visit(SDNode *N) {
1607  switch (N->getOpcode()) {
1608  default: break;
1609  case ISD::TokenFactor: return visitTokenFactor(N);
1610  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1611  case ISD::ADD: return visitADD(N);
1612  case ISD::SUB: return visitSUB(N);
1613  case ISD::SADDSAT:
1614  case ISD::UADDSAT: return visitADDSAT(N);
1615  case ISD::SSUBSAT:
1616  case ISD::USUBSAT: return visitSUBSAT(N);
1617  case ISD::ADDC: return visitADDC(N);
1618  case ISD::SADDO:
1619  case ISD::UADDO: return visitADDO(N);
1620  case ISD::SUBC: return visitSUBC(N);
1621  case ISD::SSUBO:
1622  case ISD::USUBO: return visitSUBO(N);
1623  case ISD::ADDE: return visitADDE(N);
1624  case ISD::ADDCARRY: return visitADDCARRY(N);
1625  case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1626  case ISD::SUBE: return visitSUBE(N);
1627  case ISD::SUBCARRY: return visitSUBCARRY(N);
1628  case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1629  case ISD::SMULFIX:
1630  case ISD::SMULFIXSAT:
1631  case ISD::UMULFIX:
1632  case ISD::UMULFIXSAT: return visitMULFIX(N);
1633  case ISD::MUL: return visitMUL(N);
1634  case ISD::SDIV: return visitSDIV(N);
1635  case ISD::UDIV: return visitUDIV(N);
1636  case ISD::SREM:
1637  case ISD::UREM: return visitREM(N);
1638  case ISD::MULHU: return visitMULHU(N);
1639  case ISD::MULHS: return visitMULHS(N);
1640  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1641  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1642  case ISD::SMULO:
1643  case ISD::UMULO: return visitMULO(N);
1644  case ISD::SMIN:
1645  case ISD::SMAX:
1646  case ISD::UMIN:
1647  case ISD::UMAX: return visitIMINMAX(N);
1648  case ISD::AND: return visitAND(N);
1649  case ISD::OR: return visitOR(N);
1650  case ISD::XOR: return visitXOR(N);
1651  case ISD::SHL: return visitSHL(N);
1652  case ISD::SRA: return visitSRA(N);
1653  case ISD::SRL: return visitSRL(N);
1654  case ISD::ROTR:
1655  case ISD::ROTL: return visitRotate(N);
1656  case ISD::FSHL:
1657  case ISD::FSHR: return visitFunnelShift(N);
1658  case ISD::ABS: return visitABS(N);
1659  case ISD::BSWAP: return visitBSWAP(N);
1660  case ISD::BITREVERSE: return visitBITREVERSE(N);
1661  case ISD::CTLZ: return visitCTLZ(N);
1662  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1663  case ISD::CTTZ: return visitCTTZ(N);
1664  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1665  case ISD::CTPOP: return visitCTPOP(N);
1666  case ISD::SELECT: return visitSELECT(N);
1667  case ISD::VSELECT: return visitVSELECT(N);
1668  case ISD::SELECT_CC: return visitSELECT_CC(N);
1669  case ISD::SETCC: return visitSETCC(N);
1670  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1671  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1672  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1673  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1674  case ISD::AssertSext:
1675  case ISD::AssertZext: return visitAssertExt(N);
1676  case ISD::AssertAlign: return visitAssertAlign(N);
1677  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1678  case ISD::SIGN_EXTEND_VECTOR_INREG:
1679  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1680  case ISD::TRUNCATE: return visitTRUNCATE(N);
1681  case ISD::BITCAST: return visitBITCAST(N);
1682  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1683  case ISD::FADD: return visitFADD(N);
1684  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1685  case ISD::FSUB: return visitFSUB(N);
1686  case ISD::FMUL: return visitFMUL(N);
1687  case ISD::FMA: return visitFMA(N);
1688  case ISD::FDIV: return visitFDIV(N);
1689  case ISD::FREM: return visitFREM(N);
1690  case ISD::FSQRT: return visitFSQRT(N);
1691  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1692  case ISD::FPOW: return visitFPOW(N);
1693  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1694  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1695  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1696  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1697  case ISD::FP_ROUND: return visitFP_ROUND(N);
1698  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1699  case ISD::FNEG: return visitFNEG(N);
1700  case ISD::FABS: return visitFABS(N);
1701  case ISD::FFLOOR: return visitFFLOOR(N);
1702  case ISD::FMINNUM: return visitFMINNUM(N);
1703  case ISD::FMAXNUM: return visitFMAXNUM(N);
1704  case ISD::FMINIMUM: return visitFMINIMUM(N);
1705  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1706  case ISD::FCEIL: return visitFCEIL(N);
1707  case ISD::FTRUNC: return visitFTRUNC(N);
1708  case ISD::BRCOND: return visitBRCOND(N);
1709  case ISD::BR_CC: return visitBR_CC(N);
1710  case ISD::LOAD: return visitLOAD(N);
1711  case ISD::STORE: return visitSTORE(N);
1712  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1713  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1714  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1715  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1716  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1717  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1718  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1719  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1720  case ISD::MGATHER: return visitMGATHER(N);
1721  case ISD::MLOAD: return visitMLOAD(N);
1722  case ISD::MSCATTER: return visitMSCATTER(N);
1723  case ISD::MSTORE: return visitMSTORE(N);
1724  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1725  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1726  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1727  case ISD::FREEZE: return visitFREEZE(N);
1728  case ISD::VECREDUCE_FADD:
1729  case ISD::VECREDUCE_FMUL:
1730  case ISD::VECREDUCE_ADD:
1731  case ISD::VECREDUCE_MUL:
1732  case ISD::VECREDUCE_AND:
1733  case ISD::VECREDUCE_OR:
1734  case ISD::VECREDUCE_XOR:
1735  case ISD::VECREDUCE_SMAX:
1736  case ISD::VECREDUCE_SMIN:
1737  case ISD::VECREDUCE_UMAX:
1738  case ISD::VECREDUCE_UMIN:
1739  case ISD::VECREDUCE_FMAX:
1740  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1741  }
1742  return SDValue();
1743 }
1744 
1745 SDValue DAGCombiner::combine(SDNode *N) {
1746  SDValue RV;
1747  if (!DisableGenericCombines)
1748  RV = visit(N);
1749 
1750  // If nothing happened, try a target-specific DAG combine.
1751  if (!RV.getNode()) {
1752  assert(N->getOpcode() != ISD::DELETED_NODE &&
1753  "Node was deleted but visit returned NULL!");
1754 
1755  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1756  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1757 
1758  // Expose the DAG combiner to the target combiner impls.
1759  TargetLowering::DAGCombinerInfo
1760  DagCombineInfo(DAG, Level, false, this);
1761 
1762  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1763  }
1764  }
1765 
1766  // If nothing happened still, try promoting the operation.
1767  if (!RV.getNode()) {
1768  switch (N->getOpcode()) {
1769  default: break;
1770  case ISD::ADD:
1771  case ISD::SUB:
1772  case ISD::MUL:
1773  case ISD::AND:
1774  case ISD::OR:
1775  case ISD::XOR:
1776  RV = PromoteIntBinOp(SDValue(N, 0));
1777  break;
1778  case ISD::SHL:
1779  case ISD::SRA:
1780  case ISD::SRL:
1781  RV = PromoteIntShiftOp(SDValue(N, 0));
1782  break;
1783  case ISD::SIGN_EXTEND:
1784  case ISD::ZERO_EXTEND:
1785  case ISD::ANY_EXTEND:
1786  RV = PromoteExtend(SDValue(N, 0));
1787  break;
1788  case ISD::LOAD:
1789  if (PromoteLoad(SDValue(N, 0)))
1790  RV = SDValue(N, 0);
1791  break;
1792  }
1793  }
1794 
1795  // If N is a commutative binary node, try to eliminate it if the commuted
1796  // version is already present in the DAG.
1797  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1798  N->getNumValues() == 1) {
1799  SDValue N0 = N->getOperand(0);
1800  SDValue N1 = N->getOperand(1);
1801 
1802  // Constant operands are canonicalized to RHS.
1803  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1804  SDValue Ops[] = {N1, N0};
1805  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1806  N->getFlags());
1807  if (CSENode)
1808  return SDValue(CSENode, 0);
1809  }
1810  }
1811 
1812  return RV;
1813 }
1814 
1815 /// Given a node, return its input chain if it has one, otherwise return a null
1816 /// sd operand.
1817 static SDValue getInputChainForNode(SDNode *N) {
1818  if (unsigned NumOps = N->getNumOperands()) {
1819  if (N->getOperand(0).getValueType() == MVT::Other)
1820  return N->getOperand(0);
1821  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1822  return N->getOperand(NumOps-1);
1823  for (unsigned i = 1; i < NumOps-1; ++i)
1824  if (N->getOperand(i).getValueType() == MVT::Other)
1825  return N->getOperand(i);
1826  }
1827  return SDValue();
1828 }
1829 
1830 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1831  // If N has two operands, where one has an input chain equal to the other,
1832  // the 'other' chain is redundant.
1833  if (N->getNumOperands() == 2) {
1834  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1835  return N->getOperand(0);
1836  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1837  return N->getOperand(1);
1838  }
1839 
1840  // Don't simplify token factors if optnone.
1841  if (OptLevel == CodeGenOpt::None)
1842  return SDValue();
1843 
1844  // Don't simplify the token factor if the node itself has too many operands.
1845  if (N->getNumOperands() > TokenFactorInlineLimit)
1846  return SDValue();
1847 
1848  // If the sole user is a token factor, we should make sure we have a
1849  // chance to merge them together. This prevents TF chains from inhibiting
1850  // optimizations.
1851  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1852  AddToWorklist(*(N->use_begin()));
1853 
1854  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1855  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1856  SmallPtrSet<SDNode*, 16> SeenOps;
1857  bool Changed = false; // If we should replace this token factor.
1858 
1859  // Start out with this token factor.
1860  TFs.push_back(N);
1861 
1862  // Iterate through token factors. The TFs list grows when new token factors are
1863  // encountered.
1864  for (unsigned i = 0; i < TFs.size(); ++i) {
1865  // Limit number of nodes to inline, to avoid quadratic compile times.
1866  // We have to add the outstanding Token Factors to Ops, otherwise we might
1867  // drop Ops from the resulting Token Factors.
1868  if (Ops.size() > TokenFactorInlineLimit) {
1869  for (unsigned j = i; j < TFs.size(); j++)
1870  Ops.emplace_back(TFs[j], 0);
1871  // Drop unprocessed Token Factors from TFs, so we do not add them to the
1872  // combiner worklist later.
1873  TFs.resize(i);
1874  break;
1875  }
1876 
1877  SDNode *TF = TFs[i];
1878  // Check each of the operands.
1879  for (const SDValue &Op : TF->op_values()) {
1880  switch (Op.getOpcode()) {
1881  case ISD::EntryToken:
1882  // Entry tokens don't need to be added to the list. They are
1883  // redundant.
1884  Changed = true;
1885  break;
1886 
1887  case ISD::TokenFactor:
1888  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1889  // Queue up for processing.
1890  TFs.push_back(Op.getNode());
1891  Changed = true;
1892  break;
1893  }
1894  LLVM_FALLTHROUGH;
1895 
1896  default:
1897  // Only add if it isn't already in the list.
1898  if (SeenOps.insert(Op.getNode()).second)
1899  Ops.push_back(Op);
1900  else
1901  Changed = true;
1902  break;
1903  }
1904  }
1905  }
1906 
1907  // Re-visit inlined Token Factors, to clean them up in case they have been
1908  // removed. Skip the first Token Factor, as this is the current node.
1909  for (unsigned i = 1, e = TFs.size(); i < e; i++)
1910  AddToWorklist(TFs[i]);
1911 
1912  // Remove Nodes that are chained to another node in the list. Do so
1913  // by walking up chains breadth-first, stopping when we've seen
1914  // another operand. In general we must climb to the EntryNode, but we can exit
1915  // early if we find all remaining work is associated with just one operand as
1916  // no further pruning is possible.
1917 
1918  // List of nodes to search through and original Ops from which they originate.
1919  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1920  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1921  SmallPtrSet<SDNode *, 16> SeenChains;
1922  bool DidPruneOps = false;
1923 
1924  unsigned NumLeftToConsider = 0;
1925  for (const SDValue &Op : Ops) {
1926  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1927  OpWorkCount.push_back(1);
1928  }
1929 
1930  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1931  // If this is an Op, we can remove the op from the list. Re-mark any
1932  // search associated with it as coming from the current OpNumber.
1933  if (SeenOps.contains(Op)) {
1934  Changed = true;
1935  DidPruneOps = true;
1936  unsigned OrigOpNumber = 0;
1937  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1938  OrigOpNumber++;
1939  assert((OrigOpNumber != Ops.size()) &&
1940  "expected to find TokenFactor Operand");
1941  // Re-mark worklist from OrigOpNumber to OpNumber
1942  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1943  if (Worklist[i].second == OrigOpNumber) {
1944  Worklist[i].second = OpNumber;
1945  }
1946  }
1947  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1948  OpWorkCount[OrigOpNumber] = 0;
1949  NumLeftToConsider--;
1950  }
1951  // Add if it's a new chain
1952  if (SeenChains.insert(Op).second) {
1953  OpWorkCount[OpNumber]++;
1954  Worklist.push_back(std::make_pair(Op, OpNumber));
1955  }
1956  };
1957 
1958  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1959  // We need to consider at least 2 Ops to prune.
1960  if (NumLeftToConsider <= 1)
1961  break;
1962  auto CurNode = Worklist[i].first;
1963  auto CurOpNumber = Worklist[i].second;
1964  assert((OpWorkCount[CurOpNumber] > 0) &&
1965  "Node should not appear in worklist");
1966  switch (CurNode->getOpcode()) {
1967  case ISD::EntryToken:
1968  // Hitting EntryToken is the only way for the search to terminate
1969  // without hitting another operand's search. Prevent us from marking
1970  // this operand considered.
1971 
1972  NumLeftToConsider++;
1973  break;
1974  case ISD::TokenFactor:
1975  for (const SDValue &Op : CurNode->op_values())
1976  AddToWorklist(i, Op.getNode(), CurOpNumber);
1977  break;
1978  case ISD::LIFETIME_START:
1979  case ISD::LIFETIME_END:
1980  case ISD::CopyFromReg:
1981  case ISD::CopyToReg:
1982  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1983  break;
1984  default:
1985  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1986  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1987  break;
1988  }
1989  OpWorkCount[CurOpNumber]--;
1990  if (OpWorkCount[CurOpNumber] == 0)
1991  NumLeftToConsider--;
1992  }
1993 
1994  // If we've changed things around then replace token factor.
1995  if (Changed) {
1996  SDValue Result;
1997  if (Ops.empty()) {
1998  // The entry token is the only possible outcome.
1999  Result = DAG.getEntryNode();
2000  } else {
2001  if (DidPruneOps) {
2002  SmallVector<SDValue, 8> PrunedOps;
2003  //
2004  for (const SDValue &Op : Ops) {
2005  if (SeenChains.count(Op.getNode()) == 0)
2006  PrunedOps.push_back(Op);
2007  }
2008  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2009  } else {
2010  Result = DAG.getTokenFactor(SDLoc(N), Ops);
2011  }
2012  }
2013  return Result;
2014  }
2015  return SDValue();
2016 }
2017 
2018 /// MERGE_VALUES can always be eliminated.
2019 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2020  WorklistRemover DeadNodes(*this);
2021  // Replacing results may cause a different MERGE_VALUES to suddenly
2022  // be CSE'd with N, and carry its uses with it. Iterate until no
2023  // uses remain, to ensure that the node can be safely deleted.
2024  // First add the users of this node to the work list so that they
2025  // can be tried again once they have new operands.
2026  AddUsersToWorklist(N);
2027  do {
2028  // Do as a single replacement to avoid rewalking use lists.
2029  SmallVector<SDValue, 8> Ops;
2030  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2031  Ops.push_back(N->getOperand(i));
2032  DAG.ReplaceAllUsesWith(N, Ops.data());
2033  } while (!N->use_empty());
2034  deleteAndRecombine(N);
2035  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2036 }
2037 
2038 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2039 /// ConstantSDNode pointer else nullptr.
2040 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2041  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2042  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2043 }
2044 
2045 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2046 /// and that N may be folded in the load / store addressing mode.
2047 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2048  const TargetLowering &TLI) {
2049  EVT VT;
2050  unsigned AS;
2051 
2052  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2053  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2054  return false;
2055  VT = LD->getMemoryVT();
2056  AS = LD->getAddressSpace();
2057  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2058  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2059  return false;
2060  VT = ST->getMemoryVT();
2061  AS = ST->getAddressSpace();
2062  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2063  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2064  return false;
2065  VT = LD->getMemoryVT();
2066  AS = LD->getAddressSpace();
2067  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2068  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2069  return false;
2070  VT = ST->getMemoryVT();
2071  AS = ST->getAddressSpace();
2072  } else
2073  return false;
2074 
2075  TargetLowering::AddrMode AM;
2076  if (N->getOpcode() == ISD::ADD) {
2077  AM.HasBaseReg = true;
2078  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2079  if (Offset)
2080  // [reg +/- imm]
2081  AM.BaseOffs = Offset->getSExtValue();
2082  else
2083  // [reg +/- reg]
2084  AM.Scale = 1;
2085  } else if (N->getOpcode() == ISD::SUB) {
2086  AM.HasBaseReg = true;
2087  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2088  if (Offset)
2089  // [reg +/- imm]
2090  AM.BaseOffs = -Offset->getSExtValue();
2091  else
2092  // [reg +/- reg]
2093  AM.Scale = 1;
2094  } else
2095  return false;
2096 
2097  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2098  VT.getTypeForEVT(*DAG.getContext()), AS);
2099 }
2100 
2101 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2102  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2103  "Unexpected binary operator");
2104 
2105  // Don't do this unless the old select is going away. We want to eliminate the
2106  // binary operator, not replace a binop with a select.
2107  // TODO: Handle ISD::SELECT_CC.
2108  unsigned SelOpNo = 0;
2109  SDValue Sel = BO->getOperand(0);
2110  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2111  SelOpNo = 1;
2112  Sel = BO->getOperand(1);
2113  }
2114 
2115  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2116  return SDValue();
2117 
2118  SDValue CT = Sel.getOperand(1);
2119  if (!isConstantOrConstantVector(CT, true) &&
2120  !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2121  return SDValue();
2122 
2123  SDValue CF = Sel.getOperand(2);
2124  if (!isConstantOrConstantVector(CF, true) &&
2125  !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2126  return SDValue();
2127 
2128  // Bail out if any constants are opaque because we can't constant fold those.
2129  // The exception is "and" and "or" with either 0 or -1 in which case we can
2130  // propagate non constant operands into select. I.e.:
2131  // and (select Cond, 0, -1), X --> select Cond, 0, X
2132  // or X, (select Cond, -1, 0) --> select Cond, -1, X
2133  auto BinOpcode = BO->getOpcode();
2134  bool CanFoldNonConst =
2135  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2136  (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2137  (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2138 
2139  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2140  if (!CanFoldNonConst &&
2141  !isConstantOrConstantVector(CBO, true) &&
2142  !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2143  return SDValue();
2144 
2145  EVT VT = BO->getValueType(0);
2146 
2147  // We have a select-of-constants followed by a binary operator with a
2148  // constant. Eliminate the binop by pulling the constant math into the select.
2149  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2150  SDLoc DL(Sel);
2151  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2152  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2153  if (!CanFoldNonConst && !NewCT.isUndef() &&
2154  !isConstantOrConstantVector(NewCT, true) &&
2155  !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2156  return SDValue();
2157 
2158  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2159  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2160  if (!CanFoldNonConst && !NewCF.isUndef() &&
2161  !isConstantOrConstantVector(NewCF, true) &&
2162  !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2163  return SDValue();
2164 
2165  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2166  SelectOp->setFlags(BO->getFlags());
2167  return SelectOp;
2168 }
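
The transform above relies on a simple distributive fact about selects; a standalone sketch (not part of LLVM) on concrete integers, using add as the binary operator:

#include <cassert>
#include <cstdint>

// binop (select Cond, CT, CF), CBO  ==  select Cond, (binop CT, CBO), (binop CF, CBO)
int main() {
  const uint32_t CT = 7, CF = 42, CBO = 100;
  for (bool Cond : {false, true}) {
    uint32_t Before = (Cond ? CT : CF) + CBO;        // binary op applied after the select
    uint32_t After = Cond ? (CT + CBO) : (CF + CBO); // constant math pulled into the select
    assert(Before == After);
  }
  return 0;
}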
2169 
2170 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2171  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2172  "Expecting add or sub");
2173 
2174  // Match a constant operand and a zext operand for the math instruction:
2175  // add Z, C
2176  // sub C, Z
2177  bool IsAdd = N->getOpcode() == ISD::ADD;
2178  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2179  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2180  auto *CN = dyn_cast<ConstantSDNode>(C);
2181  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2182  return SDValue();
2183 
2184  // Match the zext operand as a setcc of a boolean.
2185  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2186  Z.getOperand(0).getValueType() != MVT::i1)
2187  return SDValue();
2188 
2189  // Match the compare as: setcc (X & 1), 0, eq.
2190  SDValue SetCC = Z.getOperand(0);
2191  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2192  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2193  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2194  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2195  return SDValue();
2196 
2197  // We are adding/subtracting a constant and an inverted low bit. Turn that
2198  // into a subtract/add of the low bit with incremented/decremented constant:
2199  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2200  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2201  EVT VT = C.getValueType();
2202  SDLoc DL(N);
2203  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2204  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2205  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2206  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2207 }
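
A standalone arithmetic check (not part of LLVM; assumes 32-bit unsigned wrap-around) of the two identities used above:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 100;
  for (uint32_t X : {0u, 1u, 2u, 3u, 0xFFFFFFFFu}) {
    uint32_t ZextInvBit = ((X & 1) == 0) ? 1u : 0u; // zext i1 (seteq (X & 1), 0)
    // add (zext i1 (seteq (X & 1), 0)), C  -->  sub C+1, (zext (X & 1))
    assert(ZextInvBit + C == (C + 1) - (X & 1));
    // sub C, (zext i1 (seteq (X & 1), 0))  -->  add C-1, (zext (X & 1))
    assert(C - ZextInvBit == (C - 1) + (X & 1));
  }
  return 0;
}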
2208 
2209 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2210 /// a shift and add with a different constant.
2211 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2212  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2213  "Expecting add or sub");
2214 
2215  // We need a constant operand for the add/sub, and the other operand is a
2216  // logical shift right: add (srl), C or sub C, (srl).
2217  bool IsAdd = N->getOpcode() == ISD::ADD;
2218  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2219  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2220  if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2221  ShiftOp.getOpcode() != ISD::SRL)
2222  return SDValue();
2223 
2224  // The shift must be of a 'not' value.
2225  SDValue Not = ShiftOp.getOperand(0);
2226  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2227  return SDValue();
2228 
2229  // The shift must be moving the sign bit to the least-significant-bit.
2230  EVT VT = ShiftOp.getValueType();
2231  SDValue ShAmt = ShiftOp.getOperand(1);
2232  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2233  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2234  return SDValue();
2235 
2236  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2237  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2238  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2239  SDLoc DL(N);
2240  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2241  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2242  if (SDValue NewC =
2243  DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2244  {ConstantOp, DAG.getConstant(1, DL, VT)}))
2245  return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2246  return SDValue();
2247 }
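
A standalone check (not part of LLVM) of the same identities on 32-bit values; it assumes two's-complement conversion and an arithmetic right shift for negative signed values, as on mainstream targets:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 1000;
  for (uint32_t X : {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu}) {
    uint32_t SrlNot = (~X) >> 31;                 // srl (not X), 31
    uint32_t Sra = (uint32_t)((int32_t)X >> 31);  // sra X, 31 (0 or -1)
    uint32_t Srl = X >> 31;                       // srl X, 31 (0 or 1)
    assert(SrlNot + C == Sra + (C + 1)); // add (srl (not X), 31), C --> add (sra X, 31), C+1
    assert(C - SrlNot == Srl + (C - 1)); // sub C, (srl (not X), 31) --> add (srl X, 31), C-1
  }
  return 0;
}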
2248 
2249 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2250 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2251 /// are no common bits set in the operands).
2252 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2253  SDValue N0 = N->getOperand(0);
2254  SDValue N1 = N->getOperand(1);
2255  EVT VT = N0.getValueType();
2256  SDLoc DL(N);
2257 
2258  // fold vector ops
2259  if (VT.isVector()) {
2260  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2261  return FoldedVOp;
2262 
2263  // fold (add x, 0) -> x, vector edition
2264  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2265  return N0;
2266  if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2267  return N1;
2268  }
2269 
2270  // fold (add x, undef) -> undef
2271  if (N0.isUndef())
2272  return N0;
2273 
2274  if (N1.isUndef())
2275  return N1;
2276 
2277  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2278  // canonicalize constant to RHS
2279  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2280  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2281  // fold (add c1, c2) -> c1+c2
2282  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2283  }
2284 
2285  // fold (add x, 0) -> x
2286  if (isNullConstant(N1))
2287  return N0;
2288 
2289  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2290  // fold ((A-c1)+c2) -> (A+(c2-c1))
2291  if (N0.getOpcode() == ISD::SUB &&
2292  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2293  SDValue Sub =
2294  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2295  assert(Sub && "Constant folding failed");
2296  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2297  }
2298 
2299  // fold ((c1-A)+c2) -> (c1+c2)-A
2300  if (N0.getOpcode() == ISD::SUB &&
2301  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2302  SDValue Add =
2303  DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2304  assert(Add && "Constant folding failed");
2305  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2306  }
2307 
2308  // add (sext i1 X), 1 -> zext (not i1 X)
2309  // We don't transform this pattern:
2310  // add (zext i1 X), -1 -> sext (not i1 X)
2311  // because most (?) targets generate better code for the zext form.
2312  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2313  isOneOrOneSplat(N1)) {
2314  SDValue X = N0.getOperand(0);
2315  if ((!LegalOperations ||
2316  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2317  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2318  X.getScalarValueSizeInBits() == 1) {
2319  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2320  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2321  }
2322  }
2323 
2324  // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2325  // equivalent to (add x, c0).
2326  if (N0.getOpcode() == ISD::OR &&
2327  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2328  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2329  if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2330  {N1, N0.getOperand(1)}))
2331  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2332  }
2333  }
2334 
2335  if (SDValue NewSel = foldBinOpIntoSelect(N))
2336  return NewSel;
2337 
2338  // reassociate add
2339  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2340  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2341  return RADD;
2342 
2343  // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2344  // equivalent to (add x, c).
2345  auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2346  if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2347  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2348  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2349  return DAG.getNode(ISD::ADD, DL, VT,
2350  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2351  N0.getOperand(1));
2352  }
2353  return SDValue();
2354  };
2355  if (SDValue Add = ReassociateAddOr(N0, N1))
2356  return Add;
2357  if (SDValue Add = ReassociateAddOr(N1, N0))
2358  return Add;
2359  }
2360  // fold ((0-A) + B) -> B-A
2361  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2362  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2363 
2364  // fold (A + (0-B)) -> A-B
2365  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2366  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2367 
2368  // fold (A+(B-A)) -> B
2369  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2370  return N1.getOperand(0);
2371 
2372  // fold ((B-A)+A) -> B
2373  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2374  return N0.getOperand(0);
2375 
2376  // fold ((A-B)+(C-A)) -> (C-B)
2377  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2378  N0.getOperand(0) == N1.getOperand(1))
2379  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380  N0.getOperand(1));
2381 
2382  // fold ((A-B)+(B-C)) -> (A-C)
2383  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2384  N0.getOperand(1) == N1.getOperand(0))
2385  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2386  N1.getOperand(1));
2387 
2388  // fold (A+(B-(A+C))) to (B-C)
2389  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2390  N0 == N1.getOperand(1).getOperand(0))
2391  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2392  N1.getOperand(1).getOperand(1));
2393 
2394  // fold (A+(B-(C+A))) to (B-C)
2395  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2396  N0 == N1.getOperand(1).getOperand(1))
2397  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2398  N1.getOperand(1).getOperand(0));
2399 
2400  // fold (A+((B-A)+or-C)) to (B+or-C)
2401  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2402  N1.getOperand(0).getOpcode() == ISD::SUB &&
2403  N0 == N1.getOperand(0).getOperand(1))
2404  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2405  N1.getOperand(1));
2406 
2407  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2408  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2409  SDValue N00 = N0.getOperand(0);
2410  SDValue N01 = N0.getOperand(1);
2411  SDValue N10 = N1.getOperand(0);
2412  SDValue N11 = N1.getOperand(1);
2413 
2414  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2415  return DAG.getNode(ISD::SUB, DL, VT,
2416  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2417  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2418  }
2419 
2420  // fold (add (umax X, C), -C) --> (usubsat X, C)
2421  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2422  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2423  return (!Max && !Op) ||
2424  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2425  };
2426  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2427  /*AllowUndefs*/ true))
2428  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2429  N0.getOperand(1));
2430  }
2431 
2432  if (SimplifyDemandedBits(SDValue(N, 0)))
2433  return SDValue(N, 0);
2434 
2435  if (isOneOrOneSplat(N1)) {
2436  // fold (add (xor a, -1), 1) -> (sub 0, a)
2437  if (isBitwiseNot(N0))
2438  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2439  N0.getOperand(0));
2440 
2441  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2442  if (N0.getOpcode() == ISD::ADD) {
2443  SDValue A, Xor;
2444 
2445  if (isBitwiseNot(N0.getOperand(0))) {
2446  A = N0.getOperand(1);
2447  Xor = N0.getOperand(0);
2448  } else if (isBitwiseNot(N0.getOperand(1))) {
2449  A = N0.getOperand(0);
2450  Xor = N0.getOperand(1);
2451  }
2452 
2453  if (Xor)
2454  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2455  }
2456 
2457  // Look for:
2458  // add (add x, y), 1
2459  // And if the target does not like this form then turn into:
2460  // sub y, (xor x, -1)
2461  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2462  N0.getOpcode() == ISD::ADD) {
2463  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2464  DAG.getAllOnesConstant(DL, VT));
2465  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2466  }
2467  }
2468 
2469  // (x - y) + -1 -> add (xor y, -1), x
2470  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2471  isAllOnesOrAllOnesSplat(N1)) {
2472  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2473  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2474  }
2475 
2476  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2477  return Combined;
2478 
2479  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2480  return Combined;
2481 
2482  return SDValue();
2483 }
2484 
2485 SDValue DAGCombiner::visitADD(SDNode *N) {
2486  SDValue N0 = N->getOperand(0);
2487  SDValue N1 = N->getOperand(1);
2488  EVT VT = N0.getValueType();
2489  SDLoc DL(N);
2490 
2491  if (SDValue Combined = visitADDLike(N))
2492  return Combined;
2493 
2494  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2495  return V;
2496 
2497  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2498  return V;
2499 
2500  // fold (a+b) -> (a|b) iff a and b share no bits.
2501  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2502  DAG.haveNoCommonBitsSet(N0, N1))
2503  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2504 
2505  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2506  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2507  const APInt &C0 = N0->getConstantOperandAPInt(0);
2508  const APInt &C1 = N1->getConstantOperandAPInt(0);
2509  return DAG.getVScale(DL, VT, C0 + C1);
2510  }
2511 
2512  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2513  if ((N0.getOpcode() == ISD::ADD) &&
2514  (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2515  (N1.getOpcode() == ISD::VSCALE)) {
2516  const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2517  const APInt &VS1 = N1->getConstantOperandAPInt(0);
2518  SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2519  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2520  }
2521 
2522  // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2523  if (N0.getOpcode() == ISD::STEP_VECTOR &&
2524  N1.getOpcode() == ISD::STEP_VECTOR) {
2525  const APInt &C0 = N0->getConstantOperandAPInt(0);
2526  const APInt &C1 = N1->getConstantOperandAPInt(0);
2527  APInt NewStep = C0 + C1;
2528  return DAG.getStepVector(DL, VT, NewStep);
2529  }
2530 
2531  // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2532  if ((N0.getOpcode() == ISD::ADD) &&
2533  (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2534  (N1.getOpcode() == ISD::STEP_VECTOR)) {
2535  const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2536  const APInt &SV1 = N1->getConstantOperandAPInt(0);
2537  APInt NewStep = SV0 + SV1;
2538  SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2539  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2540  }
2541 
2542  return SDValue();
2543 }
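
The (a+b) -> (a|b) fold above holds because addition of operands with no common set bits can never generate a carry; a standalone check (not part of LLVM):

#include <cassert>
#include <cstdint>

int main() {
  // A and B share no set bits, so no bit position carries: A + B == A | B.
  const uint32_t A = 0xF0F0F0F0u, B = 0x0F0F0F0Fu;
  assert((A & B) == 0);
  assert((A + B) == (A | B));
  return 0;
}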
2544 
2545 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2546  unsigned Opcode = N->getOpcode();
2547  SDValue N0 = N->getOperand(0);
2548  SDValue N1 = N->getOperand(1);
2549  EVT VT = N0.getValueType();
2550  SDLoc DL(N);
2551 
2552  // fold vector ops
2553  if (VT.isVector()) {
2554  // TODO SimplifyVBinOp
2555 
2556  // fold (add_sat x, 0) -> x, vector edition
2557  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2558  return N0;
2559  if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2560  return N1;
2561  }
2562 
2563  // fold (add_sat x, undef) -> -1
2564  if (N0.isUndef() || N1.isUndef())
2565  return DAG.getAllOnesConstant(DL, VT);
2566 
2567  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2568  // canonicalize constant to RHS
2569  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2570  return DAG.getNode(Opcode, DL, VT, N1, N0);
2571  // fold (add_sat c1, c2) -> c3
2572  return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2573  }
2574 
2575  // fold (add_sat x, 0) -> x
2576  if (isNullConstant(N1))
2577  return N0;
2578 
2579  // If it cannot overflow, transform into an add.
2580  if (Opcode == ISD::UADDSAT)
2581  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2582  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2583 
2584  return SDValue();
2585 }
2586 
2587 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2588  bool Masked = false;
2589 
2590  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2591  while (true) {
2592  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2593  V = V.getOperand(0);
2594  continue;
2595  }
2596 
2597  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2598  Masked = true;
2599  V = V.getOperand(0);
2600  continue;
2601  }
2602 
2603  break;
2604  }
2605 
2606  // If this is not a carry, return.
2607  if (V.getResNo() != 1)
2608  return SDValue();
2609 
2610  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2611  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2612  return SDValue();
2613 
2614  EVT VT = V.getNode()->getValueType(0);
2615  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2616  return SDValue();
2617 
2618  // If the result is masked, then no matter what kind of bool it is we can
2619  // return. If it isn't, then we need to make sure the bool type is either 0 or
2620  // 1 and not other values.
2621  if (Masked ||
2622  TLI.getBooleanContents(V.getValueType()) ==
2623  TargetLowering::ZeroOrOneBooleanContent)
2624  return V;
2625 
2626  return SDValue();
2627 }
2628 
2629 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2630 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2631 /// the opcode and bypass the mask operation.
2632 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2633  SelectionDAG &DAG, const SDLoc &DL) {
2634  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2635  return SDValue();
2636 
2637  EVT VT = N0.getValueType();
2638  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2639  return SDValue();
2640 
2641  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2642  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2643  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2644 }
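
A standalone check (not part of LLVM; 32-bit wrap-around assumed) of the identity behind the helper above: when the masked value is known to be 0 or -1, masking with 1 and adding equals subtracting the value itself, and vice versa:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t X = 1234;
  for (uint32_t Y : {0u, 0xFFFFFFFFu}) { // Y known to be 0 or -1
    assert(X + (Y & 1) == X - Y); // add X, (and Y, 1) --> sub X, Y
    assert(X - (Y & 1) == X + Y); // sub X, (and Y, 1) --> add X, Y
  }
  return 0;
}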
2645 
2646 /// Helper for doing combines based on N0 and N1 being added to each other.
2647 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2648  SDNode *LocReference) {
2649  EVT VT = N0.getValueType();
2650  SDLoc DL(LocReference);
2651 
2652  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2653  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2654  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2655  return DAG.getNode(ISD::SUB, DL, VT, N0,
2656  DAG.getNode(ISD::SHL, DL, VT,
2657  N1.getOperand(0).getOperand(1),
2658  N1.getOperand(1)));
2659 
2660  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2661  return V;
2662 
2663  // Look for:
2664  // add (add x, 1), y
2665  // And if the target does not like this form then turn into:
2666  // sub y, (xor x, -1)
2667  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2668  N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2669  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2670  DAG.getAllOnesConstant(DL, VT));
2671  return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2672  }
2673 
2674  // Hoist one-use subtraction by non-opaque constant:
2675  // (x - C) + y -> (x + y) - C
2676  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2677  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2678  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2679  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2680  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2681  }
2682  // Hoist one-use subtraction from non-opaque constant:
2683  // (C - x) + y -> (y - x) + C
2684  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2685  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2686  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2687  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2688  }
2689 
2690  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2691  // rather than 'add 0/-1' (the zext should get folded).
2692  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2693  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2694  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2695  TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2696  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2697  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2698  }
2699 
2700  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2701  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2702  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2703  if (TN->getVT() == MVT::i1) {
2704  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2705  DAG.getConstant(1, DL, VT));
2706  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2707  }
2708  }
2709 
2710  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2711  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2712  N1.getResNo() == 0)
2713  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2714  N0, N1.getOperand(0), N1.getOperand(2));
2715 
2716  // (add X, Carry) -> (addcarry X, 0, Carry)
2717  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2718  if (SDValue Carry = getAsCarry(TLI, N1))
2719  return DAG.getNode(ISD::ADDCARRY, DL,
2720  DAG.getVTList(VT, Carry.getValueType()), N0,
2721  DAG.getConstant(0, DL, VT), Carry);
2722 
2723  return SDValue();
2724 }
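
A standalone check (not part of LLVM; modulo 2^32) of the first fold in this helper: shifting a negation left is the negation of the shift, so the add becomes a sub:

#include <cassert>
#include <cstdint>

int main() {
  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  const uint32_t X = 0x12345678u, Y = 0x9ABCDEF0u;
  for (unsigned N = 0; N < 32; ++N)
    assert(X + ((0u - Y) << N) == X - (Y << N));
  return 0;
}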
2725 
2726 SDValue DAGCombiner::visitADDC(SDNode *N) {
2727  SDValue N0 = N->getOperand(0);
2728  SDValue N1 = N->getOperand(1);
2729  EVT VT = N0.getValueType();
2730  SDLoc DL(N);
2731 
2732  // If the flag result is dead, turn this into an ADD.
2733  if (!N->hasAnyUseOfValue(1))
2734  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2735  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2736 
2737  // canonicalize constant to RHS.
2738  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2739  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2740  if (N0C && !N1C)
2741  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2742 
2743  // fold (addc x, 0) -> x + no carry out
2744  if (isNullConstant(N1))
2745  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2746  DL, MVT::Glue));
2747 
2748  // If it cannot overflow, transform into an add.
2749  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2750  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2751  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2752 
2753  return SDValue();
2754 }
2755 
2756 /**
2757  * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2758  * then the flip also occurs if computing the inverse is the same cost.
2759  * This function returns an empty SDValue in case it cannot flip the boolean
2760  * without increasing the cost of the computation. If you want to flip a boolean
2761  * no matter what, use DAG.getLogicalNOT.
2762  */
2763 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2764  const TargetLowering &TLI,
2765  bool Force) {
2766  if (Force && isa<ConstantSDNode>(V))
2767  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2768 
2769  if (V.getOpcode() != ISD::XOR)
2770  return SDValue();
2771 
2772  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2773  if (!Const)
2774  return SDValue();
2775 
2776  EVT VT = V.getValueType();
2777 
2778  bool IsFlip = false;
2779  switch(TLI.getBooleanContents(VT)) {
2780  case TargetLowering::ZeroOrOneBooleanContent:
2781  IsFlip = Const->isOne();
2782  break;
2783  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2784  IsFlip = Const->isAllOnes();
2785  break;
2786  case TargetLowering::UndefinedBooleanContent:
2787  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2788  break;
2789  }
2790 
2791  if (IsFlip)
2792  return V.getOperand(0);
2793  if (Force)
2794  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2795  return SDValue();
2796 }
2797 
2798 SDValue DAGCombiner::visitADDO(SDNode *N) {
2799  SDValue N0 = N->getOperand(0);
2800  SDValue N1 = N->getOperand(1);
2801  EVT VT = N0.getValueType();
2802  bool IsSigned = (ISD::SADDO == N->getOpcode());
2803 
2804  EVT CarryVT = N->getValueType(1);
2805  SDLoc DL(N);
2806 
2807  // If the flag result is dead, turn this into an ADD.
2808  if (!N->hasAnyUseOfValue(1))
2809  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2810  DAG.getUNDEF(CarryVT));
2811 
2812  // canonicalize constant to RHS.
2813  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2814  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2815  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2816 
2817  // fold (addo x, 0) -> x + no carry out
2818  if (isNullOrNullSplat(N1))
2819  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2820 
2821  if (!IsSigned) {
2822  // If it cannot overflow, transform into an add.
2823  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2824  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2825  DAG.getConstant(0, DL, CarryVT));
2826 
2827  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2828  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2829  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2830  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2831  return CombineTo(
2832  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2833  }
2834 
2835  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2836  return Combined;
2837 
2838  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2839  return Combined;
2840  }
2841 
2842  return SDValue();
2843 }
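
A standalone check (not part of LLVM; assumes a GCC/Clang-style compiler for __builtin_*_overflow) of the uaddo fold above: ~a + 1 and 0 - a produce the same value, and exactly one of the carry and the borrow is set, which is why the carry is flipped:

#include <cassert>
#include <cstdint>

int main() {
  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
  for (uint32_t A : {0u, 1u, 0x80000000u, 0xFFFFFFFFu}) {
    uint32_t Sum, Diff;
    bool Carry = __builtin_add_overflow(~A, 1u, &Sum);  // uaddo (xor a, -1), 1
    bool Borrow = __builtin_sub_overflow(0u, A, &Diff); // usubo 0, a
    assert(Sum == Diff);
    assert(Carry == !Borrow);
  }
  return 0;
}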
2844 
2845 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2846  EVT VT = N0.getValueType();
2847  if (VT.isVector())
2848  return SDValue();
2849 
2850  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2851  // If Y + 1 cannot overflow.
2852  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2853  SDValue Y = N1.getOperand(0);
2854  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2855  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2856  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2857  N1.getOperand(2));
2858  }
2859 
2860  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2861  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2862  if (SDValue Carry = getAsCarry(TLI, N1))
2863  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2864  DAG.getConstant(0, SDLoc(N), VT), Carry);
2865 
2866  return SDValue();
2867 }
2868 
2869 SDValue DAGCombiner::visitADDE(SDNode *N) {
2870  SDValue N0 = N->getOperand(0);
2871  SDValue N1 = N->getOperand(1);
2872  SDValue CarryIn = N->getOperand(2);
2873 
2874  // canonicalize constant to RHS
2875  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2876  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2877  if (N0C && !N1C)
2878  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2879  N1, N0, CarryIn);
2880 
2881  // fold (adde x, y, false) -> (addc x, y)
2882  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2883  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2884 
2885  return SDValue();
2886 }
2887 
2888 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2889  SDValue N0 = N->getOperand(0);
2890  SDValue N1 = N->getOperand(1);
2891  SDValue CarryIn = N->getOperand(2);
2892  SDLoc DL(N);
2893 
2894  // canonicalize constant to RHS
2895  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2896  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2897  if (N0C && !N1C)
2898  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2899 
2900  // fold (addcarry x, y, false) -> (uaddo x, y)
2901  if (isNullConstant(CarryIn)) {
2902  if (!LegalOperations ||
2903  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2904  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2905  }
2906 
2907  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2908  if (isNullConstant(N0) && isNullConstant(N1)) {
2909  EVT VT = N0.getValueType();
2910  EVT CarryVT = CarryIn.getValueType();
2911  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2912  AddToWorklist(CarryExt.getNode());
2913  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2914  DAG.getConstant(1, DL, VT)),
2915  DAG.getConstant(0, DL, CarryVT));
2916  }
2917 
2918  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2919  return Combined;
2920 
2921  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2922  return Combined;
2923 
2924  return SDValue();
2925 }
2926 
2927 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2928  SDValue N0 = N->getOperand(0);
2929  SDValue N1 = N->getOperand(1);
2930  SDValue CarryIn = N->getOperand(2);
2931  SDLoc DL(N);
2932 
2933  // canonicalize constant to RHS
2934  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2935  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2936  if (N0C && !N1C)
2937  return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2938 
2939  // fold (saddo_carry x, y, false) -> (saddo x, y)
2940  if (isNullConstant(CarryIn)) {
2941  if (!LegalOperations ||
2942  TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2943  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2944  }
2945 
2946  return SDValue();
2947 }
2948 
2949 /**
2950  * If we are facing some sort of diamond carry propagation pattern, try to
2951  * break it up to generate something like:
2952  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2953  *
2954  * The end result is usually an increase in operations required, but because
2955  * the carry is now linearized, other transforms can kick in and optimize the DAG.
2956  *
2956  *
2957  * Patterns typically look something like
2958  * (uaddo A, B)
2959  * / \
2960  * Carry Sum
2961  * | \
2962  * | (addcarry *, 0, Z)
2963  * | /
2964  * \ Carry
2965  * | /
2966  * (addcarry X, *, *)
2967  *
2968  * But numerous variations exist. Our goal is to identify A, B, X and Z and
2969  * produce a combine with a single path for carry propagation.
2970  */
2972  SDValue X, SDValue Carry0, SDValue Carry1,
2973  SDNode *N) {
2974  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2975  return SDValue();
2976  if (Carry1.getOpcode() != ISD::UADDO)
2977  return SDValue();
2978 
2979  SDValue Z;
2980 
2981  /**
2982  * First look for a suitable Z. It will present itself in the form of
2983  * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2984  */
2985  if (Carry0.getOpcode() == ISD::ADDCARRY &&
2986  isNullConstant(Carry0.getOperand(1))) {
2987  Z = Carry0.getOperand(2);
2988  } else if (Carry0.getOpcode() == ISD::UADDO &&
2989  isOneConstant(Carry0.getOperand(1))) {
2990  EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2991  Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2992  } else {
2993  // We couldn't find a suitable Z.
2994  return SDValue();
2995  }
2996 
2997 
2998  auto cancelDiamond = [&](SDValue A,SDValue B) {
2999  SDLoc DL(N);
3000  SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3001  Combiner.AddToWorklist(NewY.getNode());
3002  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3003  DAG.getConstant(0, DL, X.getValueType()),
3004  NewY.getValue(1));
3005  };
3006 
3007  /**
3008  * (uaddo A, B)
3009  * |
3010  * Sum
3011  * |
3012  * (addcarry *, 0, Z)
3013  */
3014  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3015  return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3016  }
3017 
3018  /**
3019  * (addcarry A, 0, Z)
3020  * |
3021  * Sum
3022  * |
3023  * (uaddo *, B)
3024  */
3025  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3026  return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3027  }
3028 
3029  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3030  return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3031  }
3032 
3033  return SDValue();
3034 }
3035 
3036 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3037 // match patterns like:
3038 //
3039 //          (uaddo A, B)            CarryIn
3040 //            |  \                     |
3041 //            |   \                    |
3042 //    PartialSum   PartialCarryOutX   /
3043 //            |        |             /
3044 //            |    ____|____________/
3045 //            |   /    |
3046 //     (uaddo *, *)    \________
3047 //       |  \                   \
3048 //       |   \                   |
3049 //       |    PartialCarryOutY   |
3050 //       |        \              |
3051 //       |         \             /
3052 //   AddCarrySum    |  ______/
3053 //                  | /
3054 //   CarryOut = (or *, *)
3055 //
3056 // And generate ADDCARRY (or SUBCARRY) with two result values:
3057 //
3058 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3059 //
3060 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3061 // a single path for carry/borrow out propagation:
3062 static SDValue combineCarryDiamond(SelectionDAG &DAG,
3063  const TargetLowering &TLI, SDValue Carry0,
3064  SDValue Carry1, SDNode *N) {
3065  if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3066  return SDValue();
3067  unsigned Opcode = Carry0.getOpcode();
3068  if (Opcode != Carry1.getOpcode())
3069  return SDValue();
3070  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3071  return SDValue();
3072 
3073  // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3074  // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3075  // the above ASCII art.)
3076  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3077  Carry1.getOperand(1) != Carry0.getValue(0))
3078  std::swap(Carry0, Carry1);
3079  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3080  Carry1.getOperand(1) != Carry0.getValue(0))
3081  return SDValue();
3082 
3083  // The carry-in value must be on the right-hand side for subtraction.
3084  unsigned CarryInOperandNum =
3085  Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3086  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3087  return SDValue();
3088  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3089 
3090  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3091  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3092  return SDValue();
3093 
3094  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3095  // TODO: make getAsCarry() aware of how partial carries are merged.
3096  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3097  return SDValue();
3098  CarryIn = CarryIn.getOperand(0);
3099  if (CarryIn.getValueType() != MVT::i1)
3100  return SDValue();
3101 
3102  SDLoc DL(N);
3103  SDValue Merged =
3104  DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3105  Carry0.getOperand(1), CarryIn);
3106 
3107  // Note that because we have proven that the result of the UADDO/USUBO of A
3108  // and B feeds into the UADDO/USUBO that consumes the carry/borrow in, we
3109  // also know that if the first UADDO/USUBO overflows, the second
3110  // UADDO/USUBO cannot. For example, consider 8-bit numbers where 0xFF is the
3111  // maximum value.
3112  //
3113  // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3114  // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3115  //
3116  // This is important because it means that OR and XOR can be used to merge
3117  // carry flags; and that AND can return a constant zero.
3118  //
3119  // TODO: match other operations that can merge flags (ADD, etc)
3120  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3121  if (N->getOpcode() == ISD::AND)
3122  return DAG.getConstant(0, DL, MVT::i1);
3123  return Merged.getValue(1);
3124 }
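// Standalone illustration (not from the LLVM sources): a minimal brute-force
// check, in 8-bit unsigned arithmetic, of the claim above that the two partial
// carry-outs of the diamond can never both be set, so OR/XOR merge them into
// the single ADDCARRY carry-out and AND of them is always zero.
#include <cassert>

int main() {
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned B = 0; B <= 0xFF; ++B)
      for (unsigned CarryIn = 0; CarryIn <= 1; ++CarryIn) {
        unsigned PartialSum = (A + B) & 0xFF;
        bool PartialCarryOutX = (A + B) > 0xFF;                // top uaddo
        unsigned AddCarrySum = (PartialSum + CarryIn) & 0xFF;  // middle uaddo
        bool PartialCarryOutY = (PartialSum + CarryIn) > 0xFF;

        unsigned RefSum = (A + B + CarryIn) & 0xFF;            // one addcarry
        bool RefCarry = (A + B + CarryIn) > 0xFF;

        assert(!(PartialCarryOutX && PartialCarryOutY));       // never both set
        assert(AddCarrySum == RefSum);
        assert((PartialCarryOutX | PartialCarryOutY) == RefCarry);
        assert((PartialCarryOutX & PartialCarryOutY) == 0);
      }
  return 0;
}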
3125 
3126 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3127  SDNode *N) {
3128  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3129  if (isBitwiseNot(N0))
3130  if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3131  SDLoc DL(N);
3132  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3133  N0.getOperand(0), NotC);
3134  return CombineTo(
3135  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3136  }
3137 
3138  // Iff the flag result is dead:
3139  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3140  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3141  // or the dependency between the instructions.
3142  if ((N0.getOpcode() == ISD::ADD ||
3143  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3144  N0.getValue(1) != CarryIn)) &&
3145  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3146  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3147  N0.getOperand(0), N0.getOperand(1), CarryIn);
3148 
3149  /**
3150  * When one of the addcarry arguments is itself a carry, we may be facing
3151  * a diamond carry propagation. In which case we try to transform the DAG
3152  * to ensure linear carry propagation if that is possible.
3153  */
3154  if (auto Y = getAsCarry(TLI, N1)) {
3155  // Because both are carries, Y and Z can be swapped.
3156  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3157  return R;
3158  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3159  return R;
3160  }
3161 
3162  return SDValue();
3163 }
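// Standalone illustration (not from the LLVM sources): a brute-force 8-bit
// check of the first fold above, (addcarry (xor a, -1), b, c) ->
// (subcarry b, a, !c), with the carry-out flipped into a borrow-out.
#include <cassert>

int main() {
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned B = 0; B <= 0xFF; ++B)
      for (unsigned C = 0; C <= 1; ++C) {
        unsigned NotA = ~A & 0xFF;
        unsigned AddSum = (NotA + B + C) & 0xFF;
        bool AddCarry = (NotA + B + C) > 0xFF;

        unsigned BorrowIn = 1 - C;                 // !c used as a borrow-in
        unsigned SubSum = (B - A - BorrowIn) & 0xFF;
        bool SubBorrow = B < A + BorrowIn;         // borrow-out of the subtract

        assert(AddSum == SubSum);                  // same value
        assert(AddCarry == !SubBorrow);            // flipped flag
      }
  return 0;
}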
3164 
3165 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3166 // clamp/truncation if necessary.
3167 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3168  SDValue RHS, SelectionDAG &DAG,
3169  const SDLoc &DL) {
3170  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3171  "Illegal truncation");
3172 
3173  if (DstVT == SrcVT)
3174  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3175 
3176  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3177  // clamping RHS.
3178  APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3179  DstVT.getScalarSizeInBits());
3180  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3181  return SDValue();
3182 
3183  SDValue SatLimit =
3184  DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3185  DstVT.getScalarSizeInBits()),
3186  DL, SrcVT);
3187  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3188  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3189  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3190  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3191 }
3192 
3193 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3194 // usubsat(a,b), optionally as a truncated type.
3195 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3196  if (N->getOpcode() != ISD::SUB ||
3197  !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3198  return SDValue();
3199 
3200  EVT SubVT = N->getValueType(0);
3201  SDValue Op0 = N->getOperand(0);
3202  SDValue Op1 = N->getOperand(1);
3203 
3204  // Try to find umax(a,b) - b or a - umin(a,b) patterns
3205  // that may be converted to usubsat(a,b).
3206  if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3207  SDValue MaxLHS = Op0.getOperand(0);
3208  SDValue MaxRHS = Op0.getOperand(1);
3209  if (MaxLHS == Op1)
3210  return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3211  if (MaxRHS == Op1)
3212  return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3213  }
3214 
3215  if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3216  SDValue MinLHS = Op1.getOperand(0);
3217  SDValue MinRHS = Op1.getOperand(1);
3218  if (MinLHS == Op0)
3219  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3220  if (MinRHS == Op0)
3221  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3222  }
3223 
3224  // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3225  if (Op1.getOpcode() == ISD::TRUNCATE &&
3226  Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3227  Op1.getOperand(0).hasOneUse()) {
3228  SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3229  SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3230  if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3231  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3232  DAG, SDLoc(N));
3233  if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3234  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3235  DAG, SDLoc(N));
3236  }
3237 
3238  return SDValue();
3239 }
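// Standalone illustration (not from the LLVM sources): a brute-force 8-bit
// check of the identities matched above,
//   umax(a,b) - b == usubsat(a,b)   and   a - umin(a,b) == usubsat(a,b).
#include <algorithm>
#include <cassert>

int main() {
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned B = 0; B <= 0xFF; ++B) {
      unsigned USubSat = A > B ? A - B : 0;   // saturating unsigned subtract
      assert(std::max(A, B) - B == USubSat);
      assert(A - std::min(A, B) == USubSat);
    }
  return 0;
}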
3240 
3241  // Since it may not be valid to emit a fold to zero for vector initializers,
3242  // check if we can before folding.
3243 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3244  SelectionDAG &DAG, bool LegalOperations) {
3245  if (!VT.isVector())
3246  return DAG.getConstant(0, DL, VT);
3247  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3248  return DAG.getConstant(0, DL, VT);
3249  return SDValue();
3250 }
3251 
3252 SDValue DAGCombiner::visitSUB(SDNode *N) {
3253  SDValue N0 = N->getOperand(0);
3254  SDValue N1 = N->getOperand(1);
3255  EVT VT = N0.getValueType();
3256  SDLoc DL(N);
3257 
3258  // fold vector ops
3259  if (VT.isVector()) {
3260  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3261  return FoldedVOp;
3262 
3263  // fold (sub x, 0) -> x, vector edition
3264  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3265  return N0;
3266  }
3267 
3268  // fold (sub x, x) -> 0
3269  // FIXME: Refactor this and xor and other similar operations together.
3270  if (N0 == N1)
3271  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3272 
3273  // fold (sub c1, c2) -> c3
3274  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3275  return C;
3276 
3277  if (SDValue NewSel = foldBinOpIntoSelect(N))
3278  return NewSel;
3279 
3280  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3281 
3282  // fold (sub x, c) -> (add x, -c)
3283  if (N1C) {
3284  return DAG.getNode(ISD::ADD, DL, VT, N0,
3285  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3286  }
3287 
3288  if (isNullOrNullSplat(N0)) {
3289  unsigned BitWidth = VT.getScalarSizeInBits();
3290  // Right-shifting everything out but the sign bit followed by negation is
3291  // the same as flipping arithmetic/logical shift type without the negation:
3292  // -(X >>u 31) -> (X >>s 31)
3293  // -(X >>s 31) -> (X >>u 31)
3294  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3295  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3296  if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3297  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3298  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3299  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3300  }
3301  }
3302 
3303  // 0 - X --> 0 if the sub is NUW.
3304  if (N->getFlags().hasNoUnsignedWrap())
3305  return N0;
3306 
3307  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3308  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3309  // N1 must be 0 because negating the minimum signed value is undefined.
3310  if (N->getFlags().hasNoSignedWrap())
3311  return N0;
3312 
3313  // 0 - X --> X if X is 0 or the minimum signed value.
3314  return N1;
3315  }
3316 
3317  // Convert 0 - abs(x).
3318  SDValue Result;
3319  if (N1->getOpcode() == ISD::ABS &&
3320  !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3321  TLI.expandABS(N1.getNode(), Result, DAG, true))
3322  return Result;
3323 
3324  // Fold neg(splat(neg(x))) -> splat(x)
3325  if (VT.isVector()) {
3326  SDValue N1S = DAG.getSplatValue(N1, true);
3327  if (N1S && N1S.getOpcode() == ISD::SUB &&
3328  isNullConstant(N1S.getOperand(0))) {
3329  if (VT.isScalableVector())
3330  return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3331  return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3332  }
3333  }
3334  }
3335 
3336  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3337  if (isAllOnesOrAllOnesSplat(N0))
3338  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3339 
3340  // fold (A - (0-B)) -> A+B
3341  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3342  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3343 
3344  // fold A-(A-B) -> B
3345  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3346  return N1.getOperand(1);
3347 
3348  // fold (A+B)-A -> B
3349  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3350  return N0.getOperand(1);
3351 
3352  // fold (A+B)-B -> A
3353  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3354  return N0.getOperand(0);
3355 
3356  // fold (A+C1)-C2 -> A+(C1-C2)
3357  if (N0.getOpcode() == ISD::ADD &&
3358  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3359  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3360  SDValue NewC =
3361  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3362  assert(NewC && "Constant folding failed");
3363  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3364  }
3365 
3366  // fold C2-(A+C1) -> (C2-C1)-A
3367  if (N1.getOpcode() == ISD::ADD) {
3368  SDValue N11 = N1.getOperand(1);
3369  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3370  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3371  SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3372  assert(NewC && "Constant folding failed");
3373  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3374  }
3375  }
3376 
3377  // fold (A-C1)-C2 -> A-(C1+C2)
3378  if (N0.getOpcode() == ISD::SUB &&
3379  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3380  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3381  SDValue NewC =
3382  DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3383  assert(NewC && "Constant folding failed");
3384  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3385  }
3386 
3387  // fold (c1-A)-c2 -> (c1-c2)-A
3388  if (N0.getOpcode() == ISD::SUB &&
3389  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3390  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3391  SDValue NewC =
3392  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3393  assert(NewC && "Constant folding failed");
3394  return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3395  }
3396 
3397  // fold ((A+(B+or-C))-B) -> A+or-C
3398  if (N0.getOpcode() == ISD::ADD &&
3399  (N0.getOperand(1).getOpcode() == ISD::SUB ||
3400  N0.getOperand(1).getOpcode() == ISD::ADD) &&
3401  N0.getOperand(1).getOperand(0) == N1)
3402  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3403  N0.getOperand(1).getOperand(1));
3404 
3405  // fold ((A+(C+B))-B) -> A+C
3406  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3407  N0.getOperand(1).getOperand(1) == N1)
3408  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3409  N0.getOperand(1).getOperand(0));
3410 
3411  // fold ((A-(B-C))-C) -> A-B
3412  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3413  N0.getOperand(1).getOperand(1) == N1)
3414  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3415  N0.getOperand(1).getOperand(0));
3416 
3417  // fold (A-(B-C)) -> A+(C-B)
3418  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3419  return DAG.getNode(ISD::ADD, DL, VT, N0,
3420  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3421  N1.getOperand(0)));
3422 
3423  // A - (A & B) -> A & (~B)
3424  if (N1.getOpcode() == ISD::AND) {
3425  SDValue A = N1.getOperand(0);
3426  SDValue B = N1.getOperand(1);
3427  if (A != N0)
3428  std::swap(A, B);
3429  if (A == N0 &&
3430  (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3431  SDValue InvB =
3432  DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3433  return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3434  }
3435  }
3436 
3437  // fold (X - (-Y * Z)) -> (X + (Y * Z))
3438  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3439  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3440  isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3441  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3442  N1.getOperand(0).getOperand(1),
3443  N1.getOperand(1));
3444  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3445  }
3446  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3447  isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3448  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3449  N1.getOperand(0),
3450  N1.getOperand(1).getOperand(1));
3451  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3452  }
3453  }
3454 
3455  // If either operand of a sub is undef, the result is undef
3456  if (N0.isUndef())
3457  return N0;
3458  if (N1.isUndef())
3459  return N1;
3460 
3461  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3462  return V;
3463 
3464  if (SDValue V = foldAddSubOfSignBit(N, DAG))
3465  return V;
3466 
3467  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3468  return V;
3469 
3470  if (SDValue V = foldSubToUSubSat(VT, N))
3471  return V;
3472 
3473  // (x - y) - 1 -> add (xor y, -1), x
3474  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3475  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3476  DAG.getAllOnesConstant(DL, VT));
3477  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3478  }
3479 
3480  // Look for:
3481  // sub y, (xor x, -1)
3482  // And if the target does not like this form then turn into:
3483  // add (add x, y), 1
3484  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3485  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3486  return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3487  }
3488 
3489  // Hoist one-use addition by non-opaque constant:
3490  // (x + C) - y -> (x - y) + C
3491  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3492  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3493  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3494  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3495  }
3496  // y - (x + C) -> (y - x) - C
3497  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3498  isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3499  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3500  return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3501  }
3502  // (x - C) - y -> (x - y) - C
3503  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3504  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3505  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3506  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3507  return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3508  }
3509  // (C - x) - y -> C - (x + y)
3510  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3511  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3512  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3513  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3514  }
3515 
3516  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3517  // rather than 'sub 0/1' (the sext should get folded).
3518  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3519  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3520  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3521  TLI.getBooleanContents(VT) ==
3522  TargetLowering::ZeroOrNegativeOneBooleanContent) {
3523  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3524  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3525  }
3526 
3527  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3528  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3529  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3530  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3531  SDValue S0 = N1.getOperand(0);
3532  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3533  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3534  if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3535  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3536  }
3537  }
3538 
3539  // If the relocation model supports it, consider symbol offsets.
3540  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3541  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3542  // fold (sub Sym, c) -> Sym-c
3543  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3544  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3545  GA->getOffset() -
3546  (uint64_t)N1C->getSExtValue());
3547  // fold (sub Sym+c1, Sym+c2) -> c1-c2
3548  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3549  if (GA->getGlobal() == GB->getGlobal())
3550  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3551  DL, VT);
3552  }
3553 
3554  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3555  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3556  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3557  if (TN->getVT() == MVT::i1) {
3558  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3559  DAG.getConstant(1, DL, VT));
3560  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3561  }
3562  }
3563 
3564  // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3565  if (N1.getOpcode() == ISD::VSCALE) {
3566  const APInt &IntVal = N1.getConstantOperandAPInt(0);
3567  return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3568  }
3569 
3570  // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3571  if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3572  APInt NewStep = -N1.getConstantOperandAPInt(0);
3573  return DAG.getNode(ISD::ADD, DL, VT, N0,
3574  DAG.getStepVector(DL, VT, NewStep));
3575  }
3576 
3577  // Prefer an add for more folding potential and possibly better codegen:
3578  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3579  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3580  SDValue ShAmt = N1.getOperand(1);
3581  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3582  if (ShAmtC &&
3583  ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3584  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3585  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3586  }
3587  }
3588 
3589  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3590  // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3591  if (SDValue Carry = getAsCarry(TLI, N0)) {
3592  SDValue X = N1;
3593  SDValue Zero = DAG.getConstant(0, DL, VT);
3594  SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3595  return DAG.getNode(ISD::ADDCARRY, DL,
3596  DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3597  Carry);
3598  }
3599  }
3600 
3601  return SDValue();
3602 }
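// Standalone illustration (not from the LLVM sources): brute-force 8-bit
// checks of two of the visitSUB folds above, modeled in two's complement:
//   0 - (X >>u 7) == X >>s 7                 (flip logical/arithmetic shift)
//   (X ^ Y) - Y   == abs(X), with Y = X >>s 7 (the wrap-around ISD::ABS pattern)
#include <cassert>

int main() {
  for (unsigned Bits = 0; Bits <= 0xFF; ++Bits) {
    unsigned Lshr = Bits >> 7;                         // X >>u 7: 0 or 1
    unsigned Ashr = (Bits & 0x80) ? 0xFF : 0x00;       // X >>s 7: 0 or -1
    assert(((0u - Lshr) & 0xFF) == Ashr);

    unsigned Abs = (Bits & 0x80) ? ((0u - Bits) & 0xFF) : Bits; // wraps at -128
    assert((((Bits ^ Ashr) - Ashr) & 0xFF) == Abs);
  }
  return 0;
}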
3603 
3604 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3605  SDValue N0 = N->getOperand(0);
3606  SDValue N1 = N->getOperand(1);
3607  EVT VT = N0.getValueType();
3608  SDLoc DL(N);
3609 
3610  // fold vector ops
3611  if (VT.isVector()) {
3612  // TODO SimplifyVBinOp
3613 
3614  // fold (sub_sat x, 0) -> x, vector edition
3615  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3616  return N0;
3617  }
3618 
3619  // fold (sub_sat x, undef) -> 0
3620  if (N0.isUndef() || N1.isUndef())
3621  return DAG.getConstant(0, DL, VT);
3622 
3623  // fold (sub_sat x, x) -> 0
3624  if (N0 == N1)
3625  return DAG.getConstant(0, DL, VT);
3626 
3627  // fold (sub_sat c1, c2) -> c3
3628  if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3629  return C;
3630 
3631  // fold (sub_sat x, 0) -> x
3632  if (isNullConstant(N1))
3633  return N0;
3634 
3635  return SDValue();
3636 }
3637 
3638 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3639  SDValue N0 = N->getOperand(0);
3640  SDValue N1 = N->getOperand(1);
3641  EVT VT = N0.getValueType();
3642  SDLoc DL(N);
3643 
3644  // If the flag result is dead, turn this into an SUB.
3645  if (!N->hasAnyUseOfValue(1))
3646  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3647  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3648 
3649  // fold (subc x, x) -> 0 + no borrow
3650  if (N0 == N1)
3651  return CombineTo(N, DAG.getConstant(0, DL, VT),
3652  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3653 
3654  // fold (subc x, 0) -> x + no borrow
3655  if (isNullConstant(N1))
3656  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3657 
3658  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3659  if (isAllOnesConstant(N0))
3660  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3661  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3662 
3663  return SDValue();
3664 }
3665 
3666 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3667  SDValue N0 = N->getOperand(0);
3668  SDValue N1 = N->getOperand(1);
3669  EVT VT = N0.getValueType();
3670  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3671 
3672  EVT CarryVT = N->getValueType(1);
3673  SDLoc DL(N);
3674 
3675  // If the flag result is dead, turn this into an SUB.
3676  if (!N->hasAnyUseOfValue(1))
3677  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3678  DAG.getUNDEF(CarryVT));
3679 
3680  // fold (subo x, x) -> 0 + no borrow
3681  if (N0 == N1)
3682  return CombineTo(N, DAG.getConstant(0, DL, VT),
3683  DAG.getConstant(0, DL, CarryVT));
3684 
3685  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3686 
3687  // fold (subo x, c) -> (addo x, -c)
3688  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3689  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3690  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3691  }
3692 
3693  // fold (subo x, 0) -> x + no borrow
3694  if (isNullOrNullSplat(N1))
3695  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3696 
3697  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3698  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3699  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3700  DAG.getConstant(0, DL, CarryVT));
3701 
3702  return SDValue();
3703 }
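// Standalone illustration (not from the LLVM sources): 8-bit checks of two of
// the visitSUBO folds above. usubo(-1, x) never borrows and its value is ~x,
// and ssubo(x, c) matches saddo(x, -c) for every c except the minimum signed
// value, which the fold excludes because -INT8_MIN is not representable.
#include <cassert>

int main() {
  for (int X = -128; X <= 127; ++X) {
    unsigned UX = X & 0xFF;
    assert(0xFFu >= UX);                                 // no unsigned borrow
    assert(((0xFFu - UX) & 0xFF) == (~UX & 0xFF));       // value is ~x

    for (int C = -127; C <= 127; ++C) {                  // C == -128 skipped
      bool SubOverflow = (X - C) < -128 || (X - C) > 127;
      bool AddOverflow = (X + -C) < -128 || (X + -C) > 127;
      assert(SubOverflow == AddOverflow);                // same overflow flag
      assert(((X - C) & 0xFF) == ((X + -C) & 0xFF));     // same 8-bit value
    }
  }
  return 0;
}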
3704 
3705 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3706  SDValue N0 = N->getOperand(0);
3707  SDValue N1 = N->getOperand(1);
3708  SDValue CarryIn = N->getOperand(2);
3709 
3710  // fold (sube x, y, false) -> (subc x, y)
3711  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3712  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3713 
3714  return SDValue();
3715 }
3716 
3717 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3718  SDValue N0 = N->getOperand(0);
3719  SDValue N1 = N->getOperand(1);
3720  SDValue CarryIn = N->getOperand(2);
3721 
3722  // fold (subcarry x, y, false) -> (usubo x, y)
3723  if (isNullConstant(CarryIn)) {
3724  if (!LegalOperations ||
3725  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3726  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3727  }
3728 
3729  return SDValue();
3730 }
3731 
3732 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3733  SDValue N0 = N->getOperand(0);
3734  SDValue N1 = N->getOperand(1);
3735  SDValue CarryIn = N->getOperand(2);
3736 
3737  // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3738  if (isNullConstant(CarryIn)) {
3739  if (!LegalOperations ||
3740  TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3741  return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3742  }
3743 
3744  return SDValue();
3745 }
3746 
3747 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3748 // UMULFIXSAT here.
3749 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3750  SDValue N0 = N->getOperand(0);
3751  SDValue N1 = N->getOperand(1);
3752  SDValue Scale = N->getOperand(2);
3753  EVT VT = N0.getValueType();
3754 
3755  // fold (mulfix x, undef, scale) -> 0
3756  if (N0.isUndef() || N1.isUndef())
3757  return DAG.getConstant(0, SDLoc(N), VT);
3758 
3759  // Canonicalize constant to RHS (vector doesn't have to splat)
3760  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3761  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3762  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3763 
3764  // fold (mulfix x, 0, scale) -> 0
3765  if (isNullConstant(N1))
3766  return DAG.getConstant(0, SDLoc(N), VT);
3767 
3768  return SDValue();
3769 }
3770 
3771 SDValue DAGCombiner::visitMUL(SDNode *N) {
3772  SDValue N0 = N->getOperand(0);
3773  SDValue N1 = N->getOperand(1);
3774  EVT VT = N0.getValueType();
3775 
3776  // fold (mul x, undef) -> 0
3777  if (N0.isUndef() || N1.isUndef())
3778  return DAG.getConstant(0, SDLoc(N), VT);
3779 
3780  bool N1IsConst = false;
3781  bool N1IsOpaqueConst = false;
3782  APInt ConstValue1;
3783 
3784  // fold vector ops
3785  if (VT.isVector()) {
3786  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3787  return FoldedVOp;
3788 
3789  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3790  assert((!N1IsConst ||
3791  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3792  "Splat APInt should be element width");
3793  } else {
3794  N1IsConst = isa<ConstantSDNode>(N1);
3795  if (N1IsConst) {
3796  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3797  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3798  }
3799  }
3800 
3801  // fold (mul c1, c2) -> c1*c2
3802  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3803  return C;
3804 
3805  // canonicalize constant to RHS (vector doesn't have to splat)
3806  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3807  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3808  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3809 
3810  // fold (mul x, 0) -> 0
3811  if (N1IsConst && ConstValue1.isZero())
3812  return N1;
3813 
3814  // fold (mul x, 1) -> x
3815  if (N1IsConst && ConstValue1.isOne())
3816  return N0;
3817 
3818  if (SDValue NewSel = foldBinOpIntoSelect(N))
3819  return NewSel;
3820 
3821  // fold (mul x, -1) -> 0-x
3822  if (N1IsConst && ConstValue1.isAllOnes()) {
3823  SDLoc DL(N);
3824  return DAG.getNode(ISD::SUB, DL, VT,
3825  DAG.getConstant(0, DL, VT), N0);
3826  }
3827 
3828  // fold (mul x, (1 << c)) -> x << c
3829  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3830  DAG.isKnownToBeAPowerOfTwo(N1) &&
3831  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3832  SDLoc DL(N);
3833  SDValue LogBase2 = BuildLogBase2(N1, DL);
3834  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3835  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3836  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3837  }
3838 
3839  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3840  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3841  unsigned Log2Val = (-ConstValue1).logBase2();
3842  SDLoc DL(N);
3843  // FIXME: If the input is something that is easily negated (e.g. a
3844  // single-use add), we should put the negate there.
3845  return DAG.getNode(ISD::SUB, DL, VT,
3846  DAG.getConstant(0, DL, VT),
3847  DAG.getNode(ISD::SHL, DL, VT, N0,
3848  DAG.getConstant(Log2Val, DL,
3849  getShiftAmountTy(N0.getValueType()))));
3850  }
3851 
3852  // Try to transform:
3853  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3854  // mul x, (2^N + 1) --> add (shl x, N), x
3855  // mul x, (2^N - 1) --> sub (shl x, N), x
3856  // Examples: x * 33 --> (x << 5) + x
3857  // x * 15 --> (x << 4) - x
3858  // x * -33 --> -((x << 5) + x)
3859  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3860  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3861  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3862  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3863  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3864  // x * 0xf800 --> (x << 16) - (x << 11)
3865  // x * -0x8800 --> -((x << 15) + (x << 11))
3866  // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3867  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3868  // TODO: We could handle more general decomposition of any constant by
3869  // having the target set a limit on number of ops and making a
3870  // callback to determine that sequence (similar to sqrt expansion).
3871  unsigned MathOp = ISD::DELETED_NODE;
3872  APInt MulC = ConstValue1.abs();
3873  // The constant `2` should be treated as (2^0 + 1).
3874  unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3875  MulC.lshrInPlace(TZeros);
3876  if ((MulC - 1).isPowerOf2())
3877  MathOp = ISD::ADD;
3878  else if ((MulC + 1).isPowerOf2())
3879  MathOp = ISD::SUB;
3880 
3881  if (MathOp != ISD::DELETED_NODE) {
3882  unsigned ShAmt =
3883  MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3884  ShAmt += TZeros;
3885  assert(ShAmt < VT.getScalarSizeInBits() &&
3886  "multiply-by-constant generated out of bounds shift");
3887  SDLoc DL(N);
3888  SDValue Shl =
3889  DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3890  SDValue R =
3891  TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3892  DAG.getNode(ISD::SHL, DL, VT, N0,
3893  DAG.getConstant(TZeros, DL, VT)))
3894  : DAG.getNode(MathOp, DL, VT, Shl, N0);
3895  if (ConstValue1.isNegative())
3896  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3897  return R;
3898  }
3899  }
3900 
3901  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3902  if (N0.getOpcode() == ISD::SHL &&
3903  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3904  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3905  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3906  if (isConstantOrConstantVector(C3))
3907  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3908  }
3909 
3910  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3911  // use.
3912  {
3913  SDValue Sh(nullptr, 0), Y(nullptr, 0);
3914 
3915  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3916  if (N0.getOpcode() == ISD::SHL &&
3917  isConstantOrConstantVector(N0.getOperand(1)) &&
3918  N0.getNode()->hasOneUse()) {
3919  Sh = N0; Y = N1;
3920  } else if (N1.getOpcode() == ISD::SHL &&
3921  isConstantOrConstantVector(N1.getOperand(1)) &&
3922  N1.getNode()->hasOneUse()) {
3923  Sh = N1; Y = N0;
3924  }
3925 
3926  if (Sh.getNode()) {
3927  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3928  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3929  }
3930  }
3931 
3932  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3933  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3934  N0.getOpcode() == ISD::ADD &&
3935  DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3936  isMulAddWithConstProfitable(N, N0, N1))
3937  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3938  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3939  N0.getOperand(0), N1),
3940  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3941  N0.getOperand(1), N1));
3942 
3943  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3944  if (N0.getOpcode() == ISD::VSCALE)
3945  if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3946  const APInt &C0 = N0.getConstantOperandAPInt(0);
3947  const APInt &C1 = NC1->getAPIntValue();
3948  return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3949  }
3950 
3951  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3952  APInt MulVal;
3953  if (N0.getOpcode() == ISD::STEP_VECTOR)
3954  if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3955  const APInt &C0 = N0.getConstantOperandAPInt(0);
3956  APInt NewStep = C0 * MulVal;
3957  return DAG.getStepVector(SDLoc(N), VT, NewStep);
3958  }
3959 
3960  // Fold per-element:
3961  //   (mul x, 0/undef) -> 0 and (mul x, 1) -> x
3962  // into and(x, mask).
3963  // We can replace vectors with '0' and '1' factors with a clearing mask.
3964  if (VT.isFixedLengthVector()) {
3965  unsigned NumElts = VT.getVectorNumElements();
3966  SmallBitVector ClearMask;
3967  ClearMask.reserve(NumElts);
3968  auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3969  if (!V || V->isZero()) {
3970  ClearMask.push_back(true);
3971  return true;
3972  }
3973  ClearMask.push_back(false);
3974  return V->isOne();
3975  };
3976  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3977  ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3978  assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3979  SDLoc DL(N);
3980  EVT LegalSVT = N1.getOperand(0).getValueType();
3981  SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3982  SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3983  SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3984  for (unsigned I = 0; I != NumElts; ++I)
3985  if (ClearMask[I])
3986  Mask[I] = Zero;
3987  return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3988  }
3989  }
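// Standalone illustration (not from the LLVM sources): the clearing-mask fold
// above rests on the per-element identities x * 0 == x & 0 and x * 1 == x & ~0.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Samples[] = {0u, 1u, 42u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : Samples) {
    assert(X * 0u == (X & 0u));
    assert(X * 1u == (X & ~0u));
  }
  return 0;
}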
3990 
3991  // reassociate mul
3992  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3993  return RMUL;
3994 
3995  return SDValue();
3996 }
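// Standalone illustration (not from the LLVM sources): spot checks of the
// multiply-by-constant decompositions listed in visitMUL above. The equalities
// hold modulo 2^32, so unsigned wrap-around does not matter.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Samples[] = {0u, 1u, 2u, 7u, 0x1234u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : Samples) {
    assert(X * 33u == (X << 5) + X);                 // 2^5 + 1
    assert(X * 15u == (X << 4) - X);                 // 2^4 - 1
    assert(X * 0x8800u == (X << 15) + (X << 11));    // 2^15 + 2^11
    assert(X * 0xF800u == (X << 16) - (X << 11));    // 2^16 - 2^11
    assert(X * (0u - 15u) == 0u - ((X << 4) - X));   // negative constants negate
  }
  return 0;
}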
3997 
3998 /// Return true if divmod libcall is available.
3999 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4000  const TargetLowering &TLI) {
4001  RTLIB::Libcall LC;
4002  EVT NodeType = Node->getValueType(0);
4003  if (!NodeType.isSimple())
4004  return false;
4005  switch (NodeType.getSimpleVT().SimpleTy) {
4006  default: return false; // No libcall for vector types.
4007  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4008  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4009  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4010  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4011  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4012  }
4013 
4014  return TLI.getLibcallName(LC) != nullptr;
4015 }
4016 
4017 /// Issue divrem if both quotient and remainder are needed.
4018 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4019  if (Node->use_empty())
4020  return SDValue(); // This is a dead node, leave it alone.
4021 
4022  unsigned Opcode = Node->getOpcode();
4023  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4024  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4025 
4026  // A DivMod libcall can still handle types that are not legal for the target.
4027  EVT VT = Node->getValueType(0);
4028  if (VT.isVector() || !VT.isInteger())
4029  return SDValue();
4030 
4031  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4032  return SDValue();
4033 
4034  // If DIVREM is going to get expanded into a libcall,
4035  // but there is no libcall available, then don't combine.
4036  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4037  !isDivRemLibcallAvailable(Node, isSigned, TLI))
4038  return SDValue();
4039 
4040  // If div is legal, it's better to do the normal expansion
4041  unsigned OtherOpcode = 0;
4042  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4043  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4044  if (TLI.isOperationLegalOrCustom(Opcode, VT))
4045  return SDValue();
4046  } else {
4047  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4048  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4049  return SDValue();
4050  }
4051 
4052  SDValue Op0 = Node->getOperand(0);
4053  SDValue Op1 = Node->getOperand(1);
4054  SDValue combined;
4055  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
4056  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
4057  SDNode *User = *UI;
4058  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4059  User->use_empty())
4060  continue;
4061  // Convert the other matching node(s), too;
4062  // otherwise, the DIVREM may get target-legalized into something
4063  // target-specific that we won't be able to recognize.
4064  unsigned UserOpc = User->getOpcode();
4065  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4066  User->getOperand(0) == Op0 &&
4067  User->getOperand(1) == Op1) {
4068  if (!combined) {
4069  if (UserOpc == OtherOpcode) {
4070  SDVTList VTs = DAG.getVTList(VT, VT);
4071  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4072  } else if (UserOpc == DivRemOpc) {
4073  combined = SDValue(User, 0);
4074  } else {
4075  assert(UserOpc == Opcode);
4076  continue;
4077  }
4078  }
4079  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4080  CombineTo(User, combined);
4081  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4082  CombineTo(User, combined.getValue(1));
4083  }
4084  }
4085  return combined;
4086 }
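// Standalone illustration (not from the LLVM sources): when both a quotient
// and a remainder of the same operands are live, one DIVREM suffices because
//   x % y == x - (x / y) * y
// for truncating signed and unsigned division. A brute-force 8-bit check:
#include <cassert>

int main() {
  for (int X = -128; X <= 127; ++X)
    for (int Y = -128; Y <= 127; ++Y) {
      if (Y == 0 || (X == -128 && Y == -1))
        continue;                                  // undefined cases skipped
      assert(X % Y == X - (X / Y) * Y);            // signed

      unsigned UX = X & 0xFF, UY = Y & 0xFF;
      if (UY != 0)
        assert(UX % UY == UX - (UX / UY) * UY);    // unsigned
    }
  return 0;
}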
4087 
4088  static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4089  SDValue N0 = N->getOperand(0);
4090  SDValue N1 = N->getOperand(1);
4091  EVT VT = N->getValueType(0);
4092  SDLoc DL(N);
4093 
4094  unsigned Opc = N->getOpcode();
4095  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4096  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4097 
4098  // X / undef -> undef
4099  // X % undef -> undef
4100  // X / 0 -> undef
4101  // X % 0 -> undef
4102  // NOTE: This includes vectors where any divisor element is zero/undef.
4103  if (DAG.isUndef(Opc, {N0, N1}))
4104  return DAG.getUNDEF(VT);
4105 
4106  // undef / X -> 0
4107  // undef % X -> 0
4108  if (N0.isUndef())
4109  return DAG.getConstant(0, DL, VT);
4110 
4111  // 0 / X -> 0
4112  // 0 % X -> 0
4113  ConstantSDNode *N0C = isConstOrConstSplat(N0);
4114  if (N0C && N0C->isZero())
4115  return N0;
4116 
4117  // X / X -> 1
4118  // X % X -> 0
4119  if (N0 == N1)
4120  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4121 
4122  // X / 1 -> X
4123  // X % 1 -> 0
4124  // If this is a boolean op (single-bit element type), we can't have
4125  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4126  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4127  // it's a 1.
4128  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4129  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4130 
4131  return SDValue();
4132 }
4133 
4134 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4135  SDValue N0 = N->getOperand(0);
4136  SDValue N1 = N->getOperand(1);
4137  EVT VT = N->getValueType(0);
4138  EVT CCVT = getSetCCResultType(VT);
4139 
4140  // fold vector ops
4141  if (VT.isVector())
4142  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4143  return FoldedVOp;
4144 
4145  SDLoc DL(N);
4146 
4147  // fold (sdiv c1, c2) -> c1/c2
4148  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4149  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4150  return C;
4151 
4152  // fold (sdiv X, -1) -> 0-X
4153  if (N1C && N1C->isAllOnes())
4154  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4155 
4156  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4157  if (N1C && N1C->getAPIntValue().isMinSignedValue())
4158  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4159  DAG.getConstant(1, DL, VT),
4160  DAG.getConstant(0, DL, VT));
4161 
4162  if (SDValue V = simplifyDivRem(N, DAG))
4163  return V;
4164 
4165  if (SDValue NewSel = foldBinOpIntoSelect(N))
4166  return NewSel;
4167 
4168  // If we know the sign bits of both operands are zero, strength reduce to a
4169  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4170  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4171  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4172 
4173  if (SDValue V = visitSDIVLike(N0, N1, N)) {
4174  // If the corresponding remainder node exists, update its users with
4175  // (Dividend - (Quotient * Divisor)).
4176  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4177  { N0, N1 })) {
4178  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4179  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4180  AddToWorklist(Mul.getNode());
4181  AddToWorklist(Sub.getNode());
4182  CombineTo(RemNode, Sub);
4183  }
4184  return V;
4185  }
4186 
4187  // sdiv, srem -> sdivrem
4188  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4189  // true. Otherwise, we break the simplification logic in visitREM().
4190  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4191  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4192  if (SDValue DivRem = useDivRem(N))
4193  return DivRem;
4194 
4195  return SDValue();
4196 }
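// Standalone illustration (not from the LLVM sources): brute-force 8-bit
// checks of the two constant-divisor folds above:
//   sdiv X, -1        -> 0 - X
//   sdiv X, INT8_MIN  -> (X == INT8_MIN) ? 1 : 0
#include <cassert>

int main() {
  for (int X = -128; X <= 127; ++X) {
    if (X != -128)                          // -128 / -1 is left to the DAG fold,
      assert(X / -1 == 0 - X);              // which wraps it back to -128
    assert(X / -128 == (X == -128 ? 1 : 0));
  }
  return 0;
}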
4197 
4198 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4199  SDLoc DL(N);
4200  EVT VT = N->getValueType(0);
4201  EVT CCVT = getSetCCResultType(VT);
4202  unsigned BitWidth = VT.getScalarSizeInBits();
4203 
4204  // Helper for determining whether a value is a power-2 constant scalar or a
4205  // vector of such elements.
4206  auto IsPowerOfTwo = [](ConstantSDNode *C) {
4207  if (C->isZero() || C->isOpaque())
4208  return false;
4209  if (C->getAPIntValue().isPowerOf2())
4210  return true;
4211  if ((-C->getAPIntValue()).isPowerOf2())
4212  return true;
4213  return false;
4214  };
4215 
4216  // fold (sdiv X, pow2) -> simple ops after legalize
4217  // FIXME: We check for the exact bit here because the generic lowering gives
4218  // better results in that case. The target-specific lowering should learn how
4219  // to handle exact sdivs efficiently.
4220  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4221  // Target-specific implementation of sdiv x, pow2.
4222  if (SDValue Res = BuildSDIVPow2(N))
4223  return Res;
4224 
4225  // Create constants that are functions of the shift amount value.
4226  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4227  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4228  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4229  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4230  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4231  if (!isConstantOrConstantVector(Inexact))
4232  return SDValue();
4233 
4234  // Splat the sign bit into the register
4235  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4236  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4237  AddToWorklist(Sign.getNode());
4238 
4239  // Add (N0 < 0) ? abs2 - 1 : 0;
4240  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4241  AddToWorklist(Srl.getNode());
4242  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4243  AddToWorklist(Add.getNode());
4244  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4245  AddToWorklist(Sra.getNode());
4246 
4247  // Special case: (sdiv X, 1) -> X
4248  // Special case: (sdiv X, -1) -> 0-X
4249  SDValue One = DAG.getConstant(1, DL, VT);
4250  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4251  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4252  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4253  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4254  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4255 
4256  // If dividing by a positive value, we're done. Otherwise, the result must
4257  // be negated.
4258  SDValue Zero = DAG.getConstant(0, DL, VT);
4259  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4260 
4261  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4262  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4263  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4264  return Res;
4265  }
4266 
4267  // If integer divide is expensive and we satisfy the requirements, emit an
4268  // alternate sequence. Targets may check function attributes for size/speed
4269  // trade-offs.
4270  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4271  if (isConstantOrConstantVector(N1) &&
4272  !TLI.isIntDivCheap(N->getValueType(0), Attr))
4273  if (SDValue Op = BuildSDIV(N))
4274  return Op;
4275 
4276  return SDValue();
4277 }
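// Standalone illustration (not from the LLVM sources): a brute-force 8-bit
// check of the power-of-two expansion built above. For a divisor 2^K, adding
// (2^K - 1) to negative dividends before the arithmetic shift makes the shift
// round toward zero like a signed division:
//   sdiv(X, 2^K) == (X + ((X >>s 7) >>u (8 - K))) >>s K
// Assumes '>>' of a negative int is an arithmetic shift (guaranteed since
// C++20 and true on common compilers).
#include <cassert>

int main() {
  for (int X = -128; X <= 127; ++X)
    for (int K = 1; K <= 7; ++K) {
      int Sign = X < 0 ? -1 : 0;                           // X >>s 7 on i8
      unsigned Bias = (unsigned)(Sign & 0xFF) >> (8 - K);  // 2^K - 1 or 0
      int Quotient = (X + (int)Bias) >> K;                 // arithmetic shift
      assert(Quotient == X / (1 << K));
    }
  return 0;
}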
4278 
4279 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4280  SDValue N0 = N->getOperand(0);
4281  SDValue N1 = N->getOperand(1);
4282  EVT VT = N->getValueType(0);
4283  EVT CCVT = getSetCCResultType(VT);
4284 
4285  // fold vector ops
4286  if (VT.isVector())
4287  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4288  return FoldedVOp;
4289 
4290  SDLoc DL(N);
4291 
4292  // fold (udiv c1, c2) -> c1/c2
4293  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4294  if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4295  return C;
4296 
4297  // fold (udiv X, -1) -> select(X == -1, 1, 0)
4298  if (N1C && N1C->isAllOnes())
4299  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4300  DAG.getConstant(1, DL, VT),
4301  DAG.getConstant(0, DL, VT));
4302 
4303  if (SDValue V = simplifyDivRem(N, DAG))
4304  return V;
4305 
4306  if (SDValue NewSel = foldBinOpIntoSelect(N))
4307  return NewSel;
4308 
4309  if (SDValue V = visitUDIVLike(N0, N1, N)) {
4310  // If the corresponding remainder node exists, update its users with
4311  // (Dividend - (Quotient * Divisor)).
4312  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4313  { N0, N1 })) {
4314  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4315  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4316  AddToWorklist(Mul.getNode());
4317  AddToWorklist(Sub.getNode());
4318  CombineTo(RemNode, Sub);
4319  }
4320  return V;
4321  }
4322 
4323  // udiv, urem -> udivrem
4324  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4325  // true. Otherwise, we break the simplification logic in visitREM().
4326  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4327  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4328  if (SDValue DivRem = useDivRem(N))
4329  return DivRem;
4330 
4331  return SDValue();
4332 }
4333 
4334 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4335  SDLoc DL(N);
4336  EVT VT = N->getValueType(0);
4337 
4338  // fold (udiv x, (1 << c)) -> x >>u c
4339  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4340  DAG.isKnownToBeAPowerOfTwo(N1)) {
4341  SDValue LogBase2 = BuildLogBase2(N1, DL);
4342  AddToWorklist(LogBase2.getNode());
4343 
4344  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4345  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4346  AddToWorklist(Trunc.getNode());
4347  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4348  }
4349 
4350  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4351  if (N1.getOpcode() == ISD::SHL) {
4352  SDValue N10 = N1.getOperand(0);
4353  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4354  DAG.isKnownToBeAPowerOfTwo(N10)) {
4355  SDValue LogBase2 = BuildLogBase2(N10, DL);
4356  AddToWorklist(LogBase2.getNode());
4357 
4358  EVT ADDVT = N1.getOperand(1).getValueType();
4359  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4360  AddToWorklist(Trunc.getNode());
4361  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4362  AddToWorklist(Add.getNode());
4363  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4364  }
4365  }
4366 
4367  // fold (udiv x, c) -> alternate
4368  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4369  if (isConstantOrConstantVector(N1) &&
4370  !TLI.isIntDivCheap(N->getValueType(0), Attr))
4371  if (SDValue Op = BuildUDIV(N))
4372  return Op;
4373 
4374  return SDValue();
4375 }
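// Standalone illustration (not from the LLVM sources): checks of the two
// unsigned divide-by-power-of-two folds above, in 32-bit unsigned arithmetic.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Samples[] = {0u, 1u, 2u, 255u, 0x1234u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : Samples) {
    // udiv x, (1 << c)  ->  x >>u c
    for (unsigned C = 0; C < 32; ++C)
      assert(X / (1u << C) == X >> C);

    // udiv x, (c0 << y) ->  x >>u (log2(c0) + y), with c0 a power of two
    for (unsigned Log2C0 = 0; Log2C0 < 8; ++Log2C0)
      for (unsigned Y = 0; Log2C0 + Y < 32; ++Y)
        assert(X / ((1u << Log2C0) << Y) == X >> (Log2C0 + Y));
  }
  return 0;
}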
4376 
4377 // handles ISD::SREM and ISD::UREM
4378 SDValue DAGCombiner::visitREM(SDNode *N) {
4379  unsigned Opcode = N->getOpcode();
4380  SDValue N0 = N->getOperand(0);
4381  SDValue N1 = N->getOperand(1);
4382  EVT VT = N->getValueType(0);
4383  EVT CCVT = getSetCCResultType(VT);
4384