LLVM  14.0.0git
DAGCombiner.cpp
Go to the documentation of this file.
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/Constant.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CodeGen.h"
60 #include "llvm/Support/Compiler.h"
61 #include "llvm/Support/Debug.h"
63 #include "llvm/Support/KnownBits.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <functional>
73 #include <iterator>
74 #include <string>
75 #include <tuple>
76 #include <utility>
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "dagcombine"
81 
82 STATISTIC(NodesCombined , "Number of dag nodes combined");
83 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87 STATISTIC(SlicedLoads, "Number of load sliced");
88 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89 
90 static cl::opt<bool>
91 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92  cl::desc("Enable DAG combiner's use of IR alias analysis"));
93 
94 static cl::opt<bool>
95 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96  cl::desc("Enable DAG combiner's use of TBAA"));
97 
98 #ifndef NDEBUG
100 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101  cl::desc("Only use DAG-combiner alias analysis in this"
102  " function"));
103 #endif
104 
105 /// Hidden option to stress test load slicing, i.e., when this option
106 /// is enabled, load slicing bypasses most of its profitability guards.
107 static cl::opt<bool>
108 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109  cl::desc("Bypass the profitability model of load slicing"),
110  cl::init(false));
111 
112 static cl::opt<bool>
113  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114  cl::desc("DAG combiner may split indexing from loads"));
115 
116 static cl::opt<bool>
117  EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118  cl::desc("DAG combiner enable merging multiple stores "
119  "into a wider store"));
120 
122  "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123  cl::desc("Limit the number of operands to inline for Token Factors"));
124 
126  "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127  cl::desc("Limit the number of times for the same StoreNode and RootNode "
128  "to bail out in store merging dependence check"));
129 
131  "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132  cl::desc("DAG combiner enable reducing the width of load/op/store "
133  "sequence"));
134 
136  "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137  cl::desc("DAG combiner enable load/<replace bytes>/store with "
138  "a narrower store"));
139 
140 namespace {
141 
142  class DAGCombiner {
143  SelectionDAG &DAG;
144  const TargetLowering &TLI;
145  const SelectionDAGTargetInfo *STI;
147  CodeGenOpt::Level OptLevel;
148  bool LegalDAG = false;
149  bool LegalOperations = false;
150  bool LegalTypes = false;
151  bool ForCodeSize;
152  bool DisableGenericCombines;
153 
154  /// Worklist of all of the nodes that need to be simplified.
155  ///
156  /// This must behave as a stack -- new nodes to process are pushed onto the
157  /// back and when processing we pop off of the back.
158  ///
159  /// The worklist will not contain duplicates but may contain null entries
160  /// due to nodes being deleted from the underlying DAG.
161  SmallVector<SDNode *, 64> Worklist;
162 
163  /// Mapping from an SDNode to its position on the worklist.
164  ///
165  /// This is used to find and remove nodes from the worklist (by nulling
166  /// them) when they are deleted from the underlying DAG. It relies on
167  /// stable indices of nodes within the worklist.
168  DenseMap<SDNode *, unsigned> WorklistMap;
169  /// This records all nodes attempted to add to the worklist since we
170  /// considered a new worklist entry. As we keep do not add duplicate nodes
171  /// in the worklist, this is different from the tail of the worklist.
172  SmallSetVector<SDNode *, 32> PruningList;
173 
174  /// Set of nodes which have been combined (at least once).
175  ///
176  /// This is used to allow us to reliably add any operands of a DAG node
177  /// which have not yet been combined to the worklist.
178  SmallPtrSet<SDNode *, 32> CombinedNodes;
179 
180  /// Map from candidate StoreNode to the pair of RootNode and count.
181  /// The count is used to track how many times we have seen the StoreNode
182  /// with the same RootNode bail out in dependence check. If we have seen
183  /// the bail out for the same pair many times over a limit, we won't
184  /// consider the StoreNode with the same RootNode as store merging
185  /// candidate again.
187 
188  // AA - Used for DAG load/store alias analysis.
189  AliasAnalysis *AA;
190 
191  /// When an instruction is simplified, add all users of the instruction to
192  /// the work lists because they might get more simplified now.
193  void AddUsersToWorklist(SDNode *N) {
194  for (SDNode *Node : N->uses())
195  AddToWorklist(Node);
196  }
197 
198  /// Convenient shorthand to add a node and all of its user to the worklist.
199  void AddToWorklistWithUsers(SDNode *N) {
200  AddUsersToWorklist(N);
201  AddToWorklist(N);
202  }
203 
204  // Prune potentially dangling nodes. This is called after
205  // any visit to a node, but should also be called during a visit after any
206  // failed combine which may have created a DAG node.
207  void clearAddedDanglingWorklistEntries() {
208  // Check any nodes added to the worklist to see if they are prunable.
209  while (!PruningList.empty()) {
210  auto *N = PruningList.pop_back_val();
211  if (N->use_empty())
212  recursivelyDeleteUnusedNodes(N);
213  }
214  }
215 
216  SDNode *getNextWorklistEntry() {
217  // Before we do any work, remove nodes that are not in use.
218  clearAddedDanglingWorklistEntries();
219  SDNode *N = nullptr;
220  // The Worklist holds the SDNodes in order, but it may contain null
221  // entries.
222  while (!N && !Worklist.empty()) {
223  N = Worklist.pop_back_val();
224  }
225 
226  if (N) {
227  bool GoodWorklistEntry = WorklistMap.erase(N);
228  (void)GoodWorklistEntry;
229  assert(GoodWorklistEntry &&
230  "Found a worklist entry without a corresponding map entry!");
231  }
232  return N;
233  }
234 
235  /// Call the node-specific routine that folds each particular type of node.
236  SDValue visit(SDNode *N);
237 
238  public:
239  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240  : DAG(D), TLI(D.getTargetLoweringInfo()),
241  STI(D.getSubtarget().getSelectionDAGInfo()),
242  Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243  ForCodeSize = DAG.shouldOptForSize();
244  DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
245 
246  MaximumLegalStoreInBits = 0;
247  // We use the minimum store size here, since that's all we can guarantee
248  // for the scalable vector types.
249  for (MVT VT : MVT::all_valuetypes())
250  if (EVT(VT).isSimple() && VT != MVT::Other &&
251  TLI.isTypeLegal(EVT(VT)) &&
252  VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253  MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254  }
255 
256  void ConsiderForPruning(SDNode *N) {
257  // Mark this for potential pruning.
258  PruningList.insert(N);
259  }
260 
261  /// Add to the worklist making sure its instance is at the back (next to be
262  /// processed.)
263  void AddToWorklist(SDNode *N) {
264  assert(N->getOpcode() != ISD::DELETED_NODE &&
265  "Deleted Node added to Worklist");
266 
267  // Skip handle nodes as they can't usefully be combined and confuse the
268  // zero-use deletion strategy.
269  if (N->getOpcode() == ISD::HANDLENODE)
270  return;
271 
272  ConsiderForPruning(N);
273 
274  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275  Worklist.push_back(N);
276  }
277 
278  /// Remove all instances of N from the worklist.
279  void removeFromWorklist(SDNode *N) {
280  CombinedNodes.erase(N);
281  PruningList.remove(N);
282  StoreRootCountMap.erase(N);
283 
284  auto It = WorklistMap.find(N);
285  if (It == WorklistMap.end())
286  return; // Not in the worklist.
287 
288  // Null out the entry rather than erasing it to avoid a linear operation.
289  Worklist[It->second] = nullptr;
290  WorklistMap.erase(It);
291  }
292 
293  void deleteAndRecombine(SDNode *N);
294  bool recursivelyDeleteUnusedNodes(SDNode *N);
295 
296  /// Replaces all uses of the results of one DAG node with new values.
297  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298  bool AddTo = true);
299 
300  /// Replaces all uses of the results of one DAG node with new values.
301  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302  return CombineTo(N, &Res, 1, AddTo);
303  }
304 
305  /// Replaces all uses of the results of one DAG node with new values.
306  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307  bool AddTo = true) {
308  SDValue To[] = { Res0, Res1 };
309  return CombineTo(N, To, 2, AddTo);
310  }
311 
312  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313 
314  private:
315  unsigned MaximumLegalStoreInBits;
316 
317  /// Check the specified integer node value to see if it can be simplified or
318  /// if things it uses can be simplified by bit propagation.
319  /// If so, return true.
320  bool SimplifyDemandedBits(SDValue Op) {
321  unsigned BitWidth = Op.getScalarValueSizeInBits();
323  return SimplifyDemandedBits(Op, DemandedBits);
324  }
325 
326  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328  KnownBits Known;
329  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330  return false;
331 
332  // Revisit the node.
333  AddToWorklist(Op.getNode());
334 
335  CommitTargetLoweringOpt(TLO);
336  return true;
337  }
338 
339  /// Check the specified vector node value to see if it can be simplified or
340  /// if things it uses can be simplified as it only uses some of the
341  /// elements. If so, return true.
342  bool SimplifyDemandedVectorElts(SDValue Op) {
343  // TODO: For now just pretend it cannot be simplified.
344  if (Op.getValueType().isScalableVector())
345  return false;
346 
347  unsigned NumElts = Op.getValueType().getVectorNumElements();
348  APInt DemandedElts = APInt::getAllOnes(NumElts);
349  return SimplifyDemandedVectorElts(Op, DemandedElts);
350  }
351 
352  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353  const APInt &DemandedElts,
354  bool AssumeSingleUse = false);
355  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356  bool AssumeSingleUse = false);
357 
358  bool CombineToPreIndexedLoadStore(SDNode *N);
359  bool CombineToPostIndexedLoadStore(SDNode *N);
360  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361  bool SliceUpLoad(SDNode *N);
362 
363  // Scalars have size 0 to distinguish from singleton vectors.
364  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
367 
368  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369  /// load.
370  ///
371  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373  /// \param EltNo index of the vector element to load.
374  /// \param OriginalLoad load that EVE came from to be replaced.
375  /// \returns EVE on success SDValue() on failure.
376  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377  SDValue EltNo,
378  LoadSDNode *OriginalLoad);
379  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
380  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383  SDValue PromoteIntBinOp(SDValue Op);
384  SDValue PromoteIntShiftOp(SDValue Op);
385  SDValue PromoteExtend(SDValue Op);
386  bool PromoteLoad(SDValue Op);
387 
388  /// Call the node-specific routine that knows how to fold each
389  /// particular type of node. If that doesn't do anything, try the
390  /// target-specific DAG combines.
392 
393  // Visitation implementation - Implement dag node combining for different
394  // node types. The semantics are as follows:
395  // Return Value:
396  // SDValue.getNode() == 0 - No change was made
397  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
398  // otherwise - N should be replaced by the returned Operand.
399  //
400  SDValue visitTokenFactor(SDNode *N);
401  SDValue visitMERGE_VALUES(SDNode *N);
402  SDValue visitADD(SDNode *N);
403  SDValue visitADDLike(SDNode *N);
404  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405  SDValue visitSUB(SDNode *N);
406  SDValue visitADDSAT(SDNode *N);
407  SDValue visitSUBSAT(SDNode *N);
408  SDValue visitADDC(SDNode *N);
409  SDValue visitADDO(SDNode *N);
410  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411  SDValue visitSUBC(SDNode *N);
412  SDValue visitSUBO(SDNode *N);
413  SDValue visitADDE(SDNode *N);
414  SDValue visitADDCARRY(SDNode *N);
415  SDValue visitSADDO_CARRY(SDNode *N);
416  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417  SDValue visitSUBE(SDNode *N);
418  SDValue visitSUBCARRY(SDNode *N);
419  SDValue visitSSUBO_CARRY(SDNode *N);
420  SDValue visitMUL(SDNode *N);
421  SDValue visitMULFIX(SDNode *N);
422  SDValue useDivRem(SDNode *N);
423  SDValue visitSDIV(SDNode *N);
424  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425  SDValue visitUDIV(SDNode *N);
426  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427  SDValue visitREM(SDNode *N);
428  SDValue visitMULHU(SDNode *N);
429  SDValue visitMULHS(SDNode *N);
430  SDValue visitSMUL_LOHI(SDNode *N);
431  SDValue visitUMUL_LOHI(SDNode *N);
432  SDValue visitMULO(SDNode *N);
433  SDValue visitIMINMAX(SDNode *N);
434  SDValue visitAND(SDNode *N);
435  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436  SDValue visitOR(SDNode *N);
437  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438  SDValue visitXOR(SDNode *N);
439  SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
440  SDValue visitSHL(SDNode *N);
441  SDValue visitSRA(SDNode *N);
442  SDValue visitSRL(SDNode *N);
443  SDValue visitFunnelShift(SDNode *N);
444  SDValue visitRotate(SDNode *N);
445  SDValue visitABS(SDNode *N);
446  SDValue visitBSWAP(SDNode *N);
447  SDValue visitBITREVERSE(SDNode *N);
448  SDValue visitCTLZ(SDNode *N);
449  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450  SDValue visitCTTZ(SDNode *N);
451  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452  SDValue visitCTPOP(SDNode *N);
453  SDValue visitSELECT(SDNode *N);
454  SDValue visitVSELECT(SDNode *N);
455  SDValue visitSELECT_CC(SDNode *N);
456  SDValue visitSETCC(SDNode *N);
457  SDValue visitSETCCCARRY(SDNode *N);
458  SDValue visitSIGN_EXTEND(SDNode *N);
459  SDValue visitZERO_EXTEND(SDNode *N);
460  SDValue visitANY_EXTEND(SDNode *N);
461  SDValue visitAssertExt(SDNode *N);
462  SDValue visitAssertAlign(SDNode *N);
463  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464  SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
465  SDValue visitTRUNCATE(SDNode *N);
466  SDValue visitBITCAST(SDNode *N);
467  SDValue visitFREEZE(SDNode *N);
468  SDValue visitBUILD_PAIR(SDNode *N);
469  SDValue visitFADD(SDNode *N);
470  SDValue visitSTRICT_FADD(SDNode *N);
471  SDValue visitFSUB(SDNode *N);
472  SDValue visitFMUL(SDNode *N);
473  SDValue visitFMA(SDNode *N);
474  SDValue visitFDIV(SDNode *N);
475  SDValue visitFREM(SDNode *N);
476  SDValue visitFSQRT(SDNode *N);
477  SDValue visitFCOPYSIGN(SDNode *N);
478  SDValue visitFPOW(SDNode *N);
479  SDValue visitSINT_TO_FP(SDNode *N);
480  SDValue visitUINT_TO_FP(SDNode *N);
481  SDValue visitFP_TO_SINT(SDNode *N);
482  SDValue visitFP_TO_UINT(SDNode *N);
483  SDValue visitFP_ROUND(SDNode *N);
484  SDValue visitFP_EXTEND(SDNode *N);
485  SDValue visitFNEG(SDNode *N);
486  SDValue visitFABS(SDNode *N);
487  SDValue visitFCEIL(SDNode *N);
488  SDValue visitFTRUNC(SDNode *N);
489  SDValue visitFFLOOR(SDNode *N);
490  SDValue visitFMinMax(SDNode *N);
491  SDValue visitBRCOND(SDNode *N);
492  SDValue visitBR_CC(SDNode *N);
493  SDValue visitLOAD(SDNode *N);
494 
495  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
496  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
497 
498  SDValue visitSTORE(SDNode *N);
499  SDValue visitLIFETIME_END(SDNode *N);
500  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
501  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
502  SDValue visitBUILD_VECTOR(SDNode *N);
503  SDValue visitCONCAT_VECTORS(SDNode *N);
504  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
505  SDValue visitVECTOR_SHUFFLE(SDNode *N);
506  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
507  SDValue visitINSERT_SUBVECTOR(SDNode *N);
508  SDValue visitMLOAD(SDNode *N);
509  SDValue visitMSTORE(SDNode *N);
510  SDValue visitMGATHER(SDNode *N);
511  SDValue visitMSCATTER(SDNode *N);
512  SDValue visitFP_TO_FP16(SDNode *N);
513  SDValue visitFP16_TO_FP(SDNode *N);
514  SDValue visitVECREDUCE(SDNode *N);
515  SDValue visitVPOp(SDNode *N);
516 
517  SDValue visitFADDForFMACombine(SDNode *N);
518  SDValue visitFSUBForFMACombine(SDNode *N);
519  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
520 
521  SDValue XformToShuffleWithZero(SDNode *N);
522  bool reassociationCanBreakAddressingModePattern(unsigned Opc,
523  const SDLoc &DL, SDValue N0,
524  SDValue N1);
525  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
526  SDValue N1);
527  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
528  SDValue N1, SDNodeFlags Flags);
529 
530  SDValue visitShiftByConstant(SDNode *N);
531 
532  SDValue foldSelectOfConstants(SDNode *N);
533  SDValue foldVSelectOfConstants(SDNode *N);
534  SDValue foldBinOpIntoSelect(SDNode *BO);
535  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
536  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
537  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
538  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
539  SDValue N2, SDValue N3, ISD::CondCode CC,
540  bool NotExtCompare = false);
541  SDValue convertSelectOfFPConstantsToLoadOffset(
542  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
543  ISD::CondCode CC);
544  SDValue foldSignChangeInBitcast(SDNode *N);
545  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
546  SDValue N2, SDValue N3, ISD::CondCode CC);
547  SDValue foldSelectOfBinops(SDNode *N);
548  SDValue foldSextSetcc(SDNode *N);
549  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
550  const SDLoc &DL);
551  SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
552  SDValue unfoldMaskedMerge(SDNode *N);
553  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
554  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
555  const SDLoc &DL, bool foldBooleans);
556  SDValue rebuildSetCC(SDValue N);
557 
558  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
559  SDValue &CC, bool MatchStrict = false) const;
560  bool isOneUseSetCC(SDValue N) const;
561 
562  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
563  unsigned HiOp);
564  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
565  SDValue CombineExtLoad(SDNode *N);
566  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
567  SDValue combineRepeatedFPDivisors(SDNode *N);
568  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
569  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
570  SDValue BuildSDIV(SDNode *N);
571  SDValue BuildSDIVPow2(SDNode *N);
572  SDValue BuildUDIV(SDNode *N);
573  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
574  SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
575  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
576  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
577  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
578  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
579  SDNodeFlags Flags, bool Reciprocal);
580  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
581  SDNodeFlags Flags, bool Reciprocal);
582  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
583  bool DemandHighBits = true);
584  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
585  SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
586  SDValue InnerPos, SDValue InnerNeg,
587  unsigned PosOpcode, unsigned NegOpcode,
588  const SDLoc &DL);
589  SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
590  SDValue InnerPos, SDValue InnerNeg,
591  unsigned PosOpcode, unsigned NegOpcode,
592  const SDLoc &DL);
593  SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
594  SDValue MatchLoadCombine(SDNode *N);
595  SDValue mergeTruncStores(StoreSDNode *N);
596  SDValue reduceLoadWidth(SDNode *N);
597  SDValue ReduceLoadOpStoreWidth(SDNode *N);
599  SDValue TransformFPLoadStorePair(SDNode *N);
600  SDValue convertBuildVecZextToZext(SDNode *N);
601  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
602  SDValue reduceBuildVecTruncToBitCast(SDNode *N);
603  SDValue reduceBuildVecToShuffle(SDNode *N);
604  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
605  ArrayRef<int> VectorMask, SDValue VecIn1,
606  SDValue VecIn2, unsigned LeftIdx,
607  bool DidSplitVec);
608  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
609 
610  /// Walk up chain skipping non-aliasing memory nodes,
611  /// looking for aliasing nodes and adding them to the Aliases vector.
612  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
613  SmallVectorImpl<SDValue> &Aliases);
614 
615  /// Return true if there is any possibility that the two addresses overlap.
616  bool mayAlias(SDNode *Op0, SDNode *Op1) const;
617 
618  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
619  /// chain (aliasing node.)
620  SDValue FindBetterChain(SDNode *N, SDValue Chain);
621 
622  /// Try to replace a store and any possibly adjacent stores on
623  /// consecutive chains with better chains. Return true only if St is
624  /// replaced.
625  ///
626  /// Notice that other chains may still be replaced even if the function
627  /// returns false.
628  bool findBetterNeighborChains(StoreSDNode *St);
629 
630  // Helper for findBetterNeighborChains. Walk up store chain add additional
631  // chained stores that do not overlap and can be parallelized.
632  bool parallelizeChainedStores(StoreSDNode *St);
633 
634  /// Holds a pointer to an LSBaseSDNode as well as information on where it
635  /// is located in a sequence of memory operations connected by a chain.
636  struct MemOpLink {
637  // Ptr to the mem node.
638  LSBaseSDNode *MemNode;
639 
640  // Offset from the base ptr.
641  int64_t OffsetFromBase;
642 
643  MemOpLink(LSBaseSDNode *N, int64_t Offset)
644  : MemNode(N), OffsetFromBase(Offset) {}
645  };
646 
647  // Classify the origin of a stored value.
648  enum class StoreSource { Unknown, Constant, Extract, Load };
649  StoreSource getStoreSource(SDValue StoreVal) {
650  switch (StoreVal.getOpcode()) {
651  case ISD::Constant:
652  case ISD::ConstantFP:
653  return StoreSource::Constant;
656  return StoreSource::Extract;
657  case ISD::LOAD:
658  return StoreSource::Load;
659  default:
660  return StoreSource::Unknown;
661  }
662  }
663 
664  /// This is a helper function for visitMUL to check the profitability
665  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
666  /// MulNode is the original multiply, AddNode is (add x, c1),
667  /// and ConstNode is c2.
668  bool isMulAddWithConstProfitable(SDNode *MulNode,
669  SDValue &AddNode,
670  SDValue &ConstNode);
671 
672  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
673  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
674  /// the type of the loaded value to be extended.
675  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
676  EVT LoadResultTy, EVT &ExtVT);
677 
678  /// Helper function to calculate whether the given Load/Store can have its
679  /// width reduced to ExtVT.
680  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
681  EVT &MemVT, unsigned ShAmt = 0);
682 
683  /// Used by BackwardsPropagateMask to find suitable loads.
684  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
685  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
686  ConstantSDNode *Mask, SDNode *&NodeToMask);
687  /// Attempt to propagate a given AND node back to load leaves so that they
688  /// can be combined into narrow loads.
689  bool BackwardsPropagateMask(SDNode *N);
690 
691  /// Helper function for mergeConsecutiveStores which merges the component
692  /// store chains.
693  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
694  unsigned NumStores);
695 
696  /// This is a helper function for mergeConsecutiveStores. When the source
697  /// elements of the consecutive stores are all constants or all extracted
698  /// vector elements, try to merge them into one larger store introducing
699  /// bitcasts if necessary. \return True if a merged store was created.
700  bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
701  EVT MemVT, unsigned NumStores,
702  bool IsConstantSrc, bool UseVector,
703  bool UseTrunc);
704 
705  /// This is a helper function for mergeConsecutiveStores. Stores that
706  /// potentially may be merged with St are placed in StoreNodes. RootNode is
707  /// a chain predecessor to all store candidates.
708  void getStoreMergeCandidates(StoreSDNode *St,
709  SmallVectorImpl<MemOpLink> &StoreNodes,
710  SDNode *&Root);
711 
712  /// Helper function for mergeConsecutiveStores. Checks if candidate stores
713  /// have indirect dependency through their operands. RootNode is the
714  /// predecessor to all stores calculated by getStoreMergeCandidates and is
715  /// used to prune the dependency check. \return True if safe to merge.
716  bool checkMergeStoreCandidatesForDependencies(
717  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
718  SDNode *RootNode);
719 
720  /// This is a helper function for mergeConsecutiveStores. Given a list of
721  /// store candidates, find the first N that are consecutive in memory.
722  /// Returns 0 if there are not at least 2 consecutive stores to try merging.
723  unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
724  int64_t ElementSizeBytes) const;
725 
726  /// This is a helper function for mergeConsecutiveStores. It is used for
727  /// store chains that are composed entirely of constant values.
728  bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
729  unsigned NumConsecutiveStores,
730  EVT MemVT, SDNode *Root, bool AllowVectors);
731 
732  /// This is a helper function for mergeConsecutiveStores. It is used for
733  /// store chains that are composed entirely of extracted vector elements.
734  /// When extracting multiple vector elements, try to store them in one
735  /// vector store rather than a sequence of scalar stores.
736  bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
737  unsigned NumConsecutiveStores, EVT MemVT,
738  SDNode *Root);
739 
740  /// This is a helper function for mergeConsecutiveStores. It is used for
741  /// store chains that are composed entirely of loaded values.
742  bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
743  unsigned NumConsecutiveStores, EVT MemVT,
744  SDNode *Root, bool AllowVectors,
745  bool IsNonTemporalStore, bool IsNonTemporalLoad);
746 
747  /// Merge consecutive store operations into a wide store.
748  /// This optimization uses wide integers or vectors when possible.
749  /// \return true if stores were merged.
750  bool mergeConsecutiveStores(StoreSDNode *St);
751 
752  /// Try to transform a truncation where C is a constant:
753  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
754  ///
755  /// \p N needs to be a truncation and its first operand an AND. Other
756  /// requirements are checked by the function (e.g. that trunc is
757  /// single-use) and if missed an empty SDValue is returned.
758  SDValue distributeTruncateThroughAnd(SDNode *N);
759 
760  /// Helper function to determine whether the target supports operation
761  /// given by \p Opcode for type \p VT, that is, whether the operation
762  /// is legal or custom before legalizing operations, and whether is
763  /// legal (but not custom) after legalization.
764  bool hasOperation(unsigned Opcode, EVT VT) {
765  return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
766  }
767 
768  public:
769  /// Runs the dag combiner on all nodes in the work list
770  void Run(CombineLevel AtLevel);
771 
772  SelectionDAG &getDAG() const { return DAG; }
773 
774  /// Returns a type large enough to hold any valid shift amount - before type
775  /// legalization these can be huge.
776  EVT getShiftAmountTy(EVT LHSTy) {
777  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
778  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
779  }
780 
781  /// This method returns true if we are running before type legalization or
782  /// if the specified VT is legal.
783  bool isTypeLegal(const EVT &VT) {
784  if (!LegalTypes) return true;
785  return TLI.isTypeLegal(VT);
786  }
787 
788  /// Convenience wrapper around TargetLowering::getSetCCResultType
789  EVT getSetCCResultType(EVT VT) const {
790  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
791  }
792 
793  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
794  SDValue OrigLoad, SDValue ExtLoad,
795  ISD::NodeType ExtType);
796  };
797 
798 /// This class is a DAGUpdateListener that removes any deleted
799 /// nodes from the worklist.
800 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
801  DAGCombiner &DC;
802 
803 public:
804  explicit WorklistRemover(DAGCombiner &dc)
805  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
806 
807  void NodeDeleted(SDNode *N, SDNode *E) override {
808  DC.removeFromWorklist(N);
809  }
810 };
811 
812 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
813  DAGCombiner &DC;
814 
815 public:
816  explicit WorklistInserter(DAGCombiner &dc)
817  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
818 
819  // FIXME: Ideally we could add N to the worklist, but this causes exponential
820  // compile time costs in large DAGs, e.g. Halide.
821  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
822 };
823 
824 } // end anonymous namespace
825 
826 //===----------------------------------------------------------------------===//
827 // TargetLowering::DAGCombinerInfo implementation
828 //===----------------------------------------------------------------------===//
829 
831  ((DAGCombiner*)DC)->AddToWorklist(N);
832 }
833 
835 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
836  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
837 }
838 
840 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
841  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
842 }
843 
845 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
846  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
847 }
848 
851  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
852 }
853 
856  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
857 }
858 
859 //===----------------------------------------------------------------------===//
860 // Helper Functions
861 //===----------------------------------------------------------------------===//
862 
863 void DAGCombiner::deleteAndRecombine(SDNode *N) {
865 
866  // If the operands of this node are only used by the node, they will now be
867  // dead. Make sure to re-visit them and recursively delete dead nodes.
868  for (const SDValue &Op : N->ops())
869  // For an operand generating multiple values, one of the values may
870  // become dead allowing further simplification (e.g. split index
871  // arithmetic from an indexed load).
872  if (Op->hasOneUse() || Op->getNumValues() > 1)
873  AddToWorklist(Op.getNode());
874 
875  DAG.DeleteNode(N);
876 }
877 
878 // APInts must be the same size for most operations, this helper
879 // function zero extends the shorter of the pair so that they match.
880 // We provide an Offset so that we can create bitwidths that won't overflow.
881 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
882  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
883  LHS = LHS.zextOrSelf(Bits);
884  RHS = RHS.zextOrSelf(Bits);
885 }
886 
887 // Return true if this node is a setcc, or is a select_cc
888 // that selects between the target values used for true and false, making it
889 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
890 // the appropriate nodes based on the type of node we are checking. This
891 // simplifies life a bit for the callers.
892 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
893  SDValue &CC, bool MatchStrict) const {
894  if (N.getOpcode() == ISD::SETCC) {
895  LHS = N.getOperand(0);
896  RHS = N.getOperand(1);
897  CC = N.getOperand(2);
898  return true;
899  }
900 
901  if (MatchStrict &&
902  (N.getOpcode() == ISD::STRICT_FSETCC ||
903  N.getOpcode() == ISD::STRICT_FSETCCS)) {
904  LHS = N.getOperand(1);
905  RHS = N.getOperand(2);
906  CC = N.getOperand(3);
907  return true;
908  }
909 
910  if (N.getOpcode() != ISD::SELECT_CC ||
911  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
912  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
913  return false;
914 
915  if (TLI.getBooleanContents(N.getValueType()) ==
917  return false;
918 
919  LHS = N.getOperand(0);
920  RHS = N.getOperand(1);
921  CC = N.getOperand(4);
922  return true;
923 }
924 
925 /// Return true if this is a SetCC-equivalent operation with only one use.
926 /// If this is true, it allows the users to invert the operation for free when
927 /// it is profitable to do so.
928 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
929  SDValue N0, N1, N2;
930  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
931  return true;
932  return false;
933 }
934 
935 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
936  if (!ScalarTy.isSimple())
937  return false;
938 
939  uint64_t MaskForTy = 0ULL;
940  switch (ScalarTy.getSimpleVT().SimpleTy) {
941  case MVT::i8:
942  MaskForTy = 0xFFULL;
943  break;
944  case MVT::i16:
945  MaskForTy = 0xFFFFULL;
946  break;
947  case MVT::i32:
948  MaskForTy = 0xFFFFFFFFULL;
949  break;
950  default:
951  return false;
952  break;
953  }
954 
955  APInt Val;
956  if (ISD::isConstantSplatVector(N, Val))
957  return Val.getLimitedValue() == MaskForTy;
958 
959  return false;
960 }
961 
962 // Determines if it is a constant integer or a splat/build vector of constant
963 // integers (and undefs).
964 // Do not permit build vector implicit truncation.
965 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
966  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
967  return !(Const->isOpaque() && NoOpaques);
968  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
969  return false;
970  unsigned BitWidth = N.getScalarValueSizeInBits();
971  for (const SDValue &Op : N->op_values()) {
972  if (Op.isUndef())
973  continue;
974  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
975  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
976  (Const->isOpaque() && NoOpaques))
977  return false;
978  }
979  return true;
980 }
981 
982 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
983 // undef's.
984 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
985  if (V.getOpcode() != ISD::BUILD_VECTOR)
986  return false;
987  return isConstantOrConstantVector(V, NoOpaques) ||
989 }
990 
991 // Determine if this an indexed load with an opaque target constant index.
992 static bool canSplitIdx(LoadSDNode *LD) {
993  return MaySplitLoadIndex &&
994  (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
995  !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
996 }
997 
998 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
999  const SDLoc &DL,
1000  SDValue N0,
1001  SDValue N1) {
1002  // Currently this only tries to ensure we don't undo the GEP splits done by
1003  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1004  // we check if the following transformation would be problematic:
1005  // (load/store (add, (add, x, offset1), offset2)) ->
1006  // (load/store (add, x, offset1+offset2)).
1007 
1008  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1009  return false;
1010 
1011  if (N0.hasOneUse())
1012  return false;
1013 
1014  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1015  auto *C2 = dyn_cast<ConstantSDNode>(N1);
1016  if (!C1 || !C2)
1017  return false;
1018 
1019  const APInt &C1APIntVal = C1->getAPIntValue();
1020  const APInt &C2APIntVal = C2->getAPIntValue();
1021  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1022  return false;
1023 
1024  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1025  if (CombinedValueIntVal.getBitWidth() > 64)
1026  return false;
1027  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1028 
1029  for (SDNode *Node : N0->uses()) {
1030  auto LoadStore = dyn_cast<MemSDNode>(Node);
1031  if (LoadStore) {
1032  // Is x[offset2] already not a legal addressing mode? If so then
1033  // reassociating the constants breaks nothing (we test offset2 because
1034  // that's the one we hope to fold into the load or store).
1036  AM.HasBaseReg = true;
1037  AM.BaseOffs = C2APIntVal.getSExtValue();
1038  EVT VT = LoadStore->getMemoryVT();
1039  unsigned AS = LoadStore->getAddressSpace();
1040  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1041  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1042  continue;
1043 
1044  // Would x[offset1+offset2] still be a legal addressing mode?
1045  AM.BaseOffs = CombinedValue;
1046  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1047  return true;
1048  }
1049  }
1050 
1051  return false;
1052 }
1053 
1054 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1055 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1056 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1057  SDValue N0, SDValue N1) {
1058  EVT VT = N0.getValueType();
1059 
1060  if (N0.getOpcode() != Opc)
1061  return SDValue();
1062 
1063  SDValue N00 = N0.getOperand(0);
1064  SDValue N01 = N0.getOperand(1);
1065 
1068  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1069  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1070  return DAG.getNode(Opc, DL, VT, N00, OpNode);
1071  return SDValue();
1072  }
1073  if (N0.hasOneUse()) {
1074  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1075  // iff (op x, c1) has one use
1076  if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
1077  return DAG.getNode(Opc, DL, VT, OpNode, N01);
1078  return SDValue();
1079  }
1080  }
1081  return SDValue();
1082 }
1083 
1084 // Try to reassociate commutative binops.
1085 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1086  SDValue N1, SDNodeFlags Flags) {
1087  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1088 
1089  // Floating-point reassociation is not allowed without loose FP math.
1090  if (N0.getValueType().isFloatingPoint() ||
1092  if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1093  return SDValue();
1094 
1095  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1096  return Combined;
1097  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1098  return Combined;
1099  return SDValue();
1100 }
1101 
1102 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1103  bool AddTo) {
1104  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1105  ++NodesCombined;
1106  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1107  To[0].getNode()->dump(&DAG);
1108  dbgs() << " and " << NumTo - 1 << " other values\n");
1109  for (unsigned i = 0, e = NumTo; i != e; ++i)
1110  assert((!To[i].getNode() ||
1111  N->getValueType(i) == To[i].getValueType()) &&
1112  "Cannot combine value to value of different type!");
1113 
1114  WorklistRemover DeadNodes(*this);
1115  DAG.ReplaceAllUsesWith(N, To);
1116  if (AddTo) {
1117  // Push the new nodes and any users onto the worklist
1118  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1119  if (To[i].getNode()) {
1120  AddToWorklist(To[i].getNode());
1121  AddUsersToWorklist(To[i].getNode());
1122  }
1123  }
1124  }
1125 
1126  // Finally, if the node is now dead, remove it from the graph. The node
1127  // may not be dead if the replacement process recursively simplified to
1128  // something else needing this node.
1129  if (N->use_empty())
1130  deleteAndRecombine(N);
1131  return SDValue(N, 0);
1132 }
1133 
1134 void DAGCombiner::
1135 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1136  // Replace the old value with the new one.
1137  ++NodesCombined;
1138  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1139  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1140  dbgs() << '\n');
1141 
1142  // Replace all uses. If any nodes become isomorphic to other nodes and
1143  // are deleted, make sure to remove them from our worklist.
1144  WorklistRemover DeadNodes(*this);
1145  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1146 
1147  // Push the new node and any (possibly new) users onto the worklist.
1148  AddToWorklistWithUsers(TLO.New.getNode());
1149 
1150  // Finally, if the node is now dead, remove it from the graph. The node
1151  // may not be dead if the replacement process recursively simplified to
1152  // something else needing this node.
1153  if (TLO.Old.getNode()->use_empty())
1154  deleteAndRecombine(TLO.Old.getNode());
1155 }
1156 
1157 /// Check the specified integer node value to see if it can be simplified or if
1158 /// things it uses can be simplified by bit propagation. If so, return true.
1159 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1160  const APInt &DemandedElts,
1161  bool AssumeSingleUse) {
1162  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1163  KnownBits Known;
1164  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1165  AssumeSingleUse))
1166  return false;
1167 
1168  // Revisit the node.
1169  AddToWorklist(Op.getNode());
1170 
1171  CommitTargetLoweringOpt(TLO);
1172  return true;
1173 }
1174 
1175 /// Check the specified vector node value to see if it can be simplified or
1176 /// if things it uses can be simplified as it only uses some of the elements.
1177 /// If so, return true.
1178 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1179  const APInt &DemandedElts,
1180  bool AssumeSingleUse) {
1181  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1182  APInt KnownUndef, KnownZero;
1183  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1184  TLO, 0, AssumeSingleUse))
1185  return false;
1186 
1187  // Revisit the node.
1188  AddToWorklist(Op.getNode());
1189 
1190  CommitTargetLoweringOpt(TLO);
1191  return true;
1192 }
1193 
1194 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1195  SDLoc DL(Load);
1196  EVT VT = Load->getValueType(0);
1197  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1198 
1199  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1200  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1201  WorklistRemover DeadNodes(*this);
1202  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1203  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1204  deleteAndRecombine(Load);
1205  AddToWorklist(Trunc.getNode());
1206 }
1207 
1208 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1209  Replace = false;
1210  SDLoc DL(Op);
1211  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1212  LoadSDNode *LD = cast<LoadSDNode>(Op);
1213  EVT MemVT = LD->getMemoryVT();
1215  : LD->getExtensionType();
1216  Replace = true;
1217  return DAG.getExtLoad(ExtType, DL, PVT,
1218  LD->getChain(), LD->getBasePtr(),
1219  MemVT, LD->getMemOperand());
1220  }
1221 
1222  unsigned Opc = Op.getOpcode();
1223  switch (Opc) {
1224  default: break;
1225  case ISD::AssertSext:
1226  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1227  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1228  break;
1229  case ISD::AssertZext:
1230  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1231  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1232  break;
1233  case ISD::Constant: {
1234  unsigned ExtOpc =
1235  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1236  return DAG.getNode(ExtOpc, DL, PVT, Op);
1237  }
1238  }
1239 
1240  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1241  return SDValue();
1242  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1243 }
1244 
1245 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1247  return SDValue();
1248  EVT OldVT = Op.getValueType();
1249  SDLoc DL(Op);
1250  bool Replace = false;
1251  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1252  if (!NewOp.getNode())
1253  return SDValue();
1254  AddToWorklist(NewOp.getNode());
1255 
1256  if (Replace)
1257  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1258  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1259  DAG.getValueType(OldVT));
1260 }
1261 
1262 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1263  EVT OldVT = Op.getValueType();
1264  SDLoc DL(Op);
1265  bool Replace = false;
1266  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1267  if (!NewOp.getNode())
1268  return SDValue();
1269  AddToWorklist(NewOp.getNode());
1270 
1271  if (Replace)
1272  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1273  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1274 }
1275 
1276 /// Promote the specified integer binary operation if the target indicates it is
1277 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1278 /// i32 since i16 instructions are longer.
1279 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1280  if (!LegalOperations)
1281  return SDValue();
1282 
1283  EVT VT = Op.getValueType();
1284  if (VT.isVector() || !VT.isInteger())
1285  return SDValue();
1286 
1287  // If operation type is 'undesirable', e.g. i16 on x86, consider
1288  // promoting it.
1289  unsigned Opc = Op.getOpcode();
1290  if (TLI.isTypeDesirableForOp(Opc, VT))
1291  return SDValue();
1292 
1293  EVT PVT = VT;
1294  // Consult target whether it is a good idea to promote this operation and
1295  // what's the right type to promote it to.
1296  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1297  assert(PVT != VT && "Don't know what type to promote to!");
1298 
1299  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1300 
1301  bool Replace0 = false;
1302  SDValue N0 = Op.getOperand(0);
1303  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1304 
1305  bool Replace1 = false;
1306  SDValue N1 = Op.getOperand(1);
1307  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1308  SDLoc DL(Op);
1309 
1310  SDValue RV =
1311  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1312 
1313  // We are always replacing N0/N1's use in N and only need additional
1314  // replacements if there are additional uses.
1315  // Note: We are checking uses of the *nodes* (SDNode) rather than values
1316  // (SDValue) here because the node may reference multiple values
1317  // (for example, the chain value of a load node).
1318  Replace0 &= !N0->hasOneUse();
1319  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1320 
1321  // Combine Op here so it is preserved past replacements.
1322  CombineTo(Op.getNode(), RV);
1323 
1324  // If operands have a use ordering, make sure we deal with
1325  // predecessor first.
1326  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1327  std::swap(N0, N1);
1328  std::swap(NN0, NN1);
1329  }
1330 
1331  if (Replace0) {
1332  AddToWorklist(NN0.getNode());
1333  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1334  }
1335  if (Replace1) {
1336  AddToWorklist(NN1.getNode());
1337  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1338  }
1339  return Op;
1340  }
1341  return SDValue();
1342 }
1343 
1344 /// Promote the specified integer shift operation if the target indicates it is
1345 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1346 /// i32 since i16 instructions are longer.
1347 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1348  if (!LegalOperations)
1349  return SDValue();
1350 
1351  EVT VT = Op.getValueType();
1352  if (VT.isVector() || !VT.isInteger())
1353  return SDValue();
1354 
1355  // If operation type is 'undesirable', e.g. i16 on x86, consider
1356  // promoting it.
1357  unsigned Opc = Op.getOpcode();
1358  if (TLI.isTypeDesirableForOp(Opc, VT))
1359  return SDValue();
1360 
1361  EVT PVT = VT;
1362  // Consult target whether it is a good idea to promote this operation and
1363  // what's the right type to promote it to.
1364  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1365  assert(PVT != VT && "Don't know what type to promote to!");
1366 
1367  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1368 
1369  bool Replace = false;
1370  SDValue N0 = Op.getOperand(0);
1371  SDValue N1 = Op.getOperand(1);
1372  if (Opc == ISD::SRA)
1373  N0 = SExtPromoteOperand(N0, PVT);
1374  else if (Opc == ISD::SRL)
1375  N0 = ZExtPromoteOperand(N0, PVT);
1376  else
1377  N0 = PromoteOperand(N0, PVT, Replace);
1378 
1379  if (!N0.getNode())
1380  return SDValue();
1381 
1382  SDLoc DL(Op);
1383  SDValue RV =
1384  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1385 
1386  if (Replace)
1387  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1388 
1389  // Deal with Op being deleted.
1390  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1391  return RV;
1392  }
1393  return SDValue();
1394 }
1395 
1396 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1397  if (!LegalOperations)
1398  return SDValue();
1399 
1400  EVT VT = Op.getValueType();
1401  if (VT.isVector() || !VT.isInteger())
1402  return SDValue();
1403 
1404  // If operation type is 'undesirable', e.g. i16 on x86, consider
1405  // promoting it.
1406  unsigned Opc = Op.getOpcode();
1407  if (TLI.isTypeDesirableForOp(Opc, VT))
1408  return SDValue();
1409 
1410  EVT PVT = VT;
1411  // Consult target whether it is a good idea to promote this operation and
1412  // what's the right type to promote it to.
1413  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1414  assert(PVT != VT && "Don't know what type to promote to!");
1415  // fold (aext (aext x)) -> (aext x)
1416  // fold (aext (zext x)) -> (zext x)
1417  // fold (aext (sext x)) -> (sext x)
1418  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1419  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1420  }
1421  return SDValue();
1422 }
1423 
1424 bool DAGCombiner::PromoteLoad(SDValue Op) {
1425  if (!LegalOperations)
1426  return false;
1427 
1428  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1429  return false;
1430 
1431  EVT VT = Op.getValueType();
1432  if (VT.isVector() || !VT.isInteger())
1433  return false;
1434 
1435  // If operation type is 'undesirable', e.g. i16 on x86, consider
1436  // promoting it.
1437  unsigned Opc = Op.getOpcode();
1438  if (TLI.isTypeDesirableForOp(Opc, VT))
1439  return false;
1440 
1441  EVT PVT = VT;
1442  // Consult target whether it is a good idea to promote this operation and
1443  // what's the right type to promote it to.
1444  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1445  assert(PVT != VT && "Don't know what type to promote to!");
1446 
1447  SDLoc DL(Op);
1448  SDNode *N = Op.getNode();
1449  LoadSDNode *LD = cast<LoadSDNode>(N);
1450  EVT MemVT = LD->getMemoryVT();
1452  : LD->getExtensionType();
1453  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1454  LD->getChain(), LD->getBasePtr(),
1455  MemVT, LD->getMemOperand());
1456  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1457 
1458  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1459  Result.getNode()->dump(&DAG); dbgs() << '\n');
1460  WorklistRemover DeadNodes(*this);
1461  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1462  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1463  deleteAndRecombine(N);
1464  AddToWorklist(Result.getNode());
1465  return true;
1466  }
1467  return false;
1468 }
1469 
1470 /// Recursively delete a node which has no uses and any operands for
1471 /// which it is the only use.
1472 ///
1473 /// Note that this both deletes the nodes and removes them from the worklist.
1474 /// It also adds any nodes who have had a user deleted to the worklist as they
1475 /// may now have only one use and subject to other combines.
1476 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1477  if (!N->use_empty())
1478  return false;
1479 
1481  Nodes.insert(N);
1482  do {
1483  N = Nodes.pop_back_val();
1484  if (!N)
1485  continue;
1486 
1487  if (N->use_empty()) {
1488  for (const SDValue &ChildN : N->op_values())
1489  Nodes.insert(ChildN.getNode());
1490 
1492  DAG.DeleteNode(N);
1493  } else {
1494  AddToWorklist(N);
1495  }
1496  } while (!Nodes.empty());
1497  return true;
1498 }
1499 
1500 //===----------------------------------------------------------------------===//
1501 // Main DAG Combiner implementation
1502 //===----------------------------------------------------------------------===//
1503 
1504 void DAGCombiner::Run(CombineLevel AtLevel) {
1505  // set the instance variables, so that the various visit routines may use it.
1506  Level = AtLevel;
1507  LegalDAG = Level >= AfterLegalizeDAG;
1508  LegalOperations = Level >= AfterLegalizeVectorOps;
1509  LegalTypes = Level >= AfterLegalizeTypes;
1510 
1511  WorklistInserter AddNodes(*this);
1512 
1513  // Add all the dag nodes to the worklist.
1514  for (SDNode &Node : DAG.allnodes())
1515  AddToWorklist(&Node);
1516 
1517  // Create a dummy node (which is not added to allnodes), that adds a reference
1518  // to the root node, preventing it from being deleted, and tracking any
1519  // changes of the root.
1520  HandleSDNode Dummy(DAG.getRoot());
1521 
1522  // While we have a valid worklist entry node, try to combine it.
1523  while (SDNode *N = getNextWorklistEntry()) {
1524  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1525  // N is deleted from the DAG, since they too may now be dead or may have a
1526  // reduced number of uses, allowing other xforms.
1527  if (recursivelyDeleteUnusedNodes(N))
1528  continue;
1529 
1530  WorklistRemover DeadNodes(*this);
1531 
1532  // If this combine is running after legalizing the DAG, re-legalize any
1533  // nodes pulled off the worklist.
1534  if (LegalDAG) {
1535  SmallSetVector<SDNode *, 16> UpdatedNodes;
1536  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1537 
1538  for (SDNode *LN : UpdatedNodes)
1539  AddToWorklistWithUsers(LN);
1540 
1541  if (!NIsValid)
1542  continue;
1543  }
1544 
1545  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1546 
1547  // Add any operands of the new node which have not yet been combined to the
1548  // worklist as well. Because the worklist uniques things already, this
1549  // won't repeatedly process the same operand.
1550  CombinedNodes.insert(N);
1551  for (const SDValue &ChildN : N->op_values())
1552  if (!CombinedNodes.count(ChildN.getNode()))
1553  AddToWorklist(ChildN.getNode());
1554 
1555  SDValue RV = combine(N);
1556 
1557  if (!RV.getNode())
1558  continue;
1559 
1560  ++NodesCombined;
1561 
1562  // If we get back the same node we passed in, rather than a new node or
1563  // zero, we know that the node must have defined multiple values and
1564  // CombineTo was used. Since CombineTo takes care of the worklist
1565  // mechanics for us, we have no work to do in this case.
1566  if (RV.getNode() == N)
1567  continue;
1568 
1569  assert(N->getOpcode() != ISD::DELETED_NODE &&
1570  RV.getOpcode() != ISD::DELETED_NODE &&
1571  "Node was deleted but visit returned new node!");
1572 
1573  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1574 
1575  if (N->getNumValues() == RV.getNode()->getNumValues())
1576  DAG.ReplaceAllUsesWith(N, RV.getNode());
1577  else {
1578  assert(N->getValueType(0) == RV.getValueType() &&
1579  N->getNumValues() == 1 && "Type mismatch");
1580  DAG.ReplaceAllUsesWith(N, &RV);
1581  }
1582 
1583  // Push the new node and any users onto the worklist. Omit this if the
1584  // new node is the EntryToken (e.g. if a store managed to get optimized
1585  // out), because re-visiting the EntryToken and its users will not uncover
1586  // any additional opportunities, but there may be a large number of such
1587  // users, potentially causing compile time explosion.
1588  if (RV.getOpcode() != ISD::EntryToken) {
1589  AddToWorklist(RV.getNode());
1590  AddUsersToWorklist(RV.getNode());
1591  }
1592 
1593  // Finally, if the node is now dead, remove it from the graph. The node
1594  // may not be dead if the replacement process recursively simplified to
1595  // something else needing this node. This will also take care of adding any
1596  // operands which have lost a user to the worklist.
1597  recursivelyDeleteUnusedNodes(N);
1598  }
1599 
1600  // If the root changed (e.g. it was a dead load, update the root).
1601  DAG.setRoot(Dummy.getValue());
1602  DAG.RemoveDeadNodes();
1603 }
1604 
1605 SDValue DAGCombiner::visit(SDNode *N) {
1606  switch (N->getOpcode()) {
1607  default: break;
1608  case ISD::TokenFactor: return visitTokenFactor(N);
1609  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1610  case ISD::ADD: return visitADD(N);
1611  case ISD::SUB: return visitSUB(N);
1612  case ISD::SADDSAT:
1613  case ISD::UADDSAT: return visitADDSAT(N);
1614  case ISD::SSUBSAT:
1615  case ISD::USUBSAT: return visitSUBSAT(N);
1616  case ISD::ADDC: return visitADDC(N);
1617  case ISD::SADDO:
1618  case ISD::UADDO: return visitADDO(N);
1619  case ISD::SUBC: return visitSUBC(N);
1620  case ISD::SSUBO:
1621  case ISD::USUBO: return visitSUBO(N);
1622  case ISD::ADDE: return visitADDE(N);
1623  case ISD::ADDCARRY: return visitADDCARRY(N);
1624  case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1625  case ISD::SUBE: return visitSUBE(N);
1626  case ISD::SUBCARRY: return visitSUBCARRY(N);
1627  case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1628  case ISD::SMULFIX:
1629  case ISD::SMULFIXSAT:
1630  case ISD::UMULFIX:
1631  case ISD::UMULFIXSAT: return visitMULFIX(N);
1632  case ISD::MUL: return visitMUL(N);
1633  case ISD::SDIV: return visitSDIV(N);
1634  case ISD::UDIV: return visitUDIV(N);
1635  case ISD::SREM:
1636  case ISD::UREM: return visitREM(N);
1637  case ISD::MULHU: return visitMULHU(N);
1638  case ISD::MULHS: return visitMULHS(N);
1639  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1640  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1641  case ISD::SMULO:
1642  case ISD::UMULO: return visitMULO(N);
1643  case ISD::SMIN:
1644  case ISD::SMAX:
1645  case ISD::UMIN:
1646  case ISD::UMAX: return visitIMINMAX(N);
1647  case ISD::AND: return visitAND(N);
1648  case ISD::OR: return visitOR(N);
1649  case ISD::XOR: return visitXOR(N);
1650  case ISD::SHL: return visitSHL(N);
1651  case ISD::SRA: return visitSRA(N);
1652  case ISD::SRL: return visitSRL(N);
1653  case ISD::ROTR:
1654  case ISD::ROTL: return visitRotate(N);
1655  case ISD::FSHL:
1656  case ISD::FSHR: return visitFunnelShift(N);
1657  case ISD::ABS: return visitABS(N);
1658  case ISD::BSWAP: return visitBSWAP(N);
1659  case ISD::BITREVERSE: return visitBITREVERSE(N);
1660  case ISD::CTLZ: return visitCTLZ(N);
1661  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1662  case ISD::CTTZ: return visitCTTZ(N);
1663  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1664  case ISD::CTPOP: return visitCTPOP(N);
1665  case ISD::SELECT: return visitSELECT(N);
1666  case ISD::VSELECT: return visitVSELECT(N);
1667  case ISD::SELECT_CC: return visitSELECT_CC(N);
1668  case ISD::SETCC: return visitSETCC(N);
1669  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1670  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1671  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1672  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1673  case ISD::AssertSext:
1674  case ISD::AssertZext: return visitAssertExt(N);
1675  case ISD::AssertAlign: return visitAssertAlign(N);
1676  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1678  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1679  case ISD::TRUNCATE: return visitTRUNCATE(N);
1680  case ISD::BITCAST: return visitBITCAST(N);
1681  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1682  case ISD::FADD: return visitFADD(N);
1683  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1684  case ISD::FSUB: return visitFSUB(N);
1685  case ISD::FMUL: return visitFMUL(N);
1686  case ISD::FMA: return visitFMA(N);
1687  case ISD::FDIV: return visitFDIV(N);
1688  case ISD::FREM: return visitFREM(N);
1689  case ISD::FSQRT: return visitFSQRT(N);
1690  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1691  case ISD::FPOW: return visitFPOW(N);
1692  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1693  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1694  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1695  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1696  case ISD::FP_ROUND: return visitFP_ROUND(N);
1697  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1698  case ISD::FNEG: return visitFNEG(N);
1699  case ISD::FABS: return visitFABS(N);
1700  case ISD::FFLOOR: return visitFFLOOR(N);
1701  case ISD::FMINNUM:
1702  case ISD::FMAXNUM:
1703  case ISD::FMINIMUM:
1704  case ISD::FMAXIMUM: return visitFMinMax(N);
1705  case ISD::FCEIL: return visitFCEIL(N);
1706  case ISD::FTRUNC: return visitFTRUNC(N);
1707  case ISD::BRCOND: return visitBRCOND(N);
1708  case ISD::BR_CC: return visitBR_CC(N);
1709  case ISD::LOAD: return visitLOAD(N);
1710  case ISD::STORE: return visitSTORE(N);
1711  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1712  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1713  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1714  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1715  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1716  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1717  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1718  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1719  case ISD::MGATHER: return visitMGATHER(N);
1720  case ISD::MLOAD: return visitMLOAD(N);
1721  case ISD::MSCATTER: return visitMSCATTER(N);
1722  case ISD::MSTORE: return visitMSTORE(N);
1723  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1724  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1725  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1726  case ISD::FREEZE: return visitFREEZE(N);
1727  case ISD::VECREDUCE_FADD:
1728  case ISD::VECREDUCE_FMUL:
1729  case ISD::VECREDUCE_ADD:
1730  case ISD::VECREDUCE_MUL:
1731  case ISD::VECREDUCE_AND:
1732  case ISD::VECREDUCE_OR:
1733  case ISD::VECREDUCE_XOR:
1734  case ISD::VECREDUCE_SMAX:
1735  case ISD::VECREDUCE_SMIN:
1736  case ISD::VECREDUCE_UMAX:
1737  case ISD::VECREDUCE_UMIN:
1738  case ISD::VECREDUCE_FMAX:
1739  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1740 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1741 #include "llvm/IR/VPIntrinsics.def"
1742  return visitVPOp(N);
1743  }
1744  return SDValue();
1745 }
1746 
1748  SDValue RV;
1749  if (!DisableGenericCombines)
1750  RV = visit(N);
1751 
1752  // If nothing happened, try a target-specific DAG combine.
1753  if (!RV.getNode()) {
1754  assert(N->getOpcode() != ISD::DELETED_NODE &&
1755  "Node was deleted but visit returned NULL!");
1756 
1757  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1758  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1759 
1760  // Expose the DAG combiner to the target combiner impls.
1762  DagCombineInfo(DAG, Level, false, this);
1763 
1764  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1765  }
1766  }
1767 
1768  // If nothing happened still, try promoting the operation.
1769  if (!RV.getNode()) {
1770  switch (N->getOpcode()) {
1771  default: break;
1772  case ISD::ADD:
1773  case ISD::SUB:
1774  case ISD::MUL:
1775  case ISD::AND:
1776  case ISD::OR:
1777  case ISD::XOR:
1778  RV = PromoteIntBinOp(SDValue(N, 0));
1779  break;
1780  case ISD::SHL:
1781  case ISD::SRA:
1782  case ISD::SRL:
1783  RV = PromoteIntShiftOp(SDValue(N, 0));
1784  break;
1785  case ISD::SIGN_EXTEND:
1786  case ISD::ZERO_EXTEND:
1787  case ISD::ANY_EXTEND:
1788  RV = PromoteExtend(SDValue(N, 0));
1789  break;
1790  case ISD::LOAD:
1791  if (PromoteLoad(SDValue(N, 0)))
1792  RV = SDValue(N, 0);
1793  break;
1794  }
1795  }
1796 
1797  // If N is a commutative binary node, try to eliminate it if the commuted
1798  // version is already present in the DAG.
1799  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1800  N->getNumValues() == 1) {
1801  SDValue N0 = N->getOperand(0);
1802  SDValue N1 = N->getOperand(1);
1803 
1804  // Constant operands are canonicalized to RHS.
1805  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1806  SDValue Ops[] = {N1, N0};
1807  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1808  N->getFlags());
1809  if (CSENode)
1810  return SDValue(CSENode, 0);
1811  }
1812  }
1813 
1814  return RV;
1815 }
1816 
1817 /// Given a node, return its input chain if it has one, otherwise return a null
1818 /// sd operand.
1820  if (unsigned NumOps = N->getNumOperands()) {
1821  if (N->getOperand(0).getValueType() == MVT::Other)
1822  return N->getOperand(0);
1823  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1824  return N->getOperand(NumOps-1);
1825  for (unsigned i = 1; i < NumOps-1; ++i)
1826  if (N->getOperand(i).getValueType() == MVT::Other)
1827  return N->getOperand(i);
1828  }
1829  return SDValue();
1830 }
1831 
1832 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1833  // If N has two operands, where one has an input chain equal to the other,
1834  // the 'other' chain is redundant.
1835  if (N->getNumOperands() == 2) {
1836  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1837  return N->getOperand(0);
1838  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1839  return N->getOperand(1);
1840  }
1841 
1842  // Don't simplify token factors if optnone.
1843  if (OptLevel == CodeGenOpt::None)
1844  return SDValue();
1845 
1846  // Don't simplify the token factor if the node itself has too many operands.
1847  if (N->getNumOperands() > TokenFactorInlineLimit)
1848  return SDValue();
1849 
1850  // If the sole user is a token factor, we should make sure we have a
1851  // chance to merge them together. This prevents TF chains from inhibiting
1852  // optimizations.
1853  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1854  AddToWorklist(*(N->use_begin()));
1855 
1856  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1857  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1858  SmallPtrSet<SDNode*, 16> SeenOps;
1859  bool Changed = false; // If we should replace this token factor.
1860 
1861  // Start out with this token factor.
1862  TFs.push_back(N);
1863 
1864  // Iterate through token factors. The TFs grows when new token factors are
1865  // encountered.
1866  for (unsigned i = 0; i < TFs.size(); ++i) {
1867  // Limit number of nodes to inline, to avoid quadratic compile times.
1868  // We have to add the outstanding Token Factors to Ops, otherwise we might
1869  // drop Ops from the resulting Token Factors.
1870  if (Ops.size() > TokenFactorInlineLimit) {
1871  for (unsigned j = i; j < TFs.size(); j++)
1872  Ops.emplace_back(TFs[j], 0);
1873  // Drop unprocessed Token Factors from TFs, so we do not add them to the
1874  // combiner worklist later.
1875  TFs.resize(i);
1876  break;
1877  }
1878 
1879  SDNode *TF = TFs[i];
1880  // Check each of the operands.
1881  for (const SDValue &Op : TF->op_values()) {
1882  switch (Op.getOpcode()) {
1883  case ISD::EntryToken:
1884  // Entry tokens don't need to be added to the list. They are
1885  // redundant.
1886  Changed = true;
1887  break;
1888 
1889  case ISD::TokenFactor:
1890  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1891  // Queue up for processing.
1892  TFs.push_back(Op.getNode());
1893  Changed = true;
1894  break;
1895  }
1897 
1898  default:
1899  // Only add if it isn't already in the list.
1900  if (SeenOps.insert(Op.getNode()).second)
1901  Ops.push_back(Op);
1902  else
1903  Changed = true;
1904  break;
1905  }
1906  }
1907  }
1908 
1909  // Re-visit inlined Token Factors, to clean them up in case they have been
1910  // removed. Skip the first Token Factor, as this is the current node.
1911  for (unsigned i = 1, e = TFs.size(); i < e; i++)
1912  AddToWorklist(TFs[i]);
1913 
1914  // Remove Nodes that are chained to another node in the list. Do so
1915  // by walking up chains breath-first stopping when we've seen
1916  // another operand. In general we must climb to the EntryNode, but we can exit
1917  // early if we find all remaining work is associated with just one operand as
1918  // no further pruning is possible.
1919 
1920  // List of nodes to search through and original Ops from which they originate.
1922  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1923  SmallPtrSet<SDNode *, 16> SeenChains;
1924  bool DidPruneOps = false;
1925 
1926  unsigned NumLeftToConsider = 0;
1927  for (const SDValue &Op : Ops) {
1928  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1929  OpWorkCount.push_back(1);
1930  }
1931 
1932  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1933  // If this is an Op, we can remove the op from the list. Remark any
1934  // search associated with it as from the current OpNumber.
1935  if (SeenOps.contains(Op)) {
1936  Changed = true;
1937  DidPruneOps = true;
1938  unsigned OrigOpNumber = 0;
1939  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1940  OrigOpNumber++;
1941  assert((OrigOpNumber != Ops.size()) &&
1942  "expected to find TokenFactor Operand");
1943  // Re-mark worklist from OrigOpNumber to OpNumber
1944  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1945  if (Worklist[i].second == OrigOpNumber) {
1946  Worklist[i].second = OpNumber;
1947  }
1948  }
1949  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1950  OpWorkCount[OrigOpNumber] = 0;
1951  NumLeftToConsider--;
1952  }
1953  // Add if it's a new chain
1954  if (SeenChains.insert(Op).second) {
1955  OpWorkCount[OpNumber]++;
1956  Worklist.push_back(std::make_pair(Op, OpNumber));
1957  }
1958  };
1959 
1960  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1961  // We need at least be consider at least 2 Ops to prune.
1962  if (NumLeftToConsider <= 1)
1963  break;
1964  auto CurNode = Worklist[i].first;
1965  auto CurOpNumber = Worklist[i].second;
1966  assert((OpWorkCount[CurOpNumber] > 0) &&
1967  "Node should not appear in worklist");
1968  switch (CurNode->getOpcode()) {
1969  case ISD::EntryToken:
1970  // Hitting EntryToken is the only way for the search to terminate without
1971  // hitting
1972  // another operand's search. Prevent us from marking this operand
1973  // considered.
1974  NumLeftToConsider++;
1975  break;
1976  case ISD::TokenFactor:
1977  for (const SDValue &Op : CurNode->op_values())
1978  AddToWorklist(i, Op.getNode(), CurOpNumber);
1979  break;
1980  case ISD::LIFETIME_START:
1981  case ISD::LIFETIME_END:
1982  case ISD::CopyFromReg:
1983  case ISD::CopyToReg:
1984  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1985  break;
1986  default:
1987  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1988  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1989  break;
1990  }
1991  OpWorkCount[CurOpNumber]--;
1992  if (OpWorkCount[CurOpNumber] == 0)
1993  NumLeftToConsider--;
1994  }
1995 
1996  // If we've changed things around then replace token factor.
1997  if (Changed) {
1998  SDValue Result;
1999  if (Ops.empty()) {
2000  // The entry token is the only possible outcome.
2001  Result = DAG.getEntryNode();
2002  } else {
2003  if (DidPruneOps) {
2004  SmallVector<SDValue, 8> PrunedOps;
2005  //
2006  for (const SDValue &Op : Ops) {
2007  if (SeenChains.count(Op.getNode()) == 0)
2008  PrunedOps.push_back(Op);
2009  }
2010  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2011  } else {
2012  Result = DAG.getTokenFactor(SDLoc(N), Ops);
2013  }
2014  }
2015  return Result;
2016  }
2017  return SDValue();
2018 }
2019 
2020 /// MERGE_VALUES can always be eliminated.
2021 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2022  WorklistRemover DeadNodes(*this);
2023  // Replacing results may cause a different MERGE_VALUES to suddenly
2024  // be CSE'd with N, and carry its uses with it. Iterate until no
2025  // uses remain, to ensure that the node can be safely deleted.
2026  // First add the users of this node to the work list so that they
2027  // can be tried again once they have new operands.
2028  AddUsersToWorklist(N);
2029  do {
2030  // Do as a single replacement to avoid rewalking use lists.
2032  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2033  Ops.push_back(N->getOperand(i));
2034  DAG.ReplaceAllUsesWith(N, Ops.data());
2035  } while (!N->use_empty());
2036  deleteAndRecombine(N);
2037  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2038 }
2039 
2040 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2041 /// ConstantSDNode pointer else nullptr.
2043  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2044  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2045 }
2046 
2047 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2048 /// and that N may be folded in the load / store addressing mode.
2050  const TargetLowering &TLI) {
2051  EVT VT;
2052  unsigned AS;
2053 
2054  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2055  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2056  return false;
2057  VT = LD->getMemoryVT();
2058  AS = LD->getAddressSpace();
2059  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2060  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2061  return false;
2062  VT = ST->getMemoryVT();
2063  AS = ST->getAddressSpace();
2064  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2065  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2066  return false;
2067  VT = LD->getMemoryVT();
2068  AS = LD->getAddressSpace();
2069  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2070  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2071  return false;
2072  VT = ST->getMemoryVT();
2073  AS = ST->getAddressSpace();
2074  } else
2075  return false;
2076 
2078  if (N->getOpcode() == ISD::ADD) {
2079  AM.HasBaseReg = true;
2080  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2081  if (Offset)
2082  // [reg +/- imm]
2083  AM.BaseOffs = Offset->getSExtValue();
2084  else
2085  // [reg +/- reg]
2086  AM.Scale = 1;
2087  } else if (N->getOpcode() == ISD::SUB) {
2088  AM.HasBaseReg = true;
2089  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2090  if (Offset)
2091  // [reg +/- imm]
2092  AM.BaseOffs = -Offset->getSExtValue();
2093  else
2094  // [reg +/- reg]
2095  AM.Scale = 1;
2096  } else
2097  return false;
2098 
2099  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2100  VT.getTypeForEVT(*DAG.getContext()), AS);
2101 }
2102 
2103 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2104  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2105  "Unexpected binary operator");
2106 
2107  // Don't do this unless the old select is going away. We want to eliminate the
2108  // binary operator, not replace a binop with a select.
2109  // TODO: Handle ISD::SELECT_CC.
2110  unsigned SelOpNo = 0;
2111  SDValue Sel = BO->getOperand(0);
2112  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2113  SelOpNo = 1;
2114  Sel = BO->getOperand(1);
2115  }
2116 
2117  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2118  return SDValue();
2119 
2120  SDValue CT = Sel.getOperand(1);
2121  if (!isConstantOrConstantVector(CT, true) &&
2123  return SDValue();
2124 
2125  SDValue CF = Sel.getOperand(2);
2126  if (!isConstantOrConstantVector(CF, true) &&
2128  return SDValue();
2129 
2130  // Bail out if any constants are opaque because we can't constant fold those.
2131  // The exception is "and" and "or" with either 0 or -1 in which case we can
2132  // propagate non constant operands into select. I.e.:
2133  // and (select Cond, 0, -1), X --> select Cond, 0, X
2134  // or X, (select Cond, -1, 0) --> select Cond, -1, X
2135  auto BinOpcode = BO->getOpcode();
2136  bool CanFoldNonConst =
2137  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2140 
2141  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2142  if (!CanFoldNonConst &&
2143  !isConstantOrConstantVector(CBO, true) &&
2145  return SDValue();
2146 
2147  EVT VT = BO->getValueType(0);
2148 
2149  // We have a select-of-constants followed by a binary operator with a
2150  // constant. Eliminate the binop by pulling the constant math into the select.
2151  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2152  SDLoc DL(Sel);
2153  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2154  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2155  if (!CanFoldNonConst && !NewCT.isUndef() &&
2156  !isConstantOrConstantVector(NewCT, true) &&
2158  return SDValue();
2159 
2160  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2161  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2162  if (!CanFoldNonConst && !NewCF.isUndef() &&
2163  !isConstantOrConstantVector(NewCF, true) &&
2165  return SDValue();
2166 
2167  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2168  SelectOp->setFlags(BO->getFlags());
2169  return SelectOp;
2170 }
2171 
2173  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2174  "Expecting add or sub");
2175 
2176  // Match a constant operand and a zext operand for the math instruction:
2177  // add Z, C
2178  // sub C, Z
2179  bool IsAdd = N->getOpcode() == ISD::ADD;
2180  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2181  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2182  auto *CN = dyn_cast<ConstantSDNode>(C);
2183  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2184  return SDValue();
2185 
2186  // Match the zext operand as a setcc of a boolean.
2187  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2188  Z.getOperand(0).getValueType() != MVT::i1)
2189  return SDValue();
2190 
2191  // Match the compare as: setcc (X & 1), 0, eq.
2192  SDValue SetCC = Z.getOperand(0);
2193  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2194  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2195  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2196  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2197  return SDValue();
2198 
2199  // We are adding/subtracting a constant and an inverted low bit. Turn that
2200  // into a subtract/add of the low bit with incremented/decremented constant:
2201  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2202  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2203  EVT VT = C.getValueType();
2204  SDLoc DL(N);
2205  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2206  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2207  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2208  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2209 }
2210 
2211 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2212 /// a shift and add with a different constant.
2214  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2215  "Expecting add or sub");
2216 
2217  // We need a constant operand for the add/sub, and the other operand is a
2218  // logical shift right: add (srl), C or sub C, (srl).
2219  bool IsAdd = N->getOpcode() == ISD::ADD;
2220  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2221  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2222  if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2223  ShiftOp.getOpcode() != ISD::SRL)
2224  return SDValue();
2225 
2226  // The shift must be of a 'not' value.
2227  SDValue Not = ShiftOp.getOperand(0);
2228  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2229  return SDValue();
2230 
2231  // The shift must be moving the sign bit to the least-significant-bit.
2232  EVT VT = ShiftOp.getValueType();
2233  SDValue ShAmt = ShiftOp.getOperand(1);
2234  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2235  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2236  return SDValue();
2237 
2238  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2239  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2240  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2241  SDLoc DL(N);
2242  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2243  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2244  if (SDValue NewC =
2245  DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2246  {ConstantOp, DAG.getConstant(1, DL, VT)}))
2247  return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2248  return SDValue();
2249 }
2250 
2251 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2252 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2253 /// are no common bits set in the operands).
2254 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2255  SDValue N0 = N->getOperand(0);
2256  SDValue N1 = N->getOperand(1);
2257  EVT VT = N0.getValueType();
2258  SDLoc DL(N);
2259 
2260  // fold (add x, undef) -> undef
2261  if (N0.isUndef())
2262  return N0;
2263  if (N1.isUndef())
2264  return N1;
2265 
2266  // fold (add c1, c2) -> c1+c2
2267  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2268  return C;
2269 
2270  // canonicalize constant to RHS
2273  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2274 
2275  // fold vector ops
2276  if (VT.isVector()) {
2277  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2278  return FoldedVOp;
2279 
2280  // fold (add x, 0) -> x, vector edition
2282  return N0;
2283  }
2284 
2285  // fold (add x, 0) -> x
2286  if (isNullConstant(N1))
2287  return N0;
2288 
2289  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2290  // fold ((A-c1)+c2) -> (A+(c2-c1))
2291  if (N0.getOpcode() == ISD::SUB &&
2292  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2293  SDValue Sub =
2294  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2295  assert(Sub && "Constant folding failed");
2296  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2297  }
2298 
2299  // fold ((c1-A)+c2) -> (c1+c2)-A
2300  if (N0.getOpcode() == ISD::SUB &&
2301  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2302  SDValue Add =
2303  DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2304  assert(Add && "Constant folding failed");
2305  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2306  }
2307 
2308  // add (sext i1 X), 1 -> zext (not i1 X)
2309  // We don't transform this pattern:
2310  // add (zext i1 X), -1 -> sext (not i1 X)
2311  // because most (?) targets generate better code for the zext form.
2312  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2313  isOneOrOneSplat(N1)) {
2314  SDValue X = N0.getOperand(0);
2315  if ((!LegalOperations ||
2316  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2317  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2318  X.getScalarValueSizeInBits() == 1) {
2319  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2320  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2321  }
2322  }
2323 
2324  // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2325  // equivalent to (add x, c0).
2326  if (N0.getOpcode() == ISD::OR &&
2327  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2328  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2329  if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2330  {N1, N0.getOperand(1)}))
2331  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2332  }
2333  }
2334 
2335  if (SDValue NewSel = foldBinOpIntoSelect(N))
2336  return NewSel;
2337 
2338  // reassociate add
2339  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2340  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2341  return RADD;
2342 
2343  // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2344  // equivalent to (add x, c).
2345  auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2346  if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2347  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2348  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2349  return DAG.getNode(ISD::ADD, DL, VT,
2350  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2351  N0.getOperand(1));
2352  }
2353  return SDValue();
2354  };
2355  if (SDValue Add = ReassociateAddOr(N0, N1))
2356  return Add;
2357  if (SDValue Add = ReassociateAddOr(N1, N0))
2358  return Add;
2359  }
2360  // fold ((0-A) + B) -> B-A
2361  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2362  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2363 
2364  // fold (A + (0-B)) -> A-B
2365  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2366  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2367 
2368  // fold (A+(B-A)) -> B
2369  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2370  return N1.getOperand(0);
2371 
2372  // fold ((B-A)+A) -> B
2373  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2374  return N0.getOperand(0);
2375 
2376  // fold ((A-B)+(C-A)) -> (C-B)
2377  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2378  N0.getOperand(0) == N1.getOperand(1))
2379  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380  N0.getOperand(1));
2381 
2382  // fold ((A-B)+(B-C)) -> (A-C)
2383  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2384  N0.getOperand(1) == N1.getOperand(0))
2385  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2386  N1.getOperand(1));
2387 
2388  // fold (A+(B-(A+C))) to (B-C)
2389  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2390  N0 == N1.getOperand(1).getOperand(0))
2391  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2392  N1.getOperand(1).getOperand(1));
2393 
2394  // fold (A+(B-(C+A))) to (B-C)
2395  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2396  N0 == N1.getOperand(1).getOperand(1))
2397  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2398  N1.getOperand(1).getOperand(0));
2399 
2400  // fold (A+((B-A)+or-C)) to (B+or-C)
2401  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2402  N1.getOperand(0).getOpcode() == ISD::SUB &&
2403  N0 == N1.getOperand(0).getOperand(1))
2404  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2405  N1.getOperand(1));
2406 
2407  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2408  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2409  SDValue N00 = N0.getOperand(0);
2410  SDValue N01 = N0.getOperand(1);
2411  SDValue N10 = N1.getOperand(0);
2412  SDValue N11 = N1.getOperand(1);
2413 
2415  return DAG.getNode(ISD::SUB, DL, VT,
2416  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2417  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2418  }
2419 
2420  // fold (add (umax X, C), -C) --> (usubsat X, C)
2421  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2422  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2423  return (!Max && !Op) ||
2424  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2425  };
2426  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2427  /*AllowUndefs*/ true))
2428  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2429  N0.getOperand(1));
2430  }
2431 
2432  if (SimplifyDemandedBits(SDValue(N, 0)))
2433  return SDValue(N, 0);
2434 
2435  if (isOneOrOneSplat(N1)) {
2436  // fold (add (xor a, -1), 1) -> (sub 0, a)
2437  if (isBitwiseNot(N0))
2438  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2439  N0.getOperand(0));
2440 
2441  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2442  if (N0.getOpcode() == ISD::ADD) {
2443  SDValue A, Xor;
2444 
2445  if (isBitwiseNot(N0.getOperand(0))) {
2446  A = N0.getOperand(1);
2447  Xor = N0.getOperand(0);
2448  } else if (isBitwiseNot(N0.getOperand(1))) {
2449  A = N0.getOperand(0);
2450  Xor = N0.getOperand(1);
2451  }
2452 
2453  if (Xor)
2454  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2455  }
2456 
2457  // Look for:
2458  // add (add x, y), 1
2459  // And if the target does not like this form then turn into:
2460  // sub y, (xor x, -1)
2461  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2462  N0.getOpcode() == ISD::ADD) {
2463  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2464  DAG.getAllOnesConstant(DL, VT));
2465  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2466  }
2467  }
2468 
2469  // (x - y) + -1 -> add (xor y, -1), x
2470  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2472  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2473  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2474  }
2475 
2476  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2477  return Combined;
2478 
2479  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2480  return Combined;
2481 
2482  return SDValue();
2483 }
2484 
2485 SDValue DAGCombiner::visitADD(SDNode *N) {
2486  SDValue N0 = N->getOperand(0);
2487  SDValue N1 = N->getOperand(1);
2488  EVT VT = N0.getValueType();
2489  SDLoc DL(N);
2490 
2491  if (SDValue Combined = visitADDLike(N))
2492  return Combined;
2493 
2494  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2495  return V;
2496 
2497  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2498  return V;
2499 
2500  // fold (a+b) -> (a|b) iff a and b share no bits.
2501  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2502  DAG.haveNoCommonBitsSet(N0, N1))
2503  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2504 
2505  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2506  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2507  const APInt &C0 = N0->getConstantOperandAPInt(0);
2508  const APInt &C1 = N1->getConstantOperandAPInt(0);
2509  return DAG.getVScale(DL, VT, C0 + C1);
2510  }
2511 
2512  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2513  if ((N0.getOpcode() == ISD::ADD) &&
2514  (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2515  (N1.getOpcode() == ISD::VSCALE)) {
2516  const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2517  const APInt &VS1 = N1->getConstantOperandAPInt(0);
2518  SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2519  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2520  }
2521 
2522  // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2523  if (N0.getOpcode() == ISD::STEP_VECTOR &&
2524  N1.getOpcode() == ISD::STEP_VECTOR) {
2525  const APInt &C0 = N0->getConstantOperandAPInt(0);
2526  const APInt &C1 = N1->getConstantOperandAPInt(0);
2527  APInt NewStep = C0 + C1;
2528  return DAG.getStepVector(DL, VT, NewStep);
2529  }
2530 
2531  // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2532  if ((N0.getOpcode() == ISD::ADD) &&
2533  (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2534  (N1.getOpcode() == ISD::STEP_VECTOR)) {
2535  const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2536  const APInt &SV1 = N1->getConstantOperandAPInt(0);
2537  APInt NewStep = SV0 + SV1;
2538  SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2539  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2540  }
2541 
2542  return SDValue();
2543 }
2544 
2545 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2546  unsigned Opcode = N->getOpcode();
2547  SDValue N0 = N->getOperand(0);
2548  SDValue N1 = N->getOperand(1);
2549  EVT VT = N0.getValueType();
2550  SDLoc DL(N);
2551 
2552  // fold (add_sat x, undef) -> -1
2553  if (N0.isUndef() || N1.isUndef())
2554  return DAG.getAllOnesConstant(DL, VT);
2555 
2556  // fold (add_sat c1, c2) -> c3
2557  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2558  return C;
2559 
2560  // canonicalize constant to RHS
2563  return DAG.getNode(Opcode, DL, VT, N1, N0);
2564 
2565  // fold vector ops
2566  if (VT.isVector()) {
2567  // TODO SimplifyVBinOp
2568 
2569  // fold (add_sat x, 0) -> x, vector edition
2571  return N0;
2572  }
2573 
2574  // fold (add_sat x, 0) -> x
2575  if (isNullConstant(N1))
2576  return N0;
2577 
2578  // If it cannot overflow, transform into an add.
2579  if (Opcode == ISD::UADDSAT)
2580  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2581  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2582 
2583  return SDValue();
2584 }
2585 
2586 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2587  bool Masked = false;
2588 
2589  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2590  while (true) {
2591  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2592  V = V.getOperand(0);
2593  continue;
2594  }
2595 
2596  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2597  Masked = true;
2598  V = V.getOperand(0);
2599  continue;
2600  }
2601 
2602  break;
2603  }
2604 
2605  // If this is not a carry, return.
2606  if (V.getResNo() != 1)
2607  return SDValue();
2608 
2609  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2610  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2611  return SDValue();
2612 
2613  EVT VT = V.getNode()->getValueType(0);
2614  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2615  return SDValue();
2616 
2617  // If the result is masked, then no matter what kind of bool it is we can
2618  // return. If it isn't, then we need to make sure the bool type is either 0 or
2619  // 1 and not other values.
2620  if (Masked ||
2621  TLI.getBooleanContents(V.getValueType()) ==
2623  return V;
2624 
2625  return SDValue();
2626 }
2627 
2628 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2629 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2630 /// the opcode and bypass the mask operation.
2631 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2632  SelectionDAG &DAG, const SDLoc &DL) {
2633  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2634  return SDValue();
2635 
2636  EVT VT = N0.getValueType();
2637  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2638  return SDValue();
2639 
2640  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2641  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2642  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2643 }
2644 
2645 /// Helper for doing combines based on N0 and N1 being added to each other.
2646 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2647  SDNode *LocReference) {
2648  EVT VT = N0.getValueType();
2649  SDLoc DL(LocReference);
2650 
2651  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2652  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2654  return DAG.getNode(ISD::SUB, DL, VT, N0,
2655  DAG.getNode(ISD::SHL, DL, VT,
2656  N1.getOperand(0).getOperand(1),
2657  N1.getOperand(1)));
2658 
2659  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2660  return V;
2661 
2662  // Look for:
2663  // add (add x, 1), y
2664  // And if the target does not like this form then turn into:
2665  // sub y, (xor x, -1)
2666  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2667  N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2668  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2669  DAG.getAllOnesConstant(DL, VT));
2670  return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2671  }
2672 
2673  // Hoist one-use subtraction by non-opaque constant:
2674  // (x - C) + y -> (x + y) - C
2675  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2676  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2677  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2678  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2679  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2680  }
2681  // Hoist one-use subtraction from non-opaque constant:
2682  // (C - x) + y -> (y - x) + C
2683  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2684  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2685  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2686  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2687  }
2688 
2689  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2690  // rather than 'add 0/-1' (the zext should get folded).
2691  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2692  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2693  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2695  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2696  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2697  }
2698 
2699  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2700  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2701  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2702  if (TN->getVT() == MVT::i1) {
2703  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2704  DAG.getConstant(1, DL, VT));
2705  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2706  }
2707  }
2708 
2709  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2710  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2711  N1.getResNo() == 0)
2712  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2713  N0, N1.getOperand(0), N1.getOperand(2));
2714 
2715  // (add X, Carry) -> (addcarry X, 0, Carry)
2717  if (SDValue Carry = getAsCarry(TLI, N1))
2718  return DAG.getNode(ISD::ADDCARRY, DL,
2719  DAG.getVTList(VT, Carry.getValueType()), N0,
2720  DAG.getConstant(0, DL, VT), Carry);
2721 
2722  return SDValue();
2723 }
2724 
2725 SDValue DAGCombiner::visitADDC(SDNode *N) {
2726  SDValue N0 = N->getOperand(0);
2727  SDValue N1 = N->getOperand(1);
2728  EVT VT = N0.getValueType();
2729  SDLoc DL(N);
2730 
2731  // If the flag result is dead, turn this into an ADD.
2732  if (!N->hasAnyUseOfValue(1))
2733  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2735 
2736  // canonicalize constant to RHS.
2737  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2738  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2739  if (N0C && !N1C)
2740  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2741 
2742  // fold (addc x, 0) -> x + no carry out
2743  if (isNullConstant(N1))
2744  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2745  DL, MVT::Glue));
2746 
2747  // If it cannot overflow, transform into an add.
2748  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2749  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2751 
2752  return SDValue();
2753 }
2754 
2755 /**
2756  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2757  * then the flip also occurs if computing the inverse is the same cost.
2758  * This function returns an empty SDValue in case it cannot flip the boolean
2759  * without increasing the cost of the computation. If you want to flip a boolean
2760  * no matter what, use DAG.getLogicalNOT.
2761  */
2763  const TargetLowering &TLI,
2764  bool Force) {
2765  if (Force && isa<ConstantSDNode>(V))
2766  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2767 
2768  if (V.getOpcode() != ISD::XOR)
2769  return SDValue();
2770 
2771  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2772  if (!Const)
2773  return SDValue();
2774 
2775  EVT VT = V.getValueType();
2776 
2777  bool IsFlip = false;
2778  switch(TLI.getBooleanContents(VT)) {
2780  IsFlip = Const->isOne();
2781  break;
2783  IsFlip = Const->isAllOnes();
2784  break;
2786  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2787  break;
2788  }
2789 
2790  if (IsFlip)
2791  return V.getOperand(0);
2792  if (Force)
2793  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2794  return SDValue();
2795 }
2796 
2797 SDValue DAGCombiner::visitADDO(SDNode *N) {
2798  SDValue N0 = N->getOperand(0);
2799  SDValue N1 = N->getOperand(1);
2800  EVT VT = N0.getValueType();
2801  bool IsSigned = (ISD::SADDO == N->getOpcode());
2802 
2803  EVT CarryVT = N->getValueType(1);
2804  SDLoc DL(N);
2805 
2806  // If the flag result is dead, turn this into an ADD.
2807  if (!N->hasAnyUseOfValue(1))
2808  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2809  DAG.getUNDEF(CarryVT));
2810 
2811  // canonicalize constant to RHS.
2814  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2815 
2816  // fold (addo x, 0) -> x + no carry out
2817  if (isNullOrNullSplat(N1))
2818  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2819 
2820  if (!IsSigned) {
2821  // If it cannot overflow, transform into an add.
2822  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2823  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2824  DAG.getConstant(0, DL, CarryVT));
2825 
2826  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2827  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2828  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2829  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2830  return CombineTo(
2831  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2832  }
2833 
2834  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2835  return Combined;
2836 
2837  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2838  return Combined;
2839  }
2840 
2841  return SDValue();
2842 }
2843 
2844 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2845  EVT VT = N0.getValueType();
2846  if (VT.isVector())
2847  return SDValue();
2848 
2849  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2850  // If Y + 1 cannot overflow.
2851  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2852  SDValue Y = N1.getOperand(0);
2853  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2855  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2856  N1.getOperand(2));
2857  }
2858 
2859  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2861  if (SDValue Carry = getAsCarry(TLI, N1))
2862  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2863  DAG.getConstant(0, SDLoc(N), VT), Carry);
2864 
2865  return SDValue();
2866 }
2867 
2868 SDValue DAGCombiner::visitADDE(SDNode *N) {
2869  SDValue N0 = N->getOperand(0);
2870  SDValue N1 = N->getOperand(1);
2871  SDValue CarryIn = N->getOperand(2);
2872 
2873  // canonicalize constant to RHS
2874  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2875  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2876  if (N0C && !N1C)
2877  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2878  N1, N0, CarryIn);
2879 
2880  // fold (adde x, y, false) -> (addc x, y)
2881  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2882  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2883 
2884  return SDValue();
2885 }
2886 
2887 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2888  SDValue N0 = N->getOperand(0);
2889  SDValue N1 = N->getOperand(1);
2890  SDValue CarryIn = N->getOperand(2);
2891  SDLoc DL(N);
2892 
2893  // canonicalize constant to RHS
2894  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2895  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2896  if (N0C && !N1C)
2897  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2898 
2899  // fold (addcarry x, y, false) -> (uaddo x, y)
2900  if (isNullConstant(CarryIn)) {
2901  if (!LegalOperations ||
2902  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2903  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2904  }
2905 
2906  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2907  if (isNullConstant(N0) && isNullConstant(N1)) {
2908  EVT VT = N0.getValueType();
2909  EVT CarryVT = CarryIn.getValueType();
2910  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2911  AddToWorklist(CarryExt.getNode());
2912  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2913  DAG.getConstant(1, DL, VT)),
2914  DAG.getConstant(0, DL, CarryVT));
2915  }
2916 
2917  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2918  return Combined;
2919 
2920  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2921  return Combined;
2922 
2923  return SDValue();
2924 }
2925 
2926 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2927  SDValue N0 = N->getOperand(0);
2928  SDValue N1 = N->getOperand(1);
2929  SDValue CarryIn = N->getOperand(2);
2930  SDLoc DL(N);
2931 
2932  // canonicalize constant to RHS
2933  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2934  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2935  if (N0C && !N1C)
2936  return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2937 
2938  // fold (saddo_carry x, y, false) -> (saddo x, y)
2939  if (isNullConstant(CarryIn)) {
2940  if (!LegalOperations ||
2941  TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2942  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2943  }
2944 
2945  return SDValue();
2946 }
2947 
2948 /**
2949  * If we are facing some sort of diamond carry propapagtion pattern try to
2950  * break it up to generate something like:
2951  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2952  *
2953  * The end result is usually an increase in operation required, but because the
2954  * carry is now linearized, other tranforms can kick in and optimize the DAG.
2955  *
2956  * Patterns typically look something like
2957  * (uaddo A, B)
2958  * / \
2959  * Carry Sum
2960  * | \
2961  * | (addcarry *, 0, Z)
2962  * | /
2963  * \ Carry
2964  * | /
2965  * (addcarry X, *, *)
2966  *
2967  * But numerous variation exist. Our goal is to identify A, B, X and Z and
2968  * produce a combine with a single path for carry propagation.
2969  */
2971  SDValue X, SDValue Carry0, SDValue Carry1,
2972  SDNode *N) {
2973  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2974  return SDValue();
2975  if (Carry1.getOpcode() != ISD::UADDO)
2976  return SDValue();
2977 
2978  SDValue Z;
2979 
2980  /**
2981  * First look for a suitable Z. It will present itself in the form of
2982  * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2983  */
2984  if (Carry0.getOpcode() == ISD::ADDCARRY &&
2985  isNullConstant(Carry0.getOperand(1))) {
2986  Z = Carry0.getOperand(2);
2987  } else if (Carry0.getOpcode() == ISD::UADDO &&
2988  isOneConstant(Carry0.getOperand(1))) {
2989  EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2990  Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2991  } else {
2992  // We couldn't find a suitable Z.
2993  return SDValue();
2994  }
2995 
2996 
2997  auto cancelDiamond = [&](SDValue A,SDValue B) {
2998  SDLoc DL(N);
2999  SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3000  Combiner.AddToWorklist(NewY.getNode());
3001  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3002  DAG.getConstant(0, DL, X.getValueType()),
3003  NewY.getValue(1));
3004  };
3005 
3006  /**
3007  * (uaddo A, B)
3008  * |
3009  * Sum
3010  * |
3011  * (addcarry *, 0, Z)
3012  */
3013  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3014  return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3015  }
3016 
3017  /**
3018  * (addcarry A, 0, Z)
3019  * |
3020  * Sum
3021  * |
3022  * (uaddo *, B)
3023  */
3024  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3025  return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3026  }
3027 
3028  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3029  return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3030  }
3031 
3032  return SDValue();
3033 }
3034 
3035 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3036 // match patterns like:
3037 //
3038 // (uaddo A, B) CarryIn
3039 // | \ |
3040 // | \ |
3041 // PartialSum PartialCarryOutX /
3042 // | | /
3043 // | ____|____________/
3044 // | / |
3045 // (uaddo *, *) \________
3046 // | \ \
3047 // | \ |
3048 // | PartialCarryOutY |
3049 // | \ |
3050 // | \ /
3051 // AddCarrySum | ______/
3052 // | /
3053 // CarryOut = (or *, *)
3054 //
3055 // And generate ADDCARRY (or SUBCARRY) with two result values:
3056 //
3057 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3058 //
3059 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3060 // a single path for carry/borrow out propagation:
3062  const TargetLowering &TLI, SDValue Carry0,
3063  SDValue Carry1, SDNode *N) {
3064  if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3065  return SDValue();
3066  unsigned Opcode = Carry0.getOpcode();
3067  if (Opcode != Carry1.getOpcode())
3068  return SDValue();
3069  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3070  return SDValue();
3071 
3072  // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3073  // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3074  // the above ASCII art.)
3075  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3076  Carry1.getOperand(1) != Carry0.getValue(0))
3077  std::swap(Carry0, Carry1);
3078  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3079  Carry1.getOperand(1) != Carry0.getValue(0))
3080  return SDValue();
3081 
3082  // The carry in value must be on the righthand side for subtraction.
3083  unsigned CarryInOperandNum =
3084  Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3085  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3086  return SDValue();
3087  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3088 
3089  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3090  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3091  return SDValue();
3092 
3093  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3094  // TODO: make getAsCarry() aware of how partial carries are merged.
3095  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3096  return SDValue();
3097  CarryIn = CarryIn.getOperand(0);
3098  if (CarryIn.getValueType() != MVT::i1)
3099  return SDValue();
3100 
3101  SDLoc DL(N);
3102  SDValue Merged =
3103  DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3104  Carry0.getOperand(1), CarryIn);
3105 
3106  // Please note that because we have proven that the result of the UADDO/USUBO
3107  // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3108  // therefore prove that if the first UADDO/USUBO overflows, the second
3109  // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3110  // maximum value.
3111  //
3112  // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3113  // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3114  //
3115  // This is important because it means that OR and XOR can be used to merge
3116  // carry flags; and that AND can return a constant zero.
3117  //
3118  // TODO: match other operations that can merge flags (ADD, etc)
3119  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3120  if (N->getOpcode() == ISD::AND)
3121  return DAG.getConstant(0, DL, MVT::i1);
3122  return Merged.getValue(1);
3123 }
3124 
3125 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3126  SDNode *N) {
3127  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3128  if (isBitwiseNot(N0))
3129  if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3130  SDLoc DL(N);
3131  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3132  N0.getOperand(0), NotC);
3133  return CombineTo(
3134  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3135  }
3136 
3137  // Iff the flag result is dead:
3138  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3139  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3140  // or the dependency between the instructions.
3141  if ((N0.getOpcode() == ISD::ADD ||
3142  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3143  N0.getValue(1) != CarryIn)) &&
3144  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3145  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3146  N0.getOperand(0), N0.getOperand(1), CarryIn);
3147 
3148  /**
3149  * When one of the addcarry argument is itself a carry, we may be facing
3150  * a diamond carry propagation. In which case we try to transform the DAG
3151  * to ensure linear carry propagation if that is possible.
3152  */
3153  if (auto Y = getAsCarry(TLI, N1)) {
3154  // Because both are carries, Y and Z can be swapped.
3155  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3156  return R;
3157  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3158  return R;
3159  }
3160 
3161  return SDValue();
3162 }
3163 
3164 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3165 // clamp/truncation if necessary.
3167  SDValue RHS, SelectionDAG &DAG,
3168  const SDLoc &DL) {
3169  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3170  "Illegal truncation");
3171 
3172  if (DstVT == SrcVT)
3173  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3174 
3175  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3176  // clamping RHS.
3177  APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3178  DstVT.getScalarSizeInBits());
3179  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3180  return SDValue();
3181 
3182  SDValue SatLimit =
3184  DstVT.getScalarSizeInBits()),
3185  DL, SrcVT);
3186  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3187  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3188  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3189  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3190 }
3191 
3192 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3193 // usubsat(a,b), optionally as a truncated type.
3194 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3195  if (N->getOpcode() != ISD::SUB ||
3196  !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3197  return SDValue();
3198 
3199  EVT SubVT = N->getValueType(0);
3200  SDValue Op0 = N->getOperand(0);
3201  SDValue Op1 = N->getOperand(1);
3202 
3203  // Try to find umax(a,b) - b or a - umin(a,b) patterns
3204  // they may be converted to usubsat(a,b).
3205  if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3206  SDValue MaxLHS = Op0.getOperand(0);
3207  SDValue MaxRHS = Op0.getOperand(1);
3208  if (MaxLHS == Op1)
3209  return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3210  if (MaxRHS == Op1)
3211  return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3212  }
3213 
3214  if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3215  SDValue MinLHS = Op1.getOperand(0);
3216  SDValue MinRHS = Op1.getOperand(1);
3217  if (MinLHS == Op0)
3218  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3219  if (MinRHS == Op0)
3220  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3221  }
3222 
3223  // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3224  if (Op1.getOpcode() == ISD::TRUNCATE &&
3225  Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3226  Op1.getOperand(0).hasOneUse()) {
3227  SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3228  SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3229  if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3230  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3231  DAG, SDLoc(N));
3232  if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3233  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3234  DAG, SDLoc(N));
3235  }
3236 
3237  return SDValue();
3238 }
3239 
3240 // Since it may not be valid to emit a fold to zero for vector initializers
3241 // check if we can before folding.
3242 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3243  SelectionDAG &DAG, bool LegalOperations) {
3244  if (!VT.isVector())
3245  return DAG.getConstant(0, DL, VT);
3246  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3247  return DAG.getConstant(0, DL, VT);
3248  return SDValue();
3249 }
3250 
3251 SDValue DAGCombiner::visitSUB(SDNode *N) {
3252  SDValue N0 = N->getOperand(0);
3253  SDValue N1 = N->getOperand(1);
3254  EVT VT = N0.getValueType();
3255  SDLoc DL(N);
3256 
3257  // fold (sub x, x) -> 0
3258  // FIXME: Refactor this and xor and other similar operations together.
3259  if (N0 == N1)
3260  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3261 
3262  // fold (sub c1, c2) -> c3
3263  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3264  return C;
3265 
3266  // fold vector ops
3267  if (VT.isVector()) {
3268  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3269  return FoldedVOp;
3270 
3271  // fold (sub x, 0) -> x, vector edition
3273  return N0;
3274  }
3275 
3276  if (SDValue NewSel = foldBinOpIntoSelect(N))
3277  return NewSel;
3278 
3280 
3281  // fold (sub x, c) -> (add x, -c)
3282  if (N1C) {
3283  return DAG.getNode(ISD::ADD, DL, VT, N0,
3284  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3285  }
3286 
3287  if (isNullOrNullSplat(N0)) {
3288  unsigned BitWidth = VT.getScalarSizeInBits();
3289  // Right-shifting everything out but the sign bit followed by negation is
3290  // the same as flipping arithmetic/logical shift type without the negation:
3291  // -(X >>u 31) -> (X >>s 31)
3292  // -(X >>s 31) -> (X >>u 31)
3293  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3294  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3295  if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3296  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3297  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3298  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3299  }
3300  }
3301 
3302  // 0 - X --> 0 if the sub is NUW.
3303  if (N->getFlags().hasNoUnsignedWrap())
3304  return N0;
3305 
3307  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3308  // N1 must be 0 because negating the minimum signed value is undefined.
3309  if (N->getFlags().hasNoSignedWrap())
3310  return N0;
3311 
3312  // 0 - X --> X if X is 0 or the minimum signed value.
3313  return N1;
3314  }
3315 
3316  // Convert 0 - abs(x).
3317  if (N1->getOpcode() == ISD::ABS &&
3319  if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3320  return Result;
3321 
3322  // Fold neg(splat(neg(x)) -> splat(x)
3323  if (VT.isVector()) {
3324  SDValue N1S = DAG.getSplatValue(N1, true);
3325  if (N1S && N1S.getOpcode() == ISD::SUB &&
3326  isNullConstant(N1S.getOperand(0))) {
3327  if (VT.isScalableVector())
3328  return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3329  return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3330  }
3331  }
3332  }
3333 
3334  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3335  if (isAllOnesOrAllOnesSplat(N0))
3336  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3337 
3338  // fold (A - (0-B)) -> A+B
3339  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3340  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3341 
3342  // fold A-(A-B) -> B
3343  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3344  return N1.getOperand(1);
3345 
3346  // fold (A+B)-A -> B
3347  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3348  return N0.getOperand(1);
3349 
3350  // fold (A+B)-B -> A
3351  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3352  return N0.getOperand(0);
3353 
3354  // fold (A+C1)-C2 -> A+(C1-C2)
3355  if (N0.getOpcode() == ISD::ADD &&
3356  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3357  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3358  SDValue NewC =
3359  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3360  assert(NewC && "Constant folding failed");
3361  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3362  }
3363 
3364  // fold C2-(A+C1) -> (C2-C1)-A
3365  if (N1.getOpcode() == ISD::ADD) {
3366  SDValue N11 = N1.getOperand(1);
3367  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3368  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3369  SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3370  assert(NewC && "Constant folding failed");
3371  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3372  }
3373  }
3374 
3375  // fold (A-C1)-C2 -> A-(C1+C2)
3376  if (N0.getOpcode() == ISD::SUB &&
3377  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3378  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3379  SDValue NewC =
3380  DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3381  assert(NewC && "Constant folding failed");
3382  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3383  }
3384 
3385  // fold (c1-A)-c2 -> (c1-c2)-A
3386  if (N0.getOpcode() == ISD::SUB &&
3387  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3388  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3389  SDValue NewC =
3390  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3391  assert(NewC && "Constant folding failed");
3392  return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3393  }
3394 
3395  // fold ((A+(B+or-C))-B) -> A+or-C
3396  if (N0.getOpcode() == ISD::ADD &&
3397  (N0.getOperand(1).getOpcode() == ISD::SUB ||
3398  N0.getOperand(1).getOpcode() == ISD::ADD) &&
3399  N0.getOperand(1).getOperand(0) == N1)
3400  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3401  N0.getOperand(1).getOperand(1));
3402 
3403  // fold ((A+(C+B))-B) -> A+C
3404  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3405  N0.getOperand(1).getOperand(1) == N1)
3406  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3407  N0.getOperand(1).getOperand(0));
3408 
3409  // fold ((A-(B-C))-C) -> A-B
3410  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3411  N0.getOperand(1).getOperand(1) == N1)
3412  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3413  N0.getOperand(1).getOperand(0));
3414 
3415  // fold (A-(B-C)) -> A+(C-B)
3416  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3417  return DAG.getNode(ISD::ADD, DL, VT, N0,
3418  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3419  N1.getOperand(0)));
3420 
3421  // A - (A & B) -> A & (~B)
3422  if (N1.getOpcode() == ISD::AND) {
3423  SDValue A = N1.getOperand(0);
3424  SDValue B = N1.getOperand(1);
3425  if (A != N0)
3426  std::swap(A, B);
3427  if (A == N0 &&
3428  (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3429  SDValue InvB =
3430  DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3431  return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3432  }
3433  }
3434 
3435  // fold (X - (-Y * Z)) -> (X + (Y * Z))
3436  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3437  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3439  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3440  N1.getOperand(0).getOperand(1),
3441  N1.getOperand(1));
3442  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3443  }
3444  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3446  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3447  N1.getOperand(0),
3448  N1.getOperand(1).getOperand(1));
3449  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3450  }
3451  }
3452 
3453  // If either operand of a sub is undef, the result is undef
3454  if (N0.isUndef())
3455  return N0;
3456  if (N1.isUndef())
3457  return N1;
3458 
3459  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3460  return V;
3461 
3462  if (SDValue V = foldAddSubOfSignBit(N, DAG))
3463  return V;
3464 
3465  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3466  return V;
3467 
3468  if (SDValue V = foldSubToUSubSat(VT, N))
3469  return V;
3470 
3471  // (x - y) - 1 -> add (xor y, -1), x
3472  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3473  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3474  DAG.getAllOnesConstant(DL, VT));
3475  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3476  }
3477 
3478  // Look for:
3479  // sub y, (xor x, -1)
3480  // And if the target does not like this form then turn into:
3481  // add (add x, y), 1
3482  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3483  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3484  return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3485  }
3486 
3487  // Hoist one-use addition by non-opaque constant:
3488  // (x + C) - y -> (x - y) + C
3489  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3490  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3491  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3492  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3493  }
3494  // y - (x + C) -> (y - x) - C
3495  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3496  isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3497  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3498  return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3499  }
3500  // (x - C) - y -> (x - y) - C
3501  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3502  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3503  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3504  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3505  return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3506  }
3507  // (C - x) - y -> C - (x + y)
3508  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3509  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3510  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3511  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3512  }
3513 
3514  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3515  // rather than 'sub 0/1' (the sext should get folded).
3516  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3517  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3518  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3519  TLI.getBooleanContents(VT) ==
3521  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3522  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3523  }
3524 
3525  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3526  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3527  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3528  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3529  SDValue S0 = N1.getOperand(0);
3530  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3532  if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3533  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3534  }
3535  }
3536 
3537  // If the relocation model supports it, consider symbol offsets.
3538  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3539  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3540  // fold (sub Sym, c) -> Sym-c
3541  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3542  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3543  GA->getOffset() -
3544  (uint64_t)N1C->getSExtValue());
3545  // fold (sub Sym+c1, Sym+c2) -> c1-c2
3546  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3547  if (GA->getGlobal() == GB->getGlobal())
3548  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3549  DL, VT);
3550  }
3551 
3552  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3553  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3554  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3555  if (TN->getVT() == MVT::i1) {
3556  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3557  DAG.getConstant(1, DL, VT));
3558  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3559  }
3560  }
3561 
3562  // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3563  if (N1.getOpcode() == ISD::VSCALE) {
3564  const APInt &IntVal = N1.getConstantOperandAPInt(0);
3565  return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3566  }
3567 
3568  // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3569  if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3570  APInt NewStep = -N1.getConstantOperandAPInt(0);
3571  return DAG.getNode(ISD::ADD, DL, VT, N0,
3572  DAG.getStepVector(DL, VT, NewStep));
3573  }
3574 
3575  // Prefer an add for more folding potential and possibly better codegen:
3576  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3577  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3578  SDValue ShAmt = N1.getOperand(1);
3579  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3580  if (ShAmtC &&
3581  ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3582  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3583  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3584  }
3585  }
3586 
3587  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3588  // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3589  if (SDValue Carry = getAsCarry(TLI, N0)) {
3590  SDValue X = N1;
3591  SDValue Zero = DAG.getConstant(0, DL, VT);
3592  SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3593  return DAG.getNode(ISD::ADDCARRY, DL,
3594  DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3595  Carry);
3596  }
3597  }
3598 
3599  return SDValue();
3600 }
3601 
3602 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3603  SDValue N0 = N->getOperand(0);
3604  SDValue N1 = N->getOperand(1);
3605  EVT VT = N0.getValueType();
3606  SDLoc DL(N);
3607 
3608  // fold (sub_sat x, undef) -> 0
3609  if (N0.isUndef() || N1.isUndef())
3610  return DAG.getConstant(0, DL, VT);
3611 
3612  // fold (sub_sat x, x) -> 0
3613  if (N0 == N1)
3614  return DAG.getConstant(0, DL, VT);
3615 
3616  // fold (sub_sat c1, c2) -> c3
3617  if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3618  return C;
3619 
3620  // fold vector ops
3621  if (VT.isVector()) {
3622  // TODO SimplifyVBinOp
3623 
3624  // fold (sub_sat x, 0) -> x, vector edition
3626  return N0;
3627  }
3628 
3629  // fold (sub_sat x, 0) -> x
3630  if (isNullConstant(N1))
3631  return N0;
3632 
3633  return SDValue();
3634 }
3635 
3636 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3637  SDValue N0 = N->getOperand(0);
3638  SDValue N1 = N->getOperand(1);
3639  EVT VT = N0.getValueType();
3640  SDLoc DL(N);
3641 
3642  // If the flag result is dead, turn this into an SUB.
3643  if (!N->hasAnyUseOfValue(1))
3644  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3646 
3647  // fold (subc x, x) -> 0 + no borrow
3648  if (N0 == N1)
3649  return CombineTo(N, DAG.getConstant(0, DL, VT),
3651 
3652  // fold (subc x, 0) -> x + no borrow
3653  if (isNullConstant(N1))
3654  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3655 
3656  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3657  if (isAllOnesConstant(N0))
3658  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3660 
3661  return SDValue();
3662 }
3663 
3664 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3665  SDValue N0 = N->getOperand(0);
3666  SDValue N1 = N->getOperand(1);
3667  EVT VT = N0.getValueType();
3668  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3669 
3670  EVT CarryVT = N->getValueType(1);
3671  SDLoc DL(N);
3672 
3673  // If the flag result is dead, turn this into an SUB.
3674  if (!N->hasAnyUseOfValue(1))
3675  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3676  DAG.getUNDEF(CarryVT));
3677 
3678  // fold (subo x, x) -> 0 + no borrow
3679  if (N0 == N1)
3680  return CombineTo(N, DAG.getConstant(0, DL, VT),
3681  DAG.getConstant(0, DL, CarryVT));
3682 
3684 
3685  // fold (subox, c) -> (addo x, -c)
3686  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3687  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3688  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3689  }
3690 
3691  // fold (subo x, 0) -> x + no borrow
3692  if (isNullOrNullSplat(N1))
3693  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3694 
3695  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3696  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3697  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3698  DAG.getConstant(0, DL, CarryVT));
3699 
3700  return SDValue();
3701 }
3702 
3703 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3704  SDValue N0 = N->getOperand(0);
3705  SDValue N1 = N->getOperand(1);
3706  SDValue CarryIn = N->getOperand(2);
3707 
3708  // fold (sube x, y, false) -> (subc x, y)
3709  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3710  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3711 
3712  return SDValue();
3713 }
3714 
3715 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3716  SDValue N0 = N->getOperand(0);
3717  SDValue N1 = N->getOperand(1);
3718  SDValue CarryIn = N->getOperand(2);
3719 
3720  // fold (subcarry x, y, false) -> (usubo x, y)
3721  if (isNullConstant(CarryIn)) {
3722  if (!LegalOperations ||
3723  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3724  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3725  }
3726 
3727  return SDValue();
3728 }
3729 
3730 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3731  SDValue N0 = N->getOperand(0);
3732  SDValue N1 = N->getOperand(1);
3733  SDValue CarryIn = N->getOperand(2);
3734 
3735  // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3736  if (isNullConstant(CarryIn)) {
3737  if (!LegalOperations ||
3738  TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3739  return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3740  }
3741 
3742  return SDValue();
3743 }
3744 
3745 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3746 // UMULFIXSAT here.
3747 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3748  SDValue N0 = N->getOperand(0);
3749  SDValue N1 = N->getOperand(1);
3750  SDValue Scale = N->getOperand(2);
3751  EVT VT = N0.getValueType();
3752 
3753  // fold (mulfix x, undef, scale) -> 0
3754  if (N0.isUndef() || N1.isUndef())
3755  return DAG.getConstant(0, SDLoc(N), VT);
3756 
3757  // Canonicalize constant to RHS (vector doesn't have to splat)
3760  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3761 
3762  // fold (mulfix x, 0, scale) -> 0
3763  if (isNullConstant(N1))
3764  return DAG.getConstant(0, SDLoc(N), VT);
3765 
3766  return SDValue();
3767 }
3768 
3769 SDValue DAGCombiner::visitMUL(SDNode *N) {
3770  SDValue N0 = N->getOperand(0);
3771  SDValue N1 = N->getOperand(1);
3772  EVT VT = N0.getValueType();
3773 
3774  // fold (mul x, undef) -> 0
3775  if (N0.isUndef() || N1.isUndef())
3776  return DAG.getConstant(0, SDLoc(N), VT);
3777 
3778  // fold (mul c1, c2) -> c1*c2
3779  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3780  return C;
3781 
3782  // canonicalize constant to RHS (vector doesn't have to splat)
3785  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3786 
3787  bool N1IsConst = false;
3788  bool N1IsOpaqueConst = false;
3789  APInt ConstValue1;
3790 
3791  // fold vector ops
3792  if (VT.isVector()) {
3793  if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
3794  return FoldedVOp;
3795 
3796  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3797  assert((!N1IsConst ||
3798  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3799  "Splat APInt should be element width");
3800  } else {
3801  N1IsConst = isa<ConstantSDNode>(N1);
3802  if (N1IsConst) {
3803  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3804  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3805  }
3806  }
3807 
3808  // fold (mul x, 0) -> 0
3809  if (N1IsConst && ConstValue1.isZero())
3810  return N1;
3811 
3812  // fold (mul x, 1) -> x
3813  if (N1IsConst && ConstValue1.isOne())
3814  return N0;
3815 
3816  if (SDValue NewSel = foldBinOpIntoSelect(N))
3817  return NewSel;
3818 
3819  // fold (mul x, -1) -> 0-x
3820  if (N1IsConst && ConstValue1.isAllOnes()) {
3821  SDLoc DL(N);
3822  return DAG.getNode(ISD::SUB, DL, VT,
3823  DAG.getConstant(0, DL, VT), N0);
3824  }
3825 
3826  // fold (mul x, (1 << c)) -> x << c
3827  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3828  DAG.isKnownToBeAPowerOfTwo(N1) &&
3829  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3830  SDLoc DL(N);
3831  SDValue LogBase2 = BuildLogBase2(N1, DL);
3832  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3833  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3834  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3835  }
3836 
3837  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3838  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
3839  unsigned Log2Val = (-ConstValue1).logBase2();
3840  SDLoc DL(N);
3841  // FIXME: If the input is something that is easily negated (e.g. a
3842  // single-use add), we should put the negate there.
3843  return DAG.getNode(ISD::SUB, DL, VT,
3844  DAG.getConstant(0, DL, VT),
3845  DAG.getNode(ISD::SHL, DL, VT, N0,
3846  DAG.getConstant(Log2Val, DL,
3847  getShiftAmountTy(N0.getValueType()))));
3848  }
3849 
3850  // Try to transform:
3851  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3852  // mul x, (2^N + 1) --> add (shl x, N), x
3853  // mul x, (2^N - 1) --> sub (shl x, N), x
3854  // Examples: x * 33 --> (x << 5) + x
3855  // x * 15 --> (x << 4) - x
3856  // x * -33 --> -((x << 5) + x)
3857  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3858  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3859  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3860  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3861  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3862  // x * 0xf800 --> (x << 16) - (x << 11)
3863  // x * -0x8800 --> -((x << 15) + (x << 11))
3864  // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3865  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3866  // TODO: We could handle more general decomposition of any constant by
3867  // having the target set a limit on number of ops and making a
3868  // callback to determine that sequence (similar to sqrt expansion).
3869  unsigned MathOp = ISD::DELETED_NODE;
3870  APInt MulC = ConstValue1.abs();
3871  // The constant `2` should be treated as (2^0 + 1).
3872  unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3873  MulC.lshrInPlace(TZeros);
3874  if ((MulC - 1).isPowerOf2())
3875  MathOp = ISD::ADD;
3876  else if ((MulC + 1).isPowerOf2())
3877  MathOp = ISD::SUB;
3878 
3879  if (MathOp != ISD::DELETED_NODE) {
3880  unsigned ShAmt =
3881  MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3882  ShAmt += TZeros;
3883  assert(ShAmt < VT.getScalarSizeInBits() &&
3884  "multiply-by-constant generated out of bounds shift");
3885  SDLoc DL(N);
3886  SDValue Shl =
3887  DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3888  SDValue R =
3889  TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3890  DAG.getNode(ISD::SHL, DL, VT, N0,
3891  DAG.getConstant(TZeros, DL, VT)))
3892  : DAG.getNode(MathOp, DL, VT, Shl, N0);
3893  if (ConstValue1.isNegative())
3894  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3895  return R;
3896  }
3897  }
3898 
3899  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3900  if (N0.getOpcode() == ISD::SHL &&
3901  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3902  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3903  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3905  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3906  }
3907 
3908  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3909  // use.
3910  {
3911  SDValue Sh, Y;
3912 
3913  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3914  if (N0.getOpcode() == ISD::SHL &&
3916  N0.getNode()->hasOneUse()) {
3917  Sh = N0; Y = N1;
3918  } else if (N1.getOpcode() == ISD::SHL &&
3920  N1.getNode()->hasOneUse()) {
3921  Sh = N1; Y = N0;
3922  }
3923 
3924  if (Sh.getNode()) {
3925  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3926  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3927  }
3928  }
3929 
3930  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3932  N0.getOpcode() == ISD::ADD &&
3934  isMulAddWithConstProfitable(N, N0, N1))
3935  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3936  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3937  N0.getOperand(0), N1),
3938  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3939  N0.getOperand(1), N1));
3940 
3941  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3942  if (N0.getOpcode() == ISD::VSCALE)
3943  if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3944  const APInt &C0 = N0.getConstantOperandAPInt(0);
3945  const APInt &C1 = NC1->getAPIntValue();
3946  return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3947  }
3948 
3949  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3950  APInt MulVal;
3951  if (N0.getOpcode() == ISD::STEP_VECTOR)
3952  if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3953  const APInt &C0 = N0.getConstantOperandAPInt(0);
3954  APInt NewStep = C0 * MulVal;
3955  return DAG.getStepVector(SDLoc(N), VT, NewStep);
3956  }
3957 
3958  // Fold ((mul x, 0/undef) -> 0,
3959  // (mul x, 1) -> x) -> x)
3960  // -> and(x, mask)
3961  // We can replace vectors with '0' and '1' factors with a clearing mask.
3962  if (VT.isFixedLengthVector()) {
3963  unsigned NumElts = VT.getVectorNumElements();
3964  SmallBitVector ClearMask;
3965  ClearMask.reserve(NumElts);
3966  auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3967  if (!V || V->isZero()) {
3968  ClearMask.push_back(true);
3969  return true;
3970  }
3971  ClearMask.push_back(false);
3972  return V->isOne();
3973  };
3974  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3975  ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3976  assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3977  SDLoc DL(N);
3978  EVT LegalSVT = N1.getOperand(0).getValueType();
3979  SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3980  SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3981  SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3982  for (unsigned I = 0; I != NumElts; ++I)
3983  if (ClearMask[I])
3984  Mask[I] = Zero;
3985  return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3986  }
3987  }
3988 
3989  // reassociate mul
3990  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3991  return RMUL;
3992 
3993  return SDValue();
3994 }
3995 
3996 /// Return true if divmod libcall is available.
3997 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3998  const TargetLowering &TLI) {
3999  RTLIB::Libcall LC;
4000  EVT NodeType = Node->getValueType(0);
4001  if (!NodeType.isSimple())
4002  return false;
4003  switch (NodeType.getSimpleVT().SimpleTy) {
4004  default: return false; // No libcall for vector types.
4005  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4006  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4007  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4008  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4009  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4010  }
4011 
4012  return TLI.getLibcallName(LC) != nullptr;
4013 }
4014 
4015 /// Issue divrem if both quotient and remainder are needed.
4016 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4017  if (Node->use_empty())
4018  return SDValue(); // This is a dead node, leave it alone.
4019 
4020  unsigned Opcode = Node->getOpcode();
4021  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4022  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4023 
4024  // DivMod lib calls can still work on non-legal types if using lib-calls.
4025  EVT VT = Node->getValueType(0);
4026  if (VT.isVector() || !VT.isInteger())
4027  return SDValue();
4028 
4029  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4030  return SDValue();
4031 
4032  // If DIVREM is going to get expanded into a libcall,
4033  // but there is no libcall available, then don't combine.
4034  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4035  !isDivRemLibcallAvailable(Node, isSigned, TLI))
4036  return SDValue();
4037 
4038  // If div is legal, it's better to do the normal expansion
4039  unsigned OtherOpcode = 0;
4040  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4041  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4042  if (TLI.isOperationLegalOrCustom(Opcode, VT))
4043  return SDValue();
4044  } else {
4045  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4046  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4047  return SDValue();
4048  }
4049 
4050  SDValue Op0 = Node->getOperand(0);
4051  SDValue Op1 = Node->getOperand(1);
4052  SDValue combined;
4053  for (SDNode *User : Op0.getNode()->uses()) {
4054  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4055  User->use_empty())
4056  continue;
4057  // Convert the other matching node(s), too;
4058  // otherwise, the DIVREM may get target-legalized into something
4059  // target-specific that we won't be able to recognize.
4060  unsigned UserOpc = User->getOpcode();
4061  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4062  User->getOperand(0) == Op0 &&
4063  User->getOperand(1) == Op1) {
4064  if (!combined) {
4065  if (UserOpc == OtherOpcode) {
4066  SDVTList VTs = DAG.getVTList(VT, VT);
4067  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4068  } else if (UserOpc == DivRemOpc) {
4069  combined = SDValue(User, 0);
4070  } else {
4071  assert(UserOpc == Opcode);
4072  continue;
4073  }
4074  }
4075  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4076  CombineTo(User, combined);
4077  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4078  CombineTo(User, combined.getValue(1));
4079  }
4080  }
4081  return combined;
4082 }
4083 
4085  SDValue N0 = N->getOperand(0);
4086  SDValue N1 = N->getOperand(1);
4087  EVT VT = N->getValueType(0);
4088  SDLoc DL(N);
4089 
4090  unsigned Opc = N->getOpcode();
4091  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4093 
4094  // X / undef -> undef
4095  // X % undef -> undef
4096  // X / 0 -> undef
4097  // X % 0 -> undef
4098  // NOTE: This includes vectors where any divisor element is zero/undef.
4099  if (DAG.isUndef(Opc, {N0, N1}))
4100  return DAG.getUNDEF(VT);
4101 
4102  // undef / X -> 0
4103  // undef % X -> 0
4104  if (N0.isUndef())
4105  return DAG.getConstant(0, DL, VT);
4106 
4107  // 0 / X -> 0
4108  // 0 % X -> 0
4110  if (N0C && N0C->isZero())
4111  return N0;
4112 
4113  // X / X -> 1
4114  // X % X -> 0
4115  if (N0 == N1)
4116  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4117 
4118  // X / 1 -> X
4119  // X % 1 -> 0
4120  // If this is a boolean op (single-bit element type), we can't have
4121  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4122  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4123  // it's a 1.
4124  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4125  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4126 
4127  return SDValue();
4128 }
4129 
4130 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4131  SDValue N0 = N->getOperand(0);
4132  SDValue N1 = N->getOperand(1);
4133  EVT VT = N->getValueType(0);
4134  EVT CCVT = getSetCCResultType(VT);
4135  SDLoc DL(N);
4136 
4137  // fold (sdiv c1, c2) -> c1/c2
4138  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4139  return C;
4140 
4141  // fold vector ops
4142  if (VT.isVector())
4143  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4144  return FoldedVOp;
4145 
4146  // fold (sdiv X, -1) -> 0-X
4148  if (N1C && N1C->isAllOnes())
4149  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4150 
4151  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4152  if (N1C && N1C->getAPIntValue().isMinSignedValue())
4153  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4154  DAG.getConstant(1, DL, VT),
4155  DAG.getConstant(0, DL, VT));
4156 
4157  if (SDValue V = simplifyDivRem(N, DAG))
4158  return V;
4159 
4160  if (SDValue NewSel = foldBinOpIntoSelect(N))
4161  return NewSel;
4162 
4163  // If we know the sign bits of both operands are zero, strength reduce to a
4164  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4165  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4166  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4167 
4168  if (SDValue V = visitSDIVLike(N0, N1, N)) {
4169  // If the corresponding remainder node exists, update its users with
4170  // (Dividend - (Quotient * Divisor).
4171  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4172  { N0, N1 })) {
4173  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4174  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4175  AddToWorklist(Mul.getNode());
4176  AddToWorklist(Sub.getNode());
4177  CombineTo(RemNode, Sub);
4178  }
4179  return V;
4180  }
4181 
4182  // sdiv, srem -> sdivrem
4183  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4184  // true. Otherwise, we break the simplification logic in visitREM().
4186  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4187  if (SDValue DivRem = useDivRem(N))
4188  return DivRem;
4189 
4190  return SDValue();
4191 }
4192 
4193 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4194  SDLoc DL(N);
4195  EVT VT = N->getValueType(0);
4196  EVT CCVT = getSetCCResultType(VT);
4197  unsigned BitWidth = VT.getScalarSizeInBits();
4198 
4199  // Helper for determining whether a value is a power-2 constant scalar or a
4200  // vector of such elements.
4201  auto IsPowerOfTwo = [](ConstantSDNode *C) {
4202  if (C->isZero() || C->isOpaque())
4203  return false;
4204  if (C->getAPIntValue().isPowerOf2())
4205  return true;
4206  if (C->getAPIntValue().isNegatedPowerOf2())
4207  return true;
4208  return false;
4209  };
4210 
4211  // fold (sdiv X, pow2) -> simple ops after legalize
4212  // FIXME: We check for the exact bit here because the generic lowering gives
4213  // better results in that case. The target-specific lowering should learn how
4214  // to handle exact sdivs efficiently.
4215  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4216  // Target-specific implementation of sdiv x, pow2.
4217  if (SDValue Res = BuildSDIVPow2(N))
4218  return Res;
4219 
4220  // Create constants that are functions of the shift amount value.
4221  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4222  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4223  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4224  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4225  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4226  if (!isConstantOrConstantVector(Inexact))
4227  return SDValue();
4228 
4229  // Splat the sign bit into the register
4230  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4231  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4232  AddToWorklist(Sign.getNode());
4233 
4234  // Add (N0 < 0) ? abs2 - 1 : 0;
4235  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4236  AddToWorklist(Srl.getNode());
4237  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4238  AddToWorklist(Add.getNode());
4239  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4240  AddToWorklist(Sra.getNode());
4241 
4242  // Special case: (sdiv X, 1) -> X
4243  // Special Case: (sdiv X, -1) -> 0-X
4244  SDValue One = DAG.getConstant(1, DL, VT);
4245  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4246  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4247  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4248  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4249  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4250 
4251  // If dividing by a positive value, we're done. Otherwise, the result must
4252  // be negated.
4253  SDValue Zero = DAG.getConstant(0, DL, VT);
4254  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4255 
4256  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4257  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4258  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4259  return Res;
4260  }
4261 
4262  // If integer divide is expensive and we satisfy the requirements, emit an
4263  // alternate sequence. Targets may check function attributes for size/speed
4264  // trade-offs.
4266  if (isConstantOrConstantVector(N1) &&
4267  !TLI.isIntDivCheap(N->getValueType(0), Attr))
4268  if (SDValue Op = BuildSDIV(N))
4269  return Op;
4270 
4271  return SDValue();
4272 }
4273 
4274 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4275  SDValue N0 = N->getOperand(0);
4276  SDValue N1 = N->getOperand(1);
4277  EVT VT = N->getValueType(0);
4278  EVT CCVT = getSetCCResultType(VT);
4279  SDLoc DL(N);
4280 
4281  // fold (udiv c1, c2) -> c1/c2
4282  if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4283  return C;
4284 
4285  // fold vector ops
4286  if (VT.isVector())
4287  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4288  return FoldedVOp;
4289 
4290  // fold (udiv X, -1) -> select(X == -1, 1, 0)
4292  if (N1C && N1C->isAllOnes())
4293  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4294  DAG.getConstant(1, DL, VT),
4295  DAG.getConstant(0, DL, VT));
4296 
4297  if (SDValue V = simplifyDivRem(N, DAG))
4298  return V;
4299 
4300  if (SDValue NewSel = foldBinOpIntoSelect(N))
4301  return NewSel;
4302 
4303  if (SDValue V = visitUDIVLike(N0, N1, N)) {
4304  // If the corresponding remainder node exists, update its users with
4305  // (Dividend - (Quotient * Divisor).
4306  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4307  { N0, N1 })) {
4308  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4309  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4310  AddToWorklist(Mul.getNode());
4311  AddToWorklist(Sub.getNode());
4312  CombineTo(RemNode, Sub);
4313  }
4314  return V;
4315  }
4316 
4317  // sdiv, srem -> sdivrem
4318  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4319  // true. Otherwise, we break the simplification logic in visitREM().
4321  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4322  if (SDValue DivRem = useDivRem(N))
4323  return DivRem;
4324 
4325  return SDValue();
4326 }
4327 
4328 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4329  SDLoc DL(N);
4330  EVT VT = N->getValueType(0);
4331 
4332  // fold (udiv x, (1 << c)) -> x >>u c
4333  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4334  DAG.isKnownToBeAPowerOfTwo(N1)) {
4335  SDValue LogBase2 = BuildLogBase2(N1, DL);
4336  AddToWorklist(LogBase2.getNode());
4337 
4338  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4339  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4340  AddToWorklist(Trunc.getNode());
4341  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4342  }
4343 
4344  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4345  if (N1.getOpcode() == ISD::SHL) {
4346  SDValue N10 = N1.getOperand(0);
4347  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4348  DAG.isKnownToBeAPowerOfTwo(N10)) {
4349  SDValue LogBase2 = BuildLogBase2(N10, DL);
4350  AddToWorklist(LogBase2.getNode());
4351 
4352  EVT ADDVT = N1.getOperand(1).getValueType();
4353  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4354  AddToWorklist(Trunc.getNode());
4355  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4356  AddToWorklist(Add.getNode());
4357  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4358  }
4359  }
4360 
4361  // fold (udiv x, c) -> alternate
4363  if (isConstantOrConstantVector(N1) &&
4364  !TLI.isIntDivCheap(N->getValueType(0), Attr))
4365  if (SDValue Op = BuildUDIV(N))
4366  return Op;
4367 
4368  return