LLVM  13.0.0git
DAGCombiner.cpp
Go to the documentation of this file.
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/Constant.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CodeGen.h"
60 #include "llvm/Support/Compiler.h"
61 #include "llvm/Support/Debug.h"
63 #include "llvm/Support/KnownBits.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <functional>
73 #include <iterator>
74 #include <string>
75 #include <tuple>
76 #include <utility>
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "dagcombine"
81 
82 STATISTIC(NodesCombined , "Number of dag nodes combined");
83 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87 STATISTIC(SlicedLoads, "Number of load sliced");
88 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89 
90 static cl::opt<bool>
91 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92  cl::desc("Enable DAG combiner's use of IR alias analysis"));
93 
94 static cl::opt<bool>
95 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96  cl::desc("Enable DAG combiner's use of TBAA"));
97 
98 #ifndef NDEBUG
100 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101  cl::desc("Only use DAG-combiner alias analysis in this"
102  " function"));
103 #endif
104 
105 /// Hidden option to stress test load slicing, i.e., when this option
106 /// is enabled, load slicing bypasses most of its profitability guards.
107 static cl::opt<bool>
108 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109  cl::desc("Bypass the profitability model of load slicing"),
110  cl::init(false));
111 
112 static cl::opt<bool>
113  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114  cl::desc("DAG combiner may split indexing from loads"));
115 
116 static cl::opt<bool>
117  EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118  cl::desc("DAG combiner enable merging multiple stores "
119  "into a wider store"));
120 
122  "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123  cl::desc("Limit the number of operands to inline for Token Factors"));
124 
126  "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127  cl::desc("Limit the number of times for the same StoreNode and RootNode "
128  "to bail out in store merging dependence check"));
129 
131  "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132  cl::desc("DAG cominber enable reducing the width of load/op/store "
133  "sequence"));
134 
136  "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137  cl::desc("DAG cominber enable load/<replace bytes>/store with "
138  "a narrower store"));
139 
140 namespace {
141 
142  class DAGCombiner {
143  SelectionDAG &DAG;
144  const TargetLowering &TLI;
145  const SelectionDAGTargetInfo *STI;
147  CodeGenOpt::Level OptLevel;
148  bool LegalDAG = false;
149  bool LegalOperations = false;
150  bool LegalTypes = false;
151  bool ForCodeSize;
152  bool DisableGenericCombines;
153 
154  /// Worklist of all of the nodes that need to be simplified.
155  ///
156  /// This must behave as a stack -- new nodes to process are pushed onto the
157  /// back and when processing we pop off of the back.
158  ///
159  /// The worklist will not contain duplicates but may contain null entries
160  /// due to nodes being deleted from the underlying DAG.
161  SmallVector<SDNode *, 64> Worklist;
162 
163  /// Mapping from an SDNode to its position on the worklist.
164  ///
165  /// This is used to find and remove nodes from the worklist (by nulling
166  /// them) when they are deleted from the underlying DAG. It relies on
167  /// stable indices of nodes within the worklist.
168  DenseMap<SDNode *, unsigned> WorklistMap;
169  /// This records all nodes attempted to add to the worklist since we
170  /// considered a new worklist entry. As we keep do not add duplicate nodes
171  /// in the worklist, this is different from the tail of the worklist.
172  SmallSetVector<SDNode *, 32> PruningList;
173 
174  /// Set of nodes which have been combined (at least once).
175  ///
176  /// This is used to allow us to reliably add any operands of a DAG node
177  /// which have not yet been combined to the worklist.
178  SmallPtrSet<SDNode *, 32> CombinedNodes;
179 
180  /// Map from candidate StoreNode to the pair of RootNode and count.
181  /// The count is used to track how many times we have seen the StoreNode
182  /// with the same RootNode bail out in dependence check. If we have seen
183  /// the bail out for the same pair many times over a limit, we won't
184  /// consider the StoreNode with the same RootNode as store merging
185  /// candidate again.
187 
188  // AA - Used for DAG load/store alias analysis.
189  AliasAnalysis *AA;
190 
191  /// When an instruction is simplified, add all users of the instruction to
192  /// the work lists because they might get more simplified now.
193  void AddUsersToWorklist(SDNode *N) {
194  for (SDNode *Node : N->uses())
195  AddToWorklist(Node);
196  }
197 
198  /// Convenient shorthand to add a node and all of its user to the worklist.
199  void AddToWorklistWithUsers(SDNode *N) {
200  AddUsersToWorklist(N);
201  AddToWorklist(N);
202  }
203 
204  // Prune potentially dangling nodes. This is called after
205  // any visit to a node, but should also be called during a visit after any
206  // failed combine which may have created a DAG node.
207  void clearAddedDanglingWorklistEntries() {
208  // Check any nodes added to the worklist to see if they are prunable.
209  while (!PruningList.empty()) {
210  auto *N = PruningList.pop_back_val();
211  if (N->use_empty())
212  recursivelyDeleteUnusedNodes(N);
213  }
214  }
215 
216  SDNode *getNextWorklistEntry() {
217  // Before we do any work, remove nodes that are not in use.
218  clearAddedDanglingWorklistEntries();
219  SDNode *N = nullptr;
220  // The Worklist holds the SDNodes in order, but it may contain null
221  // entries.
222  while (!N && !Worklist.empty()) {
223  N = Worklist.pop_back_val();
224  }
225 
226  if (N) {
227  bool GoodWorklistEntry = WorklistMap.erase(N);
228  (void)GoodWorklistEntry;
229  assert(GoodWorklistEntry &&
230  "Found a worklist entry without a corresponding map entry!");
231  }
232  return N;
233  }
234 
235  /// Call the node-specific routine that folds each particular type of node.
236  SDValue visit(SDNode *N);
237 
238  public:
239  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240  : DAG(D), TLI(D.getTargetLoweringInfo()),
241  STI(D.getSubtarget().getSelectionDAGInfo()),
242  Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243  ForCodeSize = DAG.shouldOptForSize();
244  DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
245 
246  MaximumLegalStoreInBits = 0;
247  // We use the minimum store size here, since that's all we can guarantee
248  // for the scalable vector types.
249  for (MVT VT : MVT::all_valuetypes())
250  if (EVT(VT).isSimple() && VT != MVT::Other &&
251  TLI.isTypeLegal(EVT(VT)) &&
252  VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253  MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254  }
255 
256  void ConsiderForPruning(SDNode *N) {
257  // Mark this for potential pruning.
258  PruningList.insert(N);
259  }
260 
261  /// Add to the worklist making sure its instance is at the back (next to be
262  /// processed.)
263  void AddToWorklist(SDNode *N) {
264  assert(N->getOpcode() != ISD::DELETED_NODE &&
265  "Deleted Node added to Worklist");
266 
267  // Skip handle nodes as they can't usefully be combined and confuse the
268  // zero-use deletion strategy.
269  if (N->getOpcode() == ISD::HANDLENODE)
270  return;
271 
272  ConsiderForPruning(N);
273 
274  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275  Worklist.push_back(N);
276  }
277 
278  /// Remove all instances of N from the worklist.
279  void removeFromWorklist(SDNode *N) {
280  CombinedNodes.erase(N);
281  PruningList.remove(N);
282  StoreRootCountMap.erase(N);
283 
284  auto It = WorklistMap.find(N);
285  if (It == WorklistMap.end())
286  return; // Not in the worklist.
287 
288  // Null out the entry rather than erasing it to avoid a linear operation.
289  Worklist[It->second] = nullptr;
290  WorklistMap.erase(It);
291  }
292 
293  void deleteAndRecombine(SDNode *N);
294  bool recursivelyDeleteUnusedNodes(SDNode *N);
295 
296  /// Replaces all uses of the results of one DAG node with new values.
297  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298  bool AddTo = true);
299 
300  /// Replaces all uses of the results of one DAG node with new values.
301  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302  return CombineTo(N, &Res, 1, AddTo);
303  }
304 
305  /// Replaces all uses of the results of one DAG node with new values.
306  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307  bool AddTo = true) {
308  SDValue To[] = { Res0, Res1 };
309  return CombineTo(N, To, 2, AddTo);
310  }
311 
312  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313 
314  private:
315  unsigned MaximumLegalStoreInBits;
316 
317  /// Check the specified integer node value to see if it can be simplified or
318  /// if things it uses can be simplified by bit propagation.
319  /// If so, return true.
320  bool SimplifyDemandedBits(SDValue Op) {
321  unsigned BitWidth = Op.getScalarValueSizeInBits();
323  return SimplifyDemandedBits(Op, DemandedBits);
324  }
325 
326  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328  KnownBits Known;
329  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330  return false;
331 
332  // Revisit the node.
333  AddToWorklist(Op.getNode());
334 
335  CommitTargetLoweringOpt(TLO);
336  return true;
337  }
338 
339  /// Check the specified vector node value to see if it can be simplified or
340  /// if things it uses can be simplified as it only uses some of the
341  /// elements. If so, return true.
342  bool SimplifyDemandedVectorElts(SDValue Op) {
343  // TODO: For now just pretend it cannot be simplified.
344  if (Op.getValueType().isScalableVector())
345  return false;
346 
347  unsigned NumElts = Op.getValueType().getVectorNumElements();
348  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
349  return SimplifyDemandedVectorElts(Op, DemandedElts);
350  }
351 
352  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353  const APInt &DemandedElts,
354  bool AssumeSingleUse = false);
355  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356  bool AssumeSingleUse = false);
357 
358  bool CombineToPreIndexedLoadStore(SDNode *N);
359  bool CombineToPostIndexedLoadStore(SDNode *N);
360  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361  bool SliceUpLoad(SDNode *N);
362 
363  // Scalars have size 0 to distinguish from singleton vectors.
364  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
367 
368  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369  /// load.
370  ///
371  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373  /// \param EltNo index of the vector element to load.
374  /// \param OriginalLoad load that EVE came from to be replaced.
375  /// \returns EVE on success SDValue() on failure.
376  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377  SDValue EltNo,
378  LoadSDNode *OriginalLoad);
379  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
380  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383  SDValue PromoteIntBinOp(SDValue Op);
384  SDValue PromoteIntShiftOp(SDValue Op);
385  SDValue PromoteExtend(SDValue Op);
386  bool PromoteLoad(SDValue Op);
387 
388  /// Call the node-specific routine that knows how to fold each
389  /// particular type of node. If that doesn't do anything, try the
390  /// target-specific DAG combines.
392 
393  // Visitation implementation - Implement dag node combining for different
394  // node types. The semantics are as follows:
395  // Return Value:
396  // SDValue.getNode() == 0 - No change was made
397  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
398  // otherwise - N should be replaced by the returned Operand.
399  //
400  SDValue visitTokenFactor(SDNode *N);
401  SDValue visitMERGE_VALUES(SDNode *N);
402  SDValue visitADD(SDNode *N);
403  SDValue visitADDLike(SDNode *N);
404  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405  SDValue visitSUB(SDNode *N);
406  SDValue visitADDSAT(SDNode *N);
407  SDValue visitSUBSAT(SDNode *N);
408  SDValue visitADDC(SDNode *N);
409  SDValue visitADDO(SDNode *N);
410  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411  SDValue visitSUBC(SDNode *N);
412  SDValue visitSUBO(SDNode *N);
413  SDValue visitADDE(SDNode *N);
414  SDValue visitADDCARRY(SDNode *N);
415  SDValue visitSADDO_CARRY(SDNode *N);
416  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417  SDValue visitSUBE(SDNode *N);
418  SDValue visitSUBCARRY(SDNode *N);
419  SDValue visitSSUBO_CARRY(SDNode *N);
420  SDValue visitMUL(SDNode *N);
421  SDValue visitMULFIX(SDNode *N);
422  SDValue useDivRem(SDNode *N);
423  SDValue visitSDIV(SDNode *N);
424  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425  SDValue visitUDIV(SDNode *N);
426  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427  SDValue visitREM(SDNode *N);
428  SDValue visitMULHU(SDNode *N);
429  SDValue visitMULHS(SDNode *N);
430  SDValue visitSMUL_LOHI(SDNode *N);
431  SDValue visitUMUL_LOHI(SDNode *N);
432  SDValue visitMULO(SDNode *N);
433  SDValue visitIMINMAX(SDNode *N);
434  SDValue visitAND(SDNode *N);
435  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436  SDValue visitOR(SDNode *N);
437  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438  SDValue visitXOR(SDNode *N);
439  SDValue SimplifyVBinOp(SDNode *N);
440  SDValue visitSHL(SDNode *N);
441  SDValue visitSRA(SDNode *N);
442  SDValue visitSRL(SDNode *N);
443  SDValue visitFunnelShift(SDNode *N);
444  SDValue visitRotate(SDNode *N);
445  SDValue visitABS(SDNode *N);
446  SDValue visitBSWAP(SDNode *N);
447  SDValue visitBITREVERSE(SDNode *N);
448  SDValue visitCTLZ(SDNode *N);
449  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450  SDValue visitCTTZ(SDNode *N);
451  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452  SDValue visitCTPOP(SDNode *N);
453  SDValue visitSELECT(SDNode *N);
454  SDValue visitVSELECT(SDNode *N);
455  SDValue visitSELECT_CC(SDNode *N);
456  SDValue visitSETCC(SDNode *N);
457  SDValue visitSETCCCARRY(SDNode *N);
458  SDValue visitSIGN_EXTEND(SDNode *N);
459  SDValue visitZERO_EXTEND(SDNode *N);
460  SDValue visitANY_EXTEND(SDNode *N);
461  SDValue visitAssertExt(SDNode *N);
462  SDValue visitAssertAlign(SDNode *N);
463  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
465  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
466  SDValue visitTRUNCATE(SDNode *N);
467  SDValue visitBITCAST(SDNode *N);
468  SDValue visitFREEZE(SDNode *N);
469  SDValue visitBUILD_PAIR(SDNode *N);
470  SDValue visitFADD(SDNode *N);
471  SDValue visitSTRICT_FADD(SDNode *N);
472  SDValue visitFSUB(SDNode *N);
473  SDValue visitFMUL(SDNode *N);
474  SDValue visitFMA(SDNode *N);
475  SDValue visitFDIV(SDNode *N);
476  SDValue visitFREM(SDNode *N);
477  SDValue visitFSQRT(SDNode *N);
478  SDValue visitFCOPYSIGN(SDNode *N);
479  SDValue visitFPOW(SDNode *N);
480  SDValue visitSINT_TO_FP(SDNode *N);
481  SDValue visitUINT_TO_FP(SDNode *N);
482  SDValue visitFP_TO_SINT(SDNode *N);
483  SDValue visitFP_TO_UINT(SDNode *N);
484  SDValue visitFP_ROUND(SDNode *N);
485  SDValue visitFP_EXTEND(SDNode *N);
486  SDValue visitFNEG(SDNode *N);
487  SDValue visitFABS(SDNode *N);
488  SDValue visitFCEIL(SDNode *N);
489  SDValue visitFTRUNC(SDNode *N);
490  SDValue visitFFLOOR(SDNode *N);
491  SDValue visitFMINNUM(SDNode *N);
492  SDValue visitFMAXNUM(SDNode *N);
493  SDValue visitFMINIMUM(SDNode *N);
494  SDValue visitFMAXIMUM(SDNode *N);
495  SDValue visitBRCOND(SDNode *N);
496  SDValue visitBR_CC(SDNode *N);
497  SDValue visitLOAD(SDNode *N);
498 
499  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
500  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
501 
502  SDValue visitSTORE(SDNode *N);
503  SDValue visitLIFETIME_END(SDNode *N);
504  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
505  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
506  SDValue visitBUILD_VECTOR(SDNode *N);
507  SDValue visitCONCAT_VECTORS(SDNode *N);
508  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
509  SDValue visitVECTOR_SHUFFLE(SDNode *N);
510  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
511  SDValue visitINSERT_SUBVECTOR(SDNode *N);
512  SDValue visitMLOAD(SDNode *N);
513  SDValue visitMSTORE(SDNode *N);
514  SDValue visitMGATHER(SDNode *N);
515  SDValue visitMSCATTER(SDNode *N);
516  SDValue visitFP_TO_FP16(SDNode *N);
517  SDValue visitFP16_TO_FP(SDNode *N);
518  SDValue visitVECREDUCE(SDNode *N);
519 
520  SDValue visitFADDForFMACombine(SDNode *N);
521  SDValue visitFSUBForFMACombine(SDNode *N);
522  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
523 
524  SDValue XformToShuffleWithZero(SDNode *N);
525  bool reassociationCanBreakAddressingModePattern(unsigned Opc,
526  const SDLoc &DL, SDValue N0,
527  SDValue N1);
528  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
529  SDValue N1);
530  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
531  SDValue N1, SDNodeFlags Flags);
532 
533  SDValue visitShiftByConstant(SDNode *N);
534 
535  SDValue foldSelectOfConstants(SDNode *N);
536  SDValue foldVSelectOfConstants(SDNode *N);
537  SDValue foldBinOpIntoSelect(SDNode *BO);
538  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
539  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
540  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
541  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
542  SDValue N2, SDValue N3, ISD::CondCode CC,
543  bool NotExtCompare = false);
544  SDValue convertSelectOfFPConstantsToLoadOffset(
545  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
546  ISD::CondCode CC);
547  SDValue foldSignChangeInBitcast(SDNode *N);
548  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
549  SDValue N2, SDValue N3, ISD::CondCode CC);
550  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
551  const SDLoc &DL);
552  SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
553  SDValue unfoldMaskedMerge(SDNode *N);
554  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
555  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
556  const SDLoc &DL, bool foldBooleans);
557  SDValue rebuildSetCC(SDValue N);
558 
559  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
560  SDValue &CC, bool MatchStrict = false) const;
561  bool isOneUseSetCC(SDValue N) const;
562 
563  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
564  unsigned HiOp);
565  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
566  SDValue CombineExtLoad(SDNode *N);
567  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
568  SDValue combineRepeatedFPDivisors(SDNode *N);
569  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
570  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
571  SDValue BuildSDIV(SDNode *N);
572  SDValue BuildSDIVPow2(SDNode *N);
573  SDValue BuildUDIV(SDNode *N);
574  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
575  SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
576  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
577  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
578  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
579  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
580  SDNodeFlags Flags, bool Reciprocal);
581  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
582  SDNodeFlags Flags, bool Reciprocal);
583  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
584  bool DemandHighBits = true);
585  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
586  SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
587  SDValue InnerPos, SDValue InnerNeg,
588  unsigned PosOpcode, unsigned NegOpcode,
589  const SDLoc &DL);
590  SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
591  SDValue InnerPos, SDValue InnerNeg,
592  unsigned PosOpcode, unsigned NegOpcode,
593  const SDLoc &DL);
594  SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
595  SDValue MatchLoadCombine(SDNode *N);
596  SDValue mergeTruncStores(StoreSDNode *N);
597  SDValue ReduceLoadWidth(SDNode *N);
598  SDValue ReduceLoadOpStoreWidth(SDNode *N);
600  SDValue TransformFPLoadStorePair(SDNode *N);
601  SDValue convertBuildVecZextToZext(SDNode *N);
602  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
603  SDValue reduceBuildVecTruncToBitCast(SDNode *N);
604  SDValue reduceBuildVecToShuffle(SDNode *N);
605  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
606  ArrayRef<int> VectorMask, SDValue VecIn1,
607  SDValue VecIn2, unsigned LeftIdx,
608  bool DidSplitVec);
609  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
610 
611  /// Walk up chain skipping non-aliasing memory nodes,
612  /// looking for aliasing nodes and adding them to the Aliases vector.
613  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
614  SmallVectorImpl<SDValue> &Aliases);
615 
616  /// Return true if there is any possibility that the two addresses overlap.
617  bool isAlias(SDNode *Op0, SDNode *Op1) const;
618 
619  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
620  /// chain (aliasing node.)
621  SDValue FindBetterChain(SDNode *N, SDValue Chain);
622 
623  /// Try to replace a store and any possibly adjacent stores on
624  /// consecutive chains with better chains. Return true only if St is
625  /// replaced.
626  ///
627  /// Notice that other chains may still be replaced even if the function
628  /// returns false.
629  bool findBetterNeighborChains(StoreSDNode *St);
630 
631  // Helper for findBetterNeighborChains. Walk up store chain add additional
632  // chained stores that do not overlap and can be parallelized.
633  bool parallelizeChainedStores(StoreSDNode *St);
634 
635  /// Holds a pointer to an LSBaseSDNode as well as information on where it
636  /// is located in a sequence of memory operations connected by a chain.
637  struct MemOpLink {
638  // Ptr to the mem node.
639  LSBaseSDNode *MemNode;
640 
641  // Offset from the base ptr.
642  int64_t OffsetFromBase;
643 
644  MemOpLink(LSBaseSDNode *N, int64_t Offset)
645  : MemNode(N), OffsetFromBase(Offset) {}
646  };
647 
648  // Classify the origin of a stored value.
649  enum class StoreSource { Unknown, Constant, Extract, Load };
650  StoreSource getStoreSource(SDValue StoreVal) {
651  switch (StoreVal.getOpcode()) {
652  case ISD::Constant:
653  case ISD::ConstantFP:
654  return StoreSource::Constant;
657  return StoreSource::Extract;
658  case ISD::LOAD:
659  return StoreSource::Load;
660  default:
661  return StoreSource::Unknown;
662  }
663  }
664 
665  /// This is a helper function for visitMUL to check the profitability
666  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
667  /// MulNode is the original multiply, AddNode is (add x, c1),
668  /// and ConstNode is c2.
669  bool isMulAddWithConstProfitable(SDNode *MulNode,
670  SDValue &AddNode,
671  SDValue &ConstNode);
672 
673  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
674  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
675  /// the type of the loaded value to be extended.
676  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
677  EVT LoadResultTy, EVT &ExtVT);
678 
679  /// Helper function to calculate whether the given Load/Store can have its
680  /// width reduced to ExtVT.
681  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
682  EVT &MemVT, unsigned ShAmt = 0);
683 
684  /// Used by BackwardsPropagateMask to find suitable loads.
685  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
686  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
687  ConstantSDNode *Mask, SDNode *&NodeToMask);
688  /// Attempt to propagate a given AND node back to load leaves so that they
689  /// can be combined into narrow loads.
690  bool BackwardsPropagateMask(SDNode *N);
691 
692  /// Helper function for mergeConsecutiveStores which merges the component
693  /// store chains.
694  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
695  unsigned NumStores);
696 
697  /// This is a helper function for mergeConsecutiveStores. When the source
698  /// elements of the consecutive stores are all constants or all extracted
699  /// vector elements, try to merge them into one larger store introducing
700  /// bitcasts if necessary. \return True if a merged store was created.
701  bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
702  EVT MemVT, unsigned NumStores,
703  bool IsConstantSrc, bool UseVector,
704  bool UseTrunc);
705 
706  /// This is a helper function for mergeConsecutiveStores. Stores that
707  /// potentially may be merged with St are placed in StoreNodes. RootNode is
708  /// a chain predecessor to all store candidates.
709  void getStoreMergeCandidates(StoreSDNode *St,
710  SmallVectorImpl<MemOpLink> &StoreNodes,
711  SDNode *&Root);
712 
713  /// Helper function for mergeConsecutiveStores. Checks if candidate stores
714  /// have indirect dependency through their operands. RootNode is the
715  /// predecessor to all stores calculated by getStoreMergeCandidates and is
716  /// used to prune the dependency check. \return True if safe to merge.
717  bool checkMergeStoreCandidatesForDependencies(
718  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
719  SDNode *RootNode);
720 
721  /// This is a helper function for mergeConsecutiveStores. Given a list of
722  /// store candidates, find the first N that are consecutive in memory.
723  /// Returns 0 if there are not at least 2 consecutive stores to try merging.
724  unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
725  int64_t ElementSizeBytes) const;
726 
727  /// This is a helper function for mergeConsecutiveStores. It is used for
728  /// store chains that are composed entirely of constant values.
729  bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
730  unsigned NumConsecutiveStores,
731  EVT MemVT, SDNode *Root, bool AllowVectors);
732 
733  /// This is a helper function for mergeConsecutiveStores. It is used for
734  /// store chains that are composed entirely of extracted vector elements.
735  /// When extracting multiple vector elements, try to store them in one
736  /// vector store rather than a sequence of scalar stores.
737  bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
738  unsigned NumConsecutiveStores, EVT MemVT,
739  SDNode *Root);
740 
741  /// This is a helper function for mergeConsecutiveStores. It is used for
742  /// store chains that are composed entirely of loaded values.
743  bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
744  unsigned NumConsecutiveStores, EVT MemVT,
745  SDNode *Root, bool AllowVectors,
746  bool IsNonTemporalStore, bool IsNonTemporalLoad);
747 
748  /// Merge consecutive store operations into a wide store.
749  /// This optimization uses wide integers or vectors when possible.
750  /// \return true if stores were merged.
751  bool mergeConsecutiveStores(StoreSDNode *St);
752 
753  /// Try to transform a truncation where C is a constant:
754  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
755  ///
756  /// \p N needs to be a truncation and its first operand an AND. Other
757  /// requirements are checked by the function (e.g. that trunc is
758  /// single-use) and if missed an empty SDValue is returned.
759  SDValue distributeTruncateThroughAnd(SDNode *N);
760 
761  /// Helper function to determine whether the target supports operation
762  /// given by \p Opcode for type \p VT, that is, whether the operation
763  /// is legal or custom before legalizing operations, and whether is
764  /// legal (but not custom) after legalization.
765  bool hasOperation(unsigned Opcode, EVT VT) {
766  return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
767  }
768 
769  public:
770  /// Runs the dag combiner on all nodes in the work list
771  void Run(CombineLevel AtLevel);
772 
773  SelectionDAG &getDAG() const { return DAG; }
774 
775  /// Returns a type large enough to hold any valid shift amount - before type
776  /// legalization these can be huge.
777  EVT getShiftAmountTy(EVT LHSTy) {
778  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
779  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
780  }
781 
782  /// This method returns true if we are running before type legalization or
783  /// if the specified VT is legal.
784  bool isTypeLegal(const EVT &VT) {
785  if (!LegalTypes) return true;
786  return TLI.isTypeLegal(VT);
787  }
788 
789  /// Convenience wrapper around TargetLowering::getSetCCResultType
790  EVT getSetCCResultType(EVT VT) const {
791  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
792  }
793 
794  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
795  SDValue OrigLoad, SDValue ExtLoad,
796  ISD::NodeType ExtType);
797  };
798 
799 /// This class is a DAGUpdateListener that removes any deleted
800 /// nodes from the worklist.
801 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
802  DAGCombiner &DC;
803 
804 public:
805  explicit WorklistRemover(DAGCombiner &dc)
806  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
807 
808  void NodeDeleted(SDNode *N, SDNode *E) override {
809  DC.removeFromWorklist(N);
810  }
811 };
812 
813 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
814  DAGCombiner &DC;
815 
816 public:
817  explicit WorklistInserter(DAGCombiner &dc)
818  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
819 
820  // FIXME: Ideally we could add N to the worklist, but this causes exponential
821  // compile time costs in large DAGs, e.g. Halide.
822  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
823 };
824 
825 } // end anonymous namespace
826 
827 //===----------------------------------------------------------------------===//
828 // TargetLowering::DAGCombinerInfo implementation
829 //===----------------------------------------------------------------------===//
830 
832  ((DAGCombiner*)DC)->AddToWorklist(N);
833 }
834 
836 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
837  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
838 }
839 
841 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
842  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
843 }
844 
846 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
847  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
848 }
849 
852  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
853 }
854 
857  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
858 }
859 
860 //===----------------------------------------------------------------------===//
861 // Helper Functions
862 //===----------------------------------------------------------------------===//
863 
864 void DAGCombiner::deleteAndRecombine(SDNode *N) {
866 
867  // If the operands of this node are only used by the node, they will now be
868  // dead. Make sure to re-visit them and recursively delete dead nodes.
869  for (const SDValue &Op : N->ops())
870  // For an operand generating multiple values, one of the values may
871  // become dead allowing further simplification (e.g. split index
872  // arithmetic from an indexed load).
873  if (Op->hasOneUse() || Op->getNumValues() > 1)
874  AddToWorklist(Op.getNode());
875 
876  DAG.DeleteNode(N);
877 }
878 
879 // APInts must be the same size for most operations, this helper
880 // function zero extends the shorter of the pair so that they match.
881 // We provide an Offset so that we can create bitwidths that won't overflow.
882 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
883  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
884  LHS = LHS.zextOrSelf(Bits);
885  RHS = RHS.zextOrSelf(Bits);
886 }
887 
888 // Return true if this node is a setcc, or is a select_cc
889 // that selects between the target values used for true and false, making it
890 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
891 // the appropriate nodes based on the type of node we are checking. This
892 // simplifies life a bit for the callers.
893 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
894  SDValue &CC, bool MatchStrict) const {
895  if (N.getOpcode() == ISD::SETCC) {
896  LHS = N.getOperand(0);
897  RHS = N.getOperand(1);
898  CC = N.getOperand(2);
899  return true;
900  }
901 
902  if (MatchStrict &&
903  (N.getOpcode() == ISD::STRICT_FSETCC ||
904  N.getOpcode() == ISD::STRICT_FSETCCS)) {
905  LHS = N.getOperand(1);
906  RHS = N.getOperand(2);
907  CC = N.getOperand(3);
908  return true;
909  }
910 
911  if (N.getOpcode() != ISD::SELECT_CC ||
912  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
913  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
914  return false;
915 
916  if (TLI.getBooleanContents(N.getValueType()) ==
918  return false;
919 
920  LHS = N.getOperand(0);
921  RHS = N.getOperand(1);
922  CC = N.getOperand(4);
923  return true;
924 }
925 
926 /// Return true if this is a SetCC-equivalent operation with only one use.
927 /// If this is true, it allows the users to invert the operation for free when
928 /// it is profitable to do so.
929 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
930  SDValue N0, N1, N2;
931  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
932  return true;
933  return false;
934 }
935 
936 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
937  if (!ScalarTy.isSimple())
938  return false;
939 
940  uint64_t MaskForTy = 0ULL;
941  switch (ScalarTy.getSimpleVT().SimpleTy) {
942  case MVT::i8:
943  MaskForTy = 0xFFULL;
944  break;
945  case MVT::i16:
946  MaskForTy = 0xFFFFULL;
947  break;
948  case MVT::i32:
949  MaskForTy = 0xFFFFFFFFULL;
950  break;
951  default:
952  return false;
953  break;
954  }
955 
956  APInt Val;
957  if (ISD::isConstantSplatVector(N, Val))
958  return Val.getLimitedValue() == MaskForTy;
959 
960  return false;
961 }
962 
963 // Determines if it is a constant integer or a splat/build vector of constant
964 // integers (and undefs).
965 // Do not permit build vector implicit truncation.
966 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
967  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
968  return !(Const->isOpaque() && NoOpaques);
969  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
970  return false;
971  unsigned BitWidth = N.getScalarValueSizeInBits();
972  for (const SDValue &Op : N->op_values()) {
973  if (Op.isUndef())
974  continue;
975  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
976  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
977  (Const->isOpaque() && NoOpaques))
978  return false;
979  }
980  return true;
981 }
982 
983 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
984 // undef's.
985 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
986  if (V.getOpcode() != ISD::BUILD_VECTOR)
987  return false;
988  return isConstantOrConstantVector(V, NoOpaques) ||
990 }
991 
992 // Determine if this an indexed load with an opaque target constant index.
993 static bool canSplitIdx(LoadSDNode *LD) {
994  return MaySplitLoadIndex &&
995  (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
996  !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
997 }
998 
999 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1000  const SDLoc &DL,
1001  SDValue N0,
1002  SDValue N1) {
1003  // Currently this only tries to ensure we don't undo the GEP splits done by
1004  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1005  // we check if the following transformation would be problematic:
1006  // (load/store (add, (add, x, offset1), offset2)) ->
1007  // (load/store (add, x, offset1+offset2)).
1008 
1009  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1010  return false;
1011 
1012  if (N0.hasOneUse())
1013  return false;
1014 
1015  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1016  auto *C2 = dyn_cast<ConstantSDNode>(N1);
1017  if (!C1 || !C2)
1018  return false;
1019 
1020  const APInt &C1APIntVal = C1->getAPIntValue();
1021  const APInt &C2APIntVal = C2->getAPIntValue();
1022  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1023  return false;
1024 
1025  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1026  if (CombinedValueIntVal.getBitWidth() > 64)
1027  return false;
1028  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1029 
1030  for (SDNode *Node : N0->uses()) {
1031  auto LoadStore = dyn_cast<MemSDNode>(Node);
1032  if (LoadStore) {
1033  // Is x[offset2] already not a legal addressing mode? If so then
1034  // reassociating the constants breaks nothing (we test offset2 because
1035  // that's the one we hope to fold into the load or store).
1037  AM.HasBaseReg = true;
1038  AM.BaseOffs = C2APIntVal.getSExtValue();
1039  EVT VT = LoadStore->getMemoryVT();
1040  unsigned AS = LoadStore->getAddressSpace();
1041  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1042  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1043  continue;
1044 
1045  // Would x[offset1+offset2] still be a legal addressing mode?
1046  AM.BaseOffs = CombinedValue;
1047  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1048  return true;
1049  }
1050  }
1051 
1052  return false;
1053 }
1054 
1055 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1056 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1057 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1058  SDValue N0, SDValue N1) {
1059  EVT VT = N0.getValueType();
1060 
1061  if (N0.getOpcode() != Opc)
1062  return SDValue();
1063 
1066  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1067  if (SDValue OpNode =
1068  DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1069  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1070  return SDValue();
1071  }
1072  if (N0.hasOneUse()) {
1073  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1074  // iff (op x, c1) has one use
1075  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1076  if (!OpNode.getNode())
1077  return SDValue();
1078  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1079  }
1080  }
1081  return SDValue();
1082 }
1083 
1084 // Try to reassociate commutative binops.
1085 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1086  SDValue N1, SDNodeFlags Flags) {
1087  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1088 
1089  // Floating-point reassociation is not allowed without loose FP math.
1090  if (N0.getValueType().isFloatingPoint() ||
1092  if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1093  return SDValue();
1094 
1095  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1096  return Combined;
1097  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1098  return Combined;
1099  return SDValue();
1100 }
1101 
1102 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1103  bool AddTo) {
1104  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1105  ++NodesCombined;
1106  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1107  To[0].getNode()->dump(&DAG);
1108  dbgs() << " and " << NumTo - 1 << " other values\n");
1109  for (unsigned i = 0, e = NumTo; i != e; ++i)
1110  assert((!To[i].getNode() ||
1111  N->getValueType(i) == To[i].getValueType()) &&
1112  "Cannot combine value to value of different type!");
1113 
1114  WorklistRemover DeadNodes(*this);
1115  DAG.ReplaceAllUsesWith(N, To);
1116  if (AddTo) {
1117  // Push the new nodes and any users onto the worklist
1118  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1119  if (To[i].getNode()) {
1120  AddToWorklist(To[i].getNode());
1121  AddUsersToWorklist(To[i].getNode());
1122  }
1123  }
1124  }
1125 
1126  // Finally, if the node is now dead, remove it from the graph. The node
1127  // may not be dead if the replacement process recursively simplified to
1128  // something else needing this node.
1129  if (N->use_empty())
1130  deleteAndRecombine(N);
1131  return SDValue(N, 0);
1132 }
1133 
1134 void DAGCombiner::
1135 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1136  // Replace the old value with the new one.
1137  ++NodesCombined;
1138  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1139  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1140  dbgs() << '\n');
1141 
1142  // Replace all uses. If any nodes become isomorphic to other nodes and
1143  // are deleted, make sure to remove them from our worklist.
1144  WorklistRemover DeadNodes(*this);
1145  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1146 
1147  // Push the new node and any (possibly new) users onto the worklist.
1148  AddToWorklistWithUsers(TLO.New.getNode());
1149 
1150  // Finally, if the node is now dead, remove it from the graph. The node
1151  // may not be dead if the replacement process recursively simplified to
1152  // something else needing this node.
1153  if (TLO.Old.getNode()->use_empty())
1154  deleteAndRecombine(TLO.Old.getNode());
1155 }
1156 
1157 /// Check the specified integer node value to see if it can be simplified or if
1158 /// things it uses can be simplified by bit propagation. If so, return true.
1159 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1160  const APInt &DemandedElts,
1161  bool AssumeSingleUse) {
1162  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1163  KnownBits Known;
1164  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1165  AssumeSingleUse))
1166  return false;
1167 
1168  // Revisit the node.
1169  AddToWorklist(Op.getNode());
1170 
1171  CommitTargetLoweringOpt(TLO);
1172  return true;
1173 }
1174 
1175 /// Check the specified vector node value to see if it can be simplified or
1176 /// if things it uses can be simplified as it only uses some of the elements.
1177 /// If so, return true.
1178 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1179  const APInt &DemandedElts,
1180  bool AssumeSingleUse) {
1181  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1182  APInt KnownUndef, KnownZero;
1183  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1184  TLO, 0, AssumeSingleUse))
1185  return false;
1186 
1187  // Revisit the node.
1188  AddToWorklist(Op.getNode());
1189 
1190  CommitTargetLoweringOpt(TLO);
1191  return true;
1192 }
1193 
1194 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1195  SDLoc DL(Load);
1196  EVT VT = Load->getValueType(0);
1197  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1198 
1199  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1200  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1201  WorklistRemover DeadNodes(*this);
1202  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1203  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1204  deleteAndRecombine(Load);
1205  AddToWorklist(Trunc.getNode());
1206 }
1207 
1208 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1209  Replace = false;
1210  SDLoc DL(Op);
1211  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1212  LoadSDNode *LD = cast<LoadSDNode>(Op);
1213  EVT MemVT = LD->getMemoryVT();
1215  : LD->getExtensionType();
1216  Replace = true;
1217  return DAG.getExtLoad(ExtType, DL, PVT,
1218  LD->getChain(), LD->getBasePtr(),
1219  MemVT, LD->getMemOperand());
1220  }
1221 
1222  unsigned Opc = Op.getOpcode();
1223  switch (Opc) {
1224  default: break;
1225  case ISD::AssertSext:
1226  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1227  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1228  break;
1229  case ISD::AssertZext:
1230  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1231  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1232  break;
1233  case ISD::Constant: {
1234  unsigned ExtOpc =
1235  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1236  return DAG.getNode(ExtOpc, DL, PVT, Op);
1237  }
1238  }
1239 
1240  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1241  return SDValue();
1242  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1243 }
1244 
1245 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1247  return SDValue();
1248  EVT OldVT = Op.getValueType();
1249  SDLoc DL(Op);
1250  bool Replace = false;
1251  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1252  if (!NewOp.getNode())
1253  return SDValue();
1254  AddToWorklist(NewOp.getNode());
1255 
1256  if (Replace)
1257  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1258  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1259  DAG.getValueType(OldVT));
1260 }
1261 
1262 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1263  EVT OldVT = Op.getValueType();
1264  SDLoc DL(Op);
1265  bool Replace = false;
1266  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1267  if (!NewOp.getNode())
1268  return SDValue();
1269  AddToWorklist(NewOp.getNode());
1270 
1271  if (Replace)
1272  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1273  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1274 }
1275 
1276 /// Promote the specified integer binary operation if the target indicates it is
1277 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1278 /// i32 since i16 instructions are longer.
1279 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1280  if (!LegalOperations)
1281  return SDValue();
1282 
1283  EVT VT = Op.getValueType();
1284  if (VT.isVector() || !VT.isInteger())
1285  return SDValue();
1286 
1287  // If operation type is 'undesirable', e.g. i16 on x86, consider
1288  // promoting it.
1289  unsigned Opc = Op.getOpcode();
1290  if (TLI.isTypeDesirableForOp(Opc, VT))
1291  return SDValue();
1292 
1293  EVT PVT = VT;
1294  // Consult target whether it is a good idea to promote this operation and
1295  // what's the right type to promote it to.
1296  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1297  assert(PVT != VT && "Don't know what type to promote to!");
1298 
1299  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1300 
1301  bool Replace0 = false;
1302  SDValue N0 = Op.getOperand(0);
1303  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1304 
1305  bool Replace1 = false;
1306  SDValue N1 = Op.getOperand(1);
1307  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1308  SDLoc DL(Op);
1309 
1310  SDValue RV =
1311  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1312 
1313  // We are always replacing N0/N1's use in N and only need additional
1314  // replacements if there are additional uses.
1315  // Note: We are checking uses of the *nodes* (SDNode) rather than values
1316  // (SDValue) here because the node may reference multiple values
1317  // (for example, the chain value of a load node).
1318  Replace0 &= !N0->hasOneUse();
1319  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1320 
1321  // Combine Op here so it is preserved past replacements.
1322  CombineTo(Op.getNode(), RV);
1323 
1324  // If operands have a use ordering, make sure we deal with
1325  // predecessor first.
1326  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1327  std::swap(N0, N1);
1328  std::swap(NN0, NN1);
1329  }
1330 
1331  if (Replace0) {
1332  AddToWorklist(NN0.getNode());
1333  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1334  }
1335  if (Replace1) {
1336  AddToWorklist(NN1.getNode());
1337  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1338  }
1339  return Op;
1340  }
1341  return SDValue();
1342 }
1343 
1344 /// Promote the specified integer shift operation if the target indicates it is
1345 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1346 /// i32 since i16 instructions are longer.
1347 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1348  if (!LegalOperations)
1349  return SDValue();
1350 
1351  EVT VT = Op.getValueType();
1352  if (VT.isVector() || !VT.isInteger())
1353  return SDValue();
1354 
1355  // If operation type is 'undesirable', e.g. i16 on x86, consider
1356  // promoting it.
1357  unsigned Opc = Op.getOpcode();
1358  if (TLI.isTypeDesirableForOp(Opc, VT))
1359  return SDValue();
1360 
1361  EVT PVT = VT;
1362  // Consult target whether it is a good idea to promote this operation and
1363  // what's the right type to promote it to.
1364  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1365  assert(PVT != VT && "Don't know what type to promote to!");
1366 
1367  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1368 
1369  bool Replace = false;
1370  SDValue N0 = Op.getOperand(0);
1371  SDValue N1 = Op.getOperand(1);
1372  if (Opc == ISD::SRA)
1373  N0 = SExtPromoteOperand(N0, PVT);
1374  else if (Opc == ISD::SRL)
1375  N0 = ZExtPromoteOperand(N0, PVT);
1376  else
1377  N0 = PromoteOperand(N0, PVT, Replace);
1378 
1379  if (!N0.getNode())
1380  return SDValue();
1381 
1382  SDLoc DL(Op);
1383  SDValue RV =
1384  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1385 
1386  if (Replace)
1387  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1388 
1389  // Deal with Op being deleted.
1390  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1391  return RV;
1392  }
1393  return SDValue();
1394 }
1395 
1396 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1397  if (!LegalOperations)
1398  return SDValue();
1399 
1400  EVT VT = Op.getValueType();
1401  if (VT.isVector() || !VT.isInteger())
1402  return SDValue();
1403 
1404  // If operation type is 'undesirable', e.g. i16 on x86, consider
1405  // promoting it.
1406  unsigned Opc = Op.getOpcode();
1407  if (TLI.isTypeDesirableForOp(Opc, VT))
1408  return SDValue();
1409 
1410  EVT PVT = VT;
1411  // Consult target whether it is a good idea to promote this operation and
1412  // what's the right type to promote it to.
1413  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1414  assert(PVT != VT && "Don't know what type to promote to!");
1415  // fold (aext (aext x)) -> (aext x)
1416  // fold (aext (zext x)) -> (zext x)
1417  // fold (aext (sext x)) -> (sext x)
1418  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1419  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1420  }
1421  return SDValue();
1422 }
1423 
1424 bool DAGCombiner::PromoteLoad(SDValue Op) {
1425  if (!LegalOperations)
1426  return false;
1427 
1428  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1429  return false;
1430 
1431  EVT VT = Op.getValueType();
1432  if (VT.isVector() || !VT.isInteger())
1433  return false;
1434 
1435  // If operation type is 'undesirable', e.g. i16 on x86, consider
1436  // promoting it.
1437  unsigned Opc = Op.getOpcode();
1438  if (TLI.isTypeDesirableForOp(Opc, VT))
1439  return false;
1440 
1441  EVT PVT = VT;
1442  // Consult target whether it is a good idea to promote this operation and
1443  // what's the right type to promote it to.
1444  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1445  assert(PVT != VT && "Don't know what type to promote to!");
1446 
1447  SDLoc DL(Op);
1448  SDNode *N = Op.getNode();
1449  LoadSDNode *LD = cast<LoadSDNode>(N);
1450  EVT MemVT = LD->getMemoryVT();
1452  : LD->getExtensionType();
1453  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1454  LD->getChain(), LD->getBasePtr(),
1455  MemVT, LD->getMemOperand());
1456  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1457 
1458  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1459  Result.getNode()->dump(&DAG); dbgs() << '\n');
1460  WorklistRemover DeadNodes(*this);
1461  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1462  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1463  deleteAndRecombine(N);
1464  AddToWorklist(Result.getNode());
1465  return true;
1466  }
1467  return false;
1468 }
1469 
1470 /// Recursively delete a node which has no uses and any operands for
1471 /// which it is the only use.
1472 ///
1473 /// Note that this both deletes the nodes and removes them from the worklist.
1474 /// It also adds any nodes who have had a user deleted to the worklist as they
1475 /// may now have only one use and subject to other combines.
1476 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1477  if (!N->use_empty())
1478  return false;
1479 
1481  Nodes.insert(N);
1482  do {
1483  N = Nodes.pop_back_val();
1484  if (!N)
1485  continue;
1486 
1487  if (N->use_empty()) {
1488  for (const SDValue &ChildN : N->op_values())
1489  Nodes.insert(ChildN.getNode());
1490 
1492  DAG.DeleteNode(N);
1493  } else {
1494  AddToWorklist(N);
1495  }
1496  } while (!Nodes.empty());
1497  return true;
1498 }
1499 
1500 //===----------------------------------------------------------------------===//
1501 // Main DAG Combiner implementation
1502 //===----------------------------------------------------------------------===//
1503 
1504 void DAGCombiner::Run(CombineLevel AtLevel) {
1505  // set the instance variables, so that the various visit routines may use it.
1506  Level = AtLevel;
1507  LegalDAG = Level >= AfterLegalizeDAG;
1508  LegalOperations = Level >= AfterLegalizeVectorOps;
1509  LegalTypes = Level >= AfterLegalizeTypes;
1510 
1511  WorklistInserter AddNodes(*this);
1512 
1513  // Add all the dag nodes to the worklist.
1514  for (SDNode &Node : DAG.allnodes())
1515  AddToWorklist(&Node);
1516 
1517  // Create a dummy node (which is not added to allnodes), that adds a reference
1518  // to the root node, preventing it from being deleted, and tracking any
1519  // changes of the root.
1520  HandleSDNode Dummy(DAG.getRoot());
1521 
1522  // While we have a valid worklist entry node, try to combine it.
1523  while (SDNode *N = getNextWorklistEntry()) {
1524  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1525  // N is deleted from the DAG, since they too may now be dead or may have a
1526  // reduced number of uses, allowing other xforms.
1527  if (recursivelyDeleteUnusedNodes(N))
1528  continue;
1529 
1530  WorklistRemover DeadNodes(*this);
1531 
1532  // If this combine is running after legalizing the DAG, re-legalize any
1533  // nodes pulled off the worklist.
1534  if (LegalDAG) {
1535  SmallSetVector<SDNode *, 16> UpdatedNodes;
1536  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1537 
1538  for (SDNode *LN : UpdatedNodes)
1539  AddToWorklistWithUsers(LN);
1540 
1541  if (!NIsValid)
1542  continue;
1543  }
1544 
1545  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1546 
1547  // Add any operands of the new node which have not yet been combined to the
1548  // worklist as well. Because the worklist uniques things already, this
1549  // won't repeatedly process the same operand.
1550  CombinedNodes.insert(N);
1551  for (const SDValue &ChildN : N->op_values())
1552  if (!CombinedNodes.count(ChildN.getNode()))
1553  AddToWorklist(ChildN.getNode());
1554 
1555  SDValue RV = combine(N);
1556 
1557  if (!RV.getNode())
1558  continue;
1559 
1560  ++NodesCombined;
1561 
1562  // If we get back the same node we passed in, rather than a new node or
1563  // zero, we know that the node must have defined multiple values and
1564  // CombineTo was used. Since CombineTo takes care of the worklist
1565  // mechanics for us, we have no work to do in this case.
1566  if (RV.getNode() == N)
1567  continue;
1568 
1569  assert(N->getOpcode() != ISD::DELETED_NODE &&
1570  RV.getOpcode() != ISD::DELETED_NODE &&
1571  "Node was deleted but visit returned new node!");
1572 
1573  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1574 
1575  if (N->getNumValues() == RV.getNode()->getNumValues())
1576  DAG.ReplaceAllUsesWith(N, RV.getNode());
1577  else {
1578  assert(N->getValueType(0) == RV.getValueType() &&
1579  N->getNumValues() == 1 && "Type mismatch");
1580  DAG.ReplaceAllUsesWith(N, &RV);
1581  }
1582 
1583  // Push the new node and any users onto the worklist. Omit this if the
1584  // new node is the EntryToken (e.g. if a store managed to get optimized
1585  // out), because re-visiting the EntryToken and its users will not uncover
1586  // any additional opportunities, but there may be a large number of such
1587  // users, potentially causing compile time explosion.
1588  if (RV.getOpcode() != ISD::EntryToken) {
1589  AddToWorklist(RV.getNode());
1590  AddUsersToWorklist(RV.getNode());
1591  }
1592 
1593  // Finally, if the node is now dead, remove it from the graph. The node
1594  // may not be dead if the replacement process recursively simplified to
1595  // something else needing this node. This will also take care of adding any
1596  // operands which have lost a user to the worklist.
1597  recursivelyDeleteUnusedNodes(N);
1598  }
1599 
1600  // If the root changed (e.g. it was a dead load, update the root).
1601  DAG.setRoot(Dummy.getValue());
1602  DAG.RemoveDeadNodes();
1603 }
1604 
1605 SDValue DAGCombiner::visit(SDNode *N) {
1606  switch (N->getOpcode()) {
1607  default: break;
1608  case ISD::TokenFactor: return visitTokenFactor(N);
1609  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1610  case ISD::ADD: return visitADD(N);
1611  case ISD::SUB: return visitSUB(N);
1612  case ISD::SADDSAT:
1613  case ISD::UADDSAT: return visitADDSAT(N);
1614  case ISD::SSUBSAT:
1615  case ISD::USUBSAT: return visitSUBSAT(N);
1616  case ISD::ADDC: return visitADDC(N);
1617  case ISD::SADDO:
1618  case ISD::UADDO: return visitADDO(N);
1619  case ISD::SUBC: return visitSUBC(N);
1620  case ISD::SSUBO:
1621  case ISD::USUBO: return visitSUBO(N);
1622  case ISD::ADDE: return visitADDE(N);
1623  case ISD::ADDCARRY: return visitADDCARRY(N);
1624  case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1625  case ISD::SUBE: return visitSUBE(N);
1626  case ISD::SUBCARRY: return visitSUBCARRY(N);
1627  case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1628  case ISD::SMULFIX:
1629  case ISD::SMULFIXSAT:
1630  case ISD::UMULFIX:
1631  case ISD::UMULFIXSAT: return visitMULFIX(N);
1632  case ISD::MUL: return visitMUL(N);
1633  case ISD::SDIV: return visitSDIV(N);
1634  case ISD::UDIV: return visitUDIV(N);
1635  case ISD::SREM:
1636  case ISD::UREM: return visitREM(N);
1637  case ISD::MULHU: return visitMULHU(N);
1638  case ISD::MULHS: return visitMULHS(N);
1639  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1640  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1641  case ISD::SMULO:
1642  case ISD::UMULO: return visitMULO(N);
1643  case ISD::SMIN:
1644  case ISD::SMAX:
1645  case ISD::UMIN:
1646  case ISD::UMAX: return visitIMINMAX(N);
1647  case ISD::AND: return visitAND(N);
1648  case ISD::OR: return visitOR(N);
1649  case ISD::XOR: return visitXOR(N);
1650  case ISD::SHL: return visitSHL(N);
1651  case ISD::SRA: return visitSRA(N);
1652  case ISD::SRL: return visitSRL(N);
1653  case ISD::ROTR:
1654  case ISD::ROTL: return visitRotate(N);
1655  case ISD::FSHL:
1656  case ISD::FSHR: return visitFunnelShift(N);
1657  case ISD::ABS: return visitABS(N);
1658  case ISD::BSWAP: return visitBSWAP(N);
1659  case ISD::BITREVERSE: return visitBITREVERSE(N);
1660  case ISD::CTLZ: return visitCTLZ(N);
1661  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1662  case ISD::CTTZ: return visitCTTZ(N);
1663  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1664  case ISD::CTPOP: return visitCTPOP(N);
1665  case ISD::SELECT: return visitSELECT(N);
1666  case ISD::VSELECT: return visitVSELECT(N);
1667  case ISD::SELECT_CC: return visitSELECT_CC(N);
1668  case ISD::SETCC: return visitSETCC(N);
1669  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1670  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1671  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1672  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1673  case ISD::AssertSext:
1674  case ISD::AssertZext: return visitAssertExt(N);
1675  case ISD::AssertAlign: return visitAssertAlign(N);
1676  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1677  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1678  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1679  case ISD::TRUNCATE: return visitTRUNCATE(N);
1680  case ISD::BITCAST: return visitBITCAST(N);
1681  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1682  case ISD::FADD: return visitFADD(N);
1683  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1684  case ISD::FSUB: return visitFSUB(N);
1685  case ISD::FMUL: return visitFMUL(N);
1686  case ISD::FMA: return visitFMA(N);
1687  case ISD::FDIV: return visitFDIV(N);
1688  case ISD::FREM: return visitFREM(N);
1689  case ISD::FSQRT: return visitFSQRT(N);
1690  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1691  case ISD::FPOW: return visitFPOW(N);
1692  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1693  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1694  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1695  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1696  case ISD::FP_ROUND: return visitFP_ROUND(N);
1697  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1698  case ISD::FNEG: return visitFNEG(N);
1699  case ISD::FABS: return visitFABS(N);
1700  case ISD::FFLOOR: return visitFFLOOR(N);
1701  case ISD::FMINNUM: return visitFMINNUM(N);
1702  case ISD::FMAXNUM: return visitFMAXNUM(N);
1703  case ISD::FMINIMUM: return visitFMINIMUM(N);
1704  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1705  case ISD::FCEIL: return visitFCEIL(N);
1706  case ISD::FTRUNC: return visitFTRUNC(N);
1707  case ISD::BRCOND: return visitBRCOND(N);
1708  case ISD::BR_CC: return visitBR_CC(N);
1709  case ISD::LOAD: return visitLOAD(N);
1710  case ISD::STORE: return visitSTORE(N);
1711  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1712  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1713  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1714  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1715  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1716  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1717  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1718  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1719  case ISD::MGATHER: return visitMGATHER(N);
1720  case ISD::MLOAD: return visitMLOAD(N);
1721  case ISD::MSCATTER: return visitMSCATTER(N);
1722  case ISD::MSTORE: return visitMSTORE(N);
1723  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1724  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1725  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1726  case ISD::FREEZE: return visitFREEZE(N);
1727  case ISD::VECREDUCE_FADD:
1728  case ISD::VECREDUCE_FMUL:
1729  case ISD::VECREDUCE_ADD:
1730  case ISD::VECREDUCE_MUL:
1731  case ISD::VECREDUCE_AND:
1732  case ISD::VECREDUCE_OR:
1733  case ISD::VECREDUCE_XOR:
1734  case ISD::VECREDUCE_SMAX:
1735  case ISD::VECREDUCE_SMIN:
1736  case ISD::VECREDUCE_UMAX:
1737  case ISD::VECREDUCE_UMIN:
1738  case ISD::VECREDUCE_FMAX:
1739  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1740  }
1741  return SDValue();
1742 }
1743 
1745  SDValue RV;
1746  if (!DisableGenericCombines)
1747  RV = visit(N);
1748 
1749  // If nothing happened, try a target-specific DAG combine.
1750  if (!RV.getNode()) {
1751  assert(N->getOpcode() != ISD::DELETED_NODE &&
1752  "Node was deleted but visit returned NULL!");
1753 
1754  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1755  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1756 
1757  // Expose the DAG combiner to the target combiner impls.
1759  DagCombineInfo(DAG, Level, false, this);
1760 
1761  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1762  }
1763  }
1764 
1765  // If nothing happened still, try promoting the operation.
1766  if (!RV.getNode()) {
1767  switch (N->getOpcode()) {
1768  default: break;
1769  case ISD::ADD:
1770  case ISD::SUB:
1771  case ISD::MUL:
1772  case ISD::AND:
1773  case ISD::OR:
1774  case ISD::XOR:
1775  RV = PromoteIntBinOp(SDValue(N, 0));
1776  break;
1777  case ISD::SHL:
1778  case ISD::SRA:
1779  case ISD::SRL:
1780  RV = PromoteIntShiftOp(SDValue(N, 0));
1781  break;
1782  case ISD::SIGN_EXTEND:
1783  case ISD::ZERO_EXTEND:
1784  case ISD::ANY_EXTEND:
1785  RV = PromoteExtend(SDValue(N, 0));
1786  break;
1787  case ISD::LOAD:
1788  if (PromoteLoad(SDValue(N, 0)))
1789  RV = SDValue(N, 0);
1790  break;
1791  }
1792  }
1793 
1794  // If N is a commutative binary node, try to eliminate it if the commuted
1795  // version is already present in the DAG.
1796  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1797  N->getNumValues() == 1) {
1798  SDValue N0 = N->getOperand(0);
1799  SDValue N1 = N->getOperand(1);
1800 
1801  // Constant operands are canonicalized to RHS.
1802  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1803  SDValue Ops[] = {N1, N0};
1804  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1805  N->getFlags());
1806  if (CSENode)
1807  return SDValue(CSENode, 0);
1808  }
1809  }
1810 
1811  return RV;
1812 }
1813 
1814 /// Given a node, return its input chain if it has one, otherwise return a null
1815 /// sd operand.
1817  if (unsigned NumOps = N->getNumOperands()) {
1818  if (N->getOperand(0).getValueType() == MVT::Other)
1819  return N->getOperand(0);
1820  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1821  return N->getOperand(NumOps-1);
1822  for (unsigned i = 1; i < NumOps-1; ++i)
1823  if (N->getOperand(i).getValueType() == MVT::Other)
1824  return N->getOperand(i);
1825  }
1826  return SDValue();
1827 }
1828 
1829 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1830  // If N has two operands, where one has an input chain equal to the other,
1831  // the 'other' chain is redundant.
1832  if (N->getNumOperands() == 2) {
1833  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1834  return N->getOperand(0);
1835  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1836  return N->getOperand(1);
1837  }
1838 
1839  // Don't simplify token factors if optnone.
1840  if (OptLevel == CodeGenOpt::None)
1841  return SDValue();
1842 
1843  // Don't simplify the token factor if the node itself has too many operands.
1844  if (N->getNumOperands() > TokenFactorInlineLimit)
1845  return SDValue();
1846 
1847  // If the sole user is a token factor, we should make sure we have a
1848  // chance to merge them together. This prevents TF chains from inhibiting
1849  // optimizations.
1850  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1851  AddToWorklist(*(N->use_begin()));
1852 
1853  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1854  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1855  SmallPtrSet<SDNode*, 16> SeenOps;
1856  bool Changed = false; // If we should replace this token factor.
1857 
1858  // Start out with this token factor.
1859  TFs.push_back(N);
1860 
1861  // Iterate through token factors. The TFs grows when new token factors are
1862  // encountered.
1863  for (unsigned i = 0; i < TFs.size(); ++i) {
1864  // Limit number of nodes to inline, to avoid quadratic compile times.
1865  // We have to add the outstanding Token Factors to Ops, otherwise we might
1866  // drop Ops from the resulting Token Factors.
1867  if (Ops.size() > TokenFactorInlineLimit) {
1868  for (unsigned j = i; j < TFs.size(); j++)
1869  Ops.emplace_back(TFs[j], 0);
1870  // Drop unprocessed Token Factors from TFs, so we do not add them to the
1871  // combiner worklist later.
1872  TFs.resize(i);
1873  break;
1874  }
1875 
1876  SDNode *TF = TFs[i];
1877  // Check each of the operands.
1878  for (const SDValue &Op : TF->op_values()) {
1879  switch (Op.getOpcode()) {
1880  case ISD::EntryToken:
1881  // Entry tokens don't need to be added to the list. They are
1882  // redundant.
1883  Changed = true;
1884  break;
1885 
1886  case ISD::TokenFactor:
1887  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1888  // Queue up for processing.
1889  TFs.push_back(Op.getNode());
1890  Changed = true;
1891  break;
1892  }
1894 
1895  default:
1896  // Only add if it isn't already in the list.
1897  if (SeenOps.insert(Op.getNode()).second)
1898  Ops.push_back(Op);
1899  else
1900  Changed = true;
1901  break;
1902  }
1903  }
1904  }
1905 
1906  // Re-visit inlined Token Factors, to clean them up in case they have been
1907  // removed. Skip the first Token Factor, as this is the current node.
1908  for (unsigned i = 1, e = TFs.size(); i < e; i++)
1909  AddToWorklist(TFs[i]);
1910 
1911  // Remove Nodes that are chained to another node in the list. Do so
1912  // by walking up chains breath-first stopping when we've seen
1913  // another operand. In general we must climb to the EntryNode, but we can exit
1914  // early if we find all remaining work is associated with just one operand as
1915  // no further pruning is possible.
1916 
1917  // List of nodes to search through and original Ops from which they originate.
1919  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1920  SmallPtrSet<SDNode *, 16> SeenChains;
1921  bool DidPruneOps = false;
1922 
1923  unsigned NumLeftToConsider = 0;
1924  for (const SDValue &Op : Ops) {
1925  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1926  OpWorkCount.push_back(1);
1927  }
1928 
1929  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1930  // If this is an Op, we can remove the op from the list. Remark any
1931  // search associated with it as from the current OpNumber.
1932  if (SeenOps.contains(Op)) {
1933  Changed = true;
1934  DidPruneOps = true;
1935  unsigned OrigOpNumber = 0;
1936  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1937  OrigOpNumber++;
1938  assert((OrigOpNumber != Ops.size()) &&
1939  "expected to find TokenFactor Operand");
1940  // Re-mark worklist from OrigOpNumber to OpNumber
1941  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1942  if (Worklist[i].second == OrigOpNumber) {
1943  Worklist[i].second = OpNumber;
1944  }
1945  }
1946  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1947  OpWorkCount[OrigOpNumber] = 0;
1948  NumLeftToConsider--;
1949  }
1950  // Add if it's a new chain
1951  if (SeenChains.insert(Op).second) {
1952  OpWorkCount[OpNumber]++;
1953  Worklist.push_back(std::make_pair(Op, OpNumber));
1954  }
1955  };
1956 
1957  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1958  // We need at least be consider at least 2 Ops to prune.
1959  if (NumLeftToConsider <= 1)
1960  break;
1961  auto CurNode = Worklist[i].first;
1962  auto CurOpNumber = Worklist[i].second;
1963  assert((OpWorkCount[CurOpNumber] > 0) &&
1964  "Node should not appear in worklist");
1965  switch (CurNode->getOpcode()) {
1966  case ISD::EntryToken:
1967  // Hitting EntryToken is the only way for the search to terminate without
1968  // hitting
1969  // another operand's search. Prevent us from marking this operand
1970  // considered.
1971  NumLeftToConsider++;
1972  break;
1973  case ISD::TokenFactor:
1974  for (const SDValue &Op : CurNode->op_values())
1975  AddToWorklist(i, Op.getNode(), CurOpNumber);
1976  break;
1977  case ISD::LIFETIME_START:
1978  case ISD::LIFETIME_END:
1979  case ISD::CopyFromReg:
1980  case ISD::CopyToReg:
1981  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1982  break;
1983  default:
1984  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1985  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1986  break;
1987  }
1988  OpWorkCount[CurOpNumber]--;
1989  if (OpWorkCount[CurOpNumber] == 0)
1990  NumLeftToConsider--;
1991  }
1992 
1993  // If we've changed things around then replace token factor.
1994  if (Changed) {
1995  SDValue Result;
1996  if (Ops.empty()) {
1997  // The entry token is the only possible outcome.
1998  Result = DAG.getEntryNode();
1999  } else {
2000  if (DidPruneOps) {
2001  SmallVector<SDValue, 8> PrunedOps;
2002  //
2003  for (const SDValue &Op : Ops) {
2004  if (SeenChains.count(Op.getNode()) == 0)
2005  PrunedOps.push_back(Op);
2006  }
2007  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2008  } else {
2009  Result = DAG.getTokenFactor(SDLoc(N), Ops);
2010  }
2011  }
2012  return Result;
2013  }
2014  return SDValue();
2015 }
2016 
2017 /// MERGE_VALUES can always be eliminated.
2018 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2019  WorklistRemover DeadNodes(*this);
2020  // Replacing results may cause a different MERGE_VALUES to suddenly
2021  // be CSE'd with N, and carry its uses with it. Iterate until no
2022  // uses remain, to ensure that the node can be safely deleted.
2023  // First add the users of this node to the work list so that they
2024  // can be tried again once they have new operands.
2025  AddUsersToWorklist(N);
2026  do {
2027  // Do as a single replacement to avoid rewalking use lists.
2029  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2030  Ops.push_back(N->getOperand(i));
2031  DAG.ReplaceAllUsesWith(N, Ops.data());
2032  } while (!N->use_empty());
2033  deleteAndRecombine(N);
2034  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2035 }
2036 
2037 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2038 /// ConstantSDNode pointer else nullptr.
2040  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2041  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2042 }
2043 
2044 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2045 /// and that N may be folded in the load / store addressing mode.
2047  const TargetLowering &TLI) {
2048  EVT VT;
2049  unsigned AS;
2050 
2051  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2052  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2053  return false;
2054  VT = LD->getMemoryVT();
2055  AS = LD->getAddressSpace();
2056  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2057  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2058  return false;
2059  VT = ST->getMemoryVT();
2060  AS = ST->getAddressSpace();
2061  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2062  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2063  return false;
2064  VT = LD->getMemoryVT();
2065  AS = LD->getAddressSpace();
2066  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2067  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2068  return false;
2069  VT = ST->getMemoryVT();
2070  AS = ST->getAddressSpace();
2071  } else
2072  return false;
2073 
2075  if (N->getOpcode() == ISD::ADD) {
2076  AM.HasBaseReg = true;
2077  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2078  if (Offset)
2079  // [reg +/- imm]
2080  AM.BaseOffs = Offset->getSExtValue();
2081  else
2082  // [reg +/- reg]
2083  AM.Scale = 1;
2084  } else if (N->getOpcode() == ISD::SUB) {
2085  AM.HasBaseReg = true;
2086  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2087  if (Offset)
2088  // [reg +/- imm]
2089  AM.BaseOffs = -Offset->getSExtValue();
2090  else
2091  // [reg +/- reg]
2092  AM.Scale = 1;
2093  } else
2094  return false;
2095 
2096  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2097  VT.getTypeForEVT(*DAG.getContext()), AS);
2098 }
2099 
2100 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2101  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2102  "Unexpected binary operator");
2103 
2104  // Don't do this unless the old select is going away. We want to eliminate the
2105  // binary operator, not replace a binop with a select.
2106  // TODO: Handle ISD::SELECT_CC.
2107  unsigned SelOpNo = 0;
2108  SDValue Sel = BO->getOperand(0);
2109  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2110  SelOpNo = 1;
2111  Sel = BO->getOperand(1);
2112  }
2113 
2114  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2115  return SDValue();
2116 
2117  SDValue CT = Sel.getOperand(1);
2118  if (!isConstantOrConstantVector(CT, true) &&
2120  return SDValue();
2121 
2122  SDValue CF = Sel.getOperand(2);
2123  if (!isConstantOrConstantVector(CF, true) &&
2125  return SDValue();
2126 
2127  // Bail out if any constants are opaque because we can't constant fold those.
2128  // The exception is "and" and "or" with either 0 or -1 in which case we can
2129  // propagate non constant operands into select. I.e.:
2130  // and (select Cond, 0, -1), X --> select Cond, 0, X
2131  // or X, (select Cond, -1, 0) --> select Cond, -1, X
2132  auto BinOpcode = BO->getOpcode();
2133  bool CanFoldNonConst =
2134  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2137 
2138  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2139  if (!CanFoldNonConst &&
2140  !isConstantOrConstantVector(CBO, true) &&
2142  return SDValue();
2143 
2144  EVT VT = BO->getValueType(0);
2145 
2146  // We have a select-of-constants followed by a binary operator with a
2147  // constant. Eliminate the binop by pulling the constant math into the select.
2148  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2149  SDLoc DL(Sel);
2150  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2151  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2152  if (!CanFoldNonConst && !NewCT.isUndef() &&
2153  !isConstantOrConstantVector(NewCT, true) &&
2155  return SDValue();
2156 
2157  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2158  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2159  if (!CanFoldNonConst && !NewCF.isUndef() &&
2160  !isConstantOrConstantVector(NewCF, true) &&
2162  return SDValue();
2163 
2164  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2165  SelectOp->setFlags(BO->getFlags());
2166  return SelectOp;
2167 }
2168 
2170  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2171  "Expecting add or sub");
2172 
2173  // Match a constant operand and a zext operand for the math instruction:
2174  // add Z, C
2175  // sub C, Z
2176  bool IsAdd = N->getOpcode() == ISD::ADD;
2177  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2178  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2179  auto *CN = dyn_cast<ConstantSDNode>(C);
2180  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2181  return SDValue();
2182 
2183  // Match the zext operand as a setcc of a boolean.
2184  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2185  Z.getOperand(0).getValueType() != MVT::i1)
2186  return SDValue();
2187 
2188  // Match the compare as: setcc (X & 1), 0, eq.
2189  SDValue SetCC = Z.getOperand(0);
2190  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2191  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2192  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2193  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2194  return SDValue();
2195 
2196  // We are adding/subtracting a constant and an inverted low bit. Turn that
2197  // into a subtract/add of the low bit with incremented/decremented constant:
2198  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2199  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2200  EVT VT = C.getValueType();
2201  SDLoc DL(N);
2202  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2203  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2204  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2205  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2206 }
2207 
2208 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2209 /// a shift and add with a different constant.
2211  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2212  "Expecting add or sub");
2213 
2214  // We need a constant operand for the add/sub, and the other operand is a
2215  // logical shift right: add (srl), C or sub C, (srl).
2216  bool IsAdd = N->getOpcode() == ISD::ADD;
2217  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2218  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2219  if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2220  ShiftOp.getOpcode() != ISD::SRL)
2221  return SDValue();
2222 
2223  // The shift must be of a 'not' value.
2224  SDValue Not = ShiftOp.getOperand(0);
2225  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2226  return SDValue();
2227 
2228  // The shift must be moving the sign bit to the least-significant-bit.
2229  EVT VT = ShiftOp.getValueType();
2230  SDValue ShAmt = ShiftOp.getOperand(1);
2231  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2232  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2233  return SDValue();
2234 
2235  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2236  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2237  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2238  SDLoc DL(N);
2239  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2240  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2241  if (SDValue NewC =
2242  DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2243  {ConstantOp, DAG.getConstant(1, DL, VT)}))
2244  return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2245  return SDValue();
2246 }
2247 
2248 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2249 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2250 /// are no common bits set in the operands).
2251 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2252  SDValue N0 = N->getOperand(0);
2253  SDValue N1 = N->getOperand(1);
2254  EVT VT = N0.getValueType();
2255  SDLoc DL(N);
2256 
2257  // fold vector ops
2258  if (VT.isVector()) {
2259  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2260  return FoldedVOp;
2261 
2262  // fold (add x, 0) -> x, vector edition
2264  return N0;
2266  return N1;
2267  }
2268 
2269  // fold (add x, undef) -> undef
2270  if (N0.isUndef())
2271  return N0;
2272 
2273  if (N1.isUndef())
2274  return N1;
2275 
2277  // canonicalize constant to RHS
2279  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2280  // fold (add c1, c2) -> c1+c2
2281  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2282  }
2283 
2284  // fold (add x, 0) -> x
2285  if (isNullConstant(N1))
2286  return N0;
2287 
2288  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2289  // fold ((A-c1)+c2) -> (A+(c2-c1))
2290  if (N0.getOpcode() == ISD::SUB &&
2291  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2292  SDValue Sub =
2293  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2294  assert(Sub && "Constant folding failed");
2295  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2296  }
2297 
2298  // fold ((c1-A)+c2) -> (c1+c2)-A
2299  if (N0.getOpcode() == ISD::SUB &&
2300  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2301  SDValue Add =
2302  DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2303  assert(Add && "Constant folding failed");
2304  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2305  }
2306 
2307  // add (sext i1 X), 1 -> zext (not i1 X)
2308  // We don't transform this pattern:
2309  // add (zext i1 X), -1 -> sext (not i1 X)
2310  // because most (?) targets generate better code for the zext form.
2311  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2312  isOneOrOneSplat(N1)) {
2313  SDValue X = N0.getOperand(0);
2314  if ((!LegalOperations ||
2315  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2316  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2317  X.getScalarValueSizeInBits() == 1) {
2318  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2319  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2320  }
2321  }
2322 
2323  // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2324  // equivalent to (add x, c0).
2325  if (N0.getOpcode() == ISD::OR &&
2326  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2327  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2328  if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2329  {N1, N0.getOperand(1)}))
2330  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2331  }
2332  }
2333 
2334  if (SDValue NewSel = foldBinOpIntoSelect(N))
2335  return NewSel;
2336 
2337  // reassociate add
2338  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2339  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2340  return RADD;
2341  }
2342  // fold ((0-A) + B) -> B-A
2343  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2344  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2345 
2346  // fold (A + (0-B)) -> A-B
2347  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2348  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2349 
2350  // fold (A+(B-A)) -> B
2351  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2352  return N1.getOperand(0);
2353 
2354  // fold ((B-A)+A) -> B
2355  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2356  return N0.getOperand(0);
2357 
2358  // fold ((A-B)+(C-A)) -> (C-B)
2359  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2360  N0.getOperand(0) == N1.getOperand(1))
2361  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2362  N0.getOperand(1));
2363 
2364  // fold ((A-B)+(B-C)) -> (A-C)
2365  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2366  N0.getOperand(1) == N1.getOperand(0))
2367  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2368  N1.getOperand(1));
2369 
2370  // fold (A+(B-(A+C))) to (B-C)
2371  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2372  N0 == N1.getOperand(1).getOperand(0))
2373  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2374  N1.getOperand(1).getOperand(1));
2375 
2376  // fold (A+(B-(C+A))) to (B-C)
2377  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2378  N0 == N1.getOperand(1).getOperand(1))
2379  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380  N1.getOperand(1).getOperand(0));
2381 
2382  // fold (A+((B-A)+or-C)) to (B+or-C)
2383  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2384  N1.getOperand(0).getOpcode() == ISD::SUB &&
2385  N0 == N1.getOperand(0).getOperand(1))
2386  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2387  N1.getOperand(1));
2388 
2389  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2390  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2391  SDValue N00 = N0.getOperand(0);
2392  SDValue N01 = N0.getOperand(1);
2393  SDValue N10 = N1.getOperand(0);
2394  SDValue N11 = N1.getOperand(1);
2395 
2397  return DAG.getNode(ISD::SUB, DL, VT,
2398  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2399  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2400  }
2401 
2402  // fold (add (umax X, C), -C) --> (usubsat X, C)
2403  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2404  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2405  return (!Max && !Op) ||
2406  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2407  };
2408  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2409  /*AllowUndefs*/ true))
2410  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2411  N0.getOperand(1));
2412  }
2413 
2414  if (SimplifyDemandedBits(SDValue(N, 0)))
2415  return SDValue(N, 0);
2416 
2417  if (isOneOrOneSplat(N1)) {
2418  // fold (add (xor a, -1), 1) -> (sub 0, a)
2419  if (isBitwiseNot(N0))
2420  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2421  N0.getOperand(0));
2422 
2423  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2424  if (N0.getOpcode() == ISD::ADD ||
2425  N0.getOpcode() == ISD::UADDO ||
2426  N0.getOpcode() == ISD::SADDO) {
2427  SDValue A, Xor;
2428 
2429  if (isBitwiseNot(N0.getOperand(0))) {
2430  A = N0.getOperand(1);
2431  Xor = N0.getOperand(0);
2432  } else if (isBitwiseNot(N0.getOperand(1))) {
2433  A = N0.getOperand(0);
2434  Xor = N0.getOperand(1);
2435  }
2436 
2437  if (Xor)
2438  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2439  }
2440 
2441  // Look for:
2442  // add (add x, y), 1
2443  // And if the target does not like this form then turn into:
2444  // sub y, (xor x, -1)
2445  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2446  N0.getOpcode() == ISD::ADD) {
2447  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2448  DAG.getAllOnesConstant(DL, VT));
2449  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2450  }
2451  }
2452 
2453  // (x - y) + -1 -> add (xor y, -1), x
2454  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2456  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2457  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2458  }
2459 
2460  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2461  return Combined;
2462 
2463  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2464  return Combined;
2465 
2466  return SDValue();
2467 }
2468 
2469 SDValue DAGCombiner::visitADD(SDNode *N) {
2470  SDValue N0 = N->getOperand(0);
2471  SDValue N1 = N->getOperand(1);
2472  EVT VT = N0.getValueType();
2473  SDLoc DL(N);
2474 
2475  if (SDValue Combined = visitADDLike(N))
2476  return Combined;
2477 
2478  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2479  return V;
2480 
2481  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2482  return V;
2483 
2484  // fold (a+b) -> (a|b) iff a and b share no bits.
2485  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2486  DAG.haveNoCommonBitsSet(N0, N1))
2487  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2488 
2489  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2490  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2491  const APInt &C0 = N0->getConstantOperandAPInt(0);
2492  const APInt &C1 = N1->getConstantOperandAPInt(0);
2493  return DAG.getVScale(DL, VT, C0 + C1);
2494  }
2495 
2496  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2497  if ((N0.getOpcode() == ISD::ADD) &&
2498  (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2499  (N1.getOpcode() == ISD::VSCALE)) {
2500  const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2501  const APInt &VS1 = N1->getConstantOperandAPInt(0);
2502  SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2503  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2504  }
2505 
2506  // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2507  if (N0.getOpcode() == ISD::STEP_VECTOR &&
2508  N1.getOpcode() == ISD::STEP_VECTOR) {
2509  const APInt &C0 = N0->getConstantOperandAPInt(0);
2510  const APInt &C1 = N1->getConstantOperandAPInt(0);
2511  EVT SVT = N0.getOperand(0).getValueType();
2512  SDValue NewStep = DAG.getConstant(C0 + C1, DL, SVT);
2513  return DAG.getStepVector(DL, VT, NewStep);
2514  }
2515 
2516  // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2517  if ((N0.getOpcode() == ISD::ADD) &&
2518  (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2519  (N1.getOpcode() == ISD::STEP_VECTOR)) {
2520  const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2521  const APInt &SV1 = N1->getConstantOperandAPInt(0);
2522  EVT SVT = N1.getOperand(0).getValueType();
2523  assert(N1.getOperand(0).getValueType() ==
2524  N0.getOperand(1)->getOperand(0).getValueType() &&
2525  "Different operand types of STEP_VECTOR.");
2526  SDValue NewStep = DAG.getConstant(SV0 + SV1, DL, SVT);
2527  SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2528  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2529  }
2530 
2531  return SDValue();
2532 }
2533 
2534 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2535  unsigned Opcode = N->getOpcode();
2536  SDValue N0 = N->getOperand(0);
2537  SDValue N1 = N->getOperand(1);
2538  EVT VT = N0.getValueType();
2539  SDLoc DL(N);
2540 
2541  // fold vector ops
2542  if (VT.isVector()) {
2543  // TODO SimplifyVBinOp
2544 
2545  // fold (add_sat x, 0) -> x, vector edition
2547  return N0;
2549  return N1;
2550  }
2551 
2552  // fold (add_sat x, undef) -> -1
2553  if (N0.isUndef() || N1.isUndef())
2554  return DAG.getAllOnesConstant(DL, VT);
2555 
2557  // canonicalize constant to RHS
2559  return DAG.getNode(Opcode, DL, VT, N1, N0);
2560  // fold (add_sat c1, c2) -> c3
2561  return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2562  }
2563 
2564  // fold (add_sat x, 0) -> x
2565  if (isNullConstant(N1))
2566  return N0;
2567 
2568  // If it cannot overflow, transform into an add.
2569  if (Opcode == ISD::UADDSAT)
2570  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2571  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2572 
2573  return SDValue();
2574 }
2575 
2576 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2577  bool Masked = false;
2578 
2579  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2580  while (true) {
2581  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2582  V = V.getOperand(0);
2583  continue;
2584  }
2585 
2586  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2587  Masked = true;
2588  V = V.getOperand(0);
2589  continue;
2590  }
2591 
2592  break;
2593  }
2594 
2595  // If this is not a carry, return.
2596  if (V.getResNo() != 1)
2597  return SDValue();
2598 
2599  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2600  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2601  return SDValue();
2602 
2603  EVT VT = V.getNode()->getValueType(0);
2604  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2605  return SDValue();
2606 
2607  // If the result is masked, then no matter what kind of bool it is we can
2608  // return. If it isn't, then we need to make sure the bool type is either 0 or
2609  // 1 and not other values.
2610  if (Masked ||
2611  TLI.getBooleanContents(V.getValueType()) ==
2613  return V;
2614 
2615  return SDValue();
2616 }
2617 
2618 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2619 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2620 /// the opcode and bypass the mask operation.
2621 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2622  SelectionDAG &DAG, const SDLoc &DL) {
2623  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2624  return SDValue();
2625 
2626  EVT VT = N0.getValueType();
2627  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2628  return SDValue();
2629 
2630  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2631  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2632  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2633 }
2634 
2635 /// Helper for doing combines based on N0 and N1 being added to each other.
2636 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2637  SDNode *LocReference) {
2638  EVT VT = N0.getValueType();
2639  SDLoc DL(LocReference);
2640 
2641  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2642  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2644  return DAG.getNode(ISD::SUB, DL, VT, N0,
2645  DAG.getNode(ISD::SHL, DL, VT,
2646  N1.getOperand(0).getOperand(1),
2647  N1.getOperand(1)));
2648 
2649  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2650  return V;
2651 
2652  // Look for:
2653  // add (add x, 1), y
2654  // And if the target does not like this form then turn into:
2655  // sub y, (xor x, -1)
2656  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2657  N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2658  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2659  DAG.getAllOnesConstant(DL, VT));
2660  return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2661  }
2662 
2663  // Hoist one-use subtraction by non-opaque constant:
2664  // (x - C) + y -> (x + y) - C
2665  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2666  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2667  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2668  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2669  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2670  }
2671  // Hoist one-use subtraction from non-opaque constant:
2672  // (C - x) + y -> (y - x) + C
2673  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2674  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2675  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2676  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2677  }
2678 
2679  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2680  // rather than 'add 0/-1' (the zext should get folded).
2681  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2682  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2683  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2685  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2686  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2687  }
2688 
2689  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2690  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2691  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2692  if (TN->getVT() == MVT::i1) {
2693  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2694  DAG.getConstant(1, DL, VT));
2695  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2696  }
2697  }
2698 
2699  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2700  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2701  N1.getResNo() == 0)
2702  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2703  N0, N1.getOperand(0), N1.getOperand(2));
2704 
2705  // (add X, Carry) -> (addcarry X, 0, Carry)
2707  if (SDValue Carry = getAsCarry(TLI, N1))
2708  return DAG.getNode(ISD::ADDCARRY, DL,
2709  DAG.getVTList(VT, Carry.getValueType()), N0,
2710  DAG.getConstant(0, DL, VT), Carry);
2711 
2712  return SDValue();
2713 }
2714 
2715 SDValue DAGCombiner::visitADDC(SDNode *N) {
2716  SDValue N0 = N->getOperand(0);
2717  SDValue N1 = N->getOperand(1);
2718  EVT VT = N0.getValueType();
2719  SDLoc DL(N);
2720 
2721  // If the flag result is dead, turn this into an ADD.
2722  if (!N->hasAnyUseOfValue(1))
2723  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2725 
2726  // canonicalize constant to RHS.
2727  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2728  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2729  if (N0C && !N1C)
2730  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2731 
2732  // fold (addc x, 0) -> x + no carry out
2733  if (isNullConstant(N1))
2734  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2735  DL, MVT::Glue));
2736 
2737  // If it cannot overflow, transform into an add.
2738  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2739  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2741 
2742  return SDValue();
2743 }
2744 
2745 /**
2746  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2747  * then the flip also occurs if computing the inverse is the same cost.
2748  * This function returns an empty SDValue in case it cannot flip the boolean
2749  * without increasing the cost of the computation. If you want to flip a boolean
2750  * no matter what, use DAG.getLogicalNOT.
2751  */
2753  const TargetLowering &TLI,
2754  bool Force) {
2755  if (Force && isa<ConstantSDNode>(V))
2756  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2757 
2758  if (V.getOpcode() != ISD::XOR)
2759  return SDValue();
2760 
2761  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2762  if (!Const)
2763  return SDValue();
2764 
2765  EVT VT = V.getValueType();
2766 
2767  bool IsFlip = false;
2768  switch(TLI.getBooleanContents(VT)) {
2770  IsFlip = Const->isOne();
2771  break;
2773  IsFlip = Const->isAllOnesValue();
2774  break;
2776  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2777  break;
2778  }
2779 
2780  if (IsFlip)
2781  return V.getOperand(0);
2782  if (Force)
2783  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2784  return SDValue();
2785 }
2786 
2787 SDValue DAGCombiner::visitADDO(SDNode *N) {
2788  SDValue N0 = N->getOperand(0);
2789  SDValue N1 = N->getOperand(1);
2790  EVT VT = N0.getValueType();
2791  bool IsSigned = (ISD::SADDO == N->getOpcode());
2792 
2793  EVT CarryVT = N->getValueType(1);
2794  SDLoc DL(N);
2795 
2796  // If the flag result is dead, turn this into an ADD.
2797  if (!N->hasAnyUseOfValue(1))
2798  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2799  DAG.getUNDEF(CarryVT));
2800 
2801  // canonicalize constant to RHS.
2804  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2805 
2806  // fold (addo x, 0) -> x + no carry out
2807  if (isNullOrNullSplat(N1))
2808  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2809 
2810  if (!IsSigned) {
2811  // If it cannot overflow, transform into an add.
2812  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2813  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2814  DAG.getConstant(0, DL, CarryVT));
2815 
2816  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2817  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2818  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2819  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2820  return CombineTo(
2821  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2822  }
2823 
2824  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2825  return Combined;
2826 
2827  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2828  return Combined;
2829  }
2830 
2831  return SDValue();
2832 }
2833 
2834 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2835  EVT VT = N0.getValueType();
2836  if (VT.isVector())
2837  return SDValue();
2838 
2839  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2840  // If Y + 1 cannot overflow.
2841  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2842  SDValue Y = N1.getOperand(0);
2843  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2845  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2846  N1.getOperand(2));
2847  }
2848 
2849  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2851  if (SDValue Carry = getAsCarry(TLI, N1))
2852  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2853  DAG.getConstant(0, SDLoc(N), VT), Carry);
2854 
2855  return SDValue();
2856 }
2857 
2858 SDValue DAGCombiner::visitADDE(SDNode *N) {
2859  SDValue N0 = N->getOperand(0);
2860  SDValue N1 = N->getOperand(1);
2861  SDValue CarryIn = N->getOperand(2);
2862 
2863  // canonicalize constant to RHS
2864  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2865  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2866  if (N0C && !N1C)
2867  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2868  N1, N0, CarryIn);
2869 
2870  // fold (adde x, y, false) -> (addc x, y)
2871  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2872  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2873 
2874  return SDValue();
2875 }
2876 
2877 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2878  SDValue N0 = N->getOperand(0);
2879  SDValue N1 = N->getOperand(1);
2880  SDValue CarryIn = N->getOperand(2);
2881  SDLoc DL(N);
2882 
2883  // canonicalize constant to RHS
2884  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2885  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2886  if (N0C && !N1C)
2887  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2888 
2889  // fold (addcarry x, y, false) -> (uaddo x, y)
2890  if (isNullConstant(CarryIn)) {
2891  if (!LegalOperations ||
2892  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2893  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2894  }
2895 
2896  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2897  if (isNullConstant(N0) && isNullConstant(N1)) {
2898  EVT VT = N0.getValueType();
2899  EVT CarryVT = CarryIn.getValueType();
2900  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2901  AddToWorklist(CarryExt.getNode());
2902  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2903  DAG.getConstant(1, DL, VT)),
2904  DAG.getConstant(0, DL, CarryVT));
2905  }
2906 
2907  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2908  return Combined;
2909 
2910  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2911  return Combined;
2912 
2913  return SDValue();
2914 }
2915 
2916 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2917  SDValue N0 = N->getOperand(0);
2918  SDValue N1 = N->getOperand(1);
2919  SDValue CarryIn = N->getOperand(2);
2920  SDLoc DL(N);
2921 
2922  // canonicalize constant to RHS
2923  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2924  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2925  if (N0C && !N1C)
2926  return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2927 
2928  // fold (saddo_carry x, y, false) -> (saddo x, y)
2929  if (isNullConstant(CarryIn)) {
2930  if (!LegalOperations ||
2931  TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2932  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2933  }
2934 
2935  return SDValue();
2936 }
2937 
2938 /**
2939  * If we are facing some sort of diamond carry propapagtion pattern try to
2940  * break it up to generate something like:
2941  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2942  *
2943  * The end result is usually an increase in operation required, but because the
2944  * carry is now linearized, other tranforms can kick in and optimize the DAG.
2945  *
2946  * Patterns typically look something like
2947  * (uaddo A, B)
2948  * / \
2949  * Carry Sum
2950  * | \
2951  * | (addcarry *, 0, Z)
2952  * | /
2953  * \ Carry
2954  * | /
2955  * (addcarry X, *, *)
2956  *
2957  * But numerous variation exist. Our goal is to identify A, B, X and Z and
2958  * produce a combine with a single path for carry propagation.
2959  */
2961  SDValue X, SDValue Carry0, SDValue Carry1,
2962  SDNode *N) {
2963  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2964  return SDValue();
2965  if (Carry1.getOpcode() != ISD::UADDO)
2966  return SDValue();
2967 
2968  SDValue Z;
2969 
2970  /**
2971  * First look for a suitable Z. It will present itself in the form of
2972  * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2973  */
2974  if (Carry0.getOpcode() == ISD::ADDCARRY &&
2975  isNullConstant(Carry0.getOperand(1))) {
2976  Z = Carry0.getOperand(2);
2977  } else if (Carry0.getOpcode() == ISD::UADDO &&
2978  isOneConstant(Carry0.getOperand(1))) {
2979  EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2980  Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2981  } else {
2982  // We couldn't find a suitable Z.
2983  return SDValue();
2984  }
2985 
2986 
2987  auto cancelDiamond = [&](SDValue A,SDValue B) {
2988  SDLoc DL(N);
2989  SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2990  Combiner.AddToWorklist(NewY.getNode());
2991  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2992  DAG.getConstant(0, DL, X.getValueType()),
2993  NewY.getValue(1));
2994  };
2995 
2996  /**
2997  * (uaddo A, B)
2998  * |
2999  * Sum
3000  * |
3001  * (addcarry *, 0, Z)
3002  */
3003  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3004  return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3005  }
3006 
3007  /**
3008  * (addcarry A, 0, Z)
3009  * |
3010  * Sum
3011  * |
3012  * (uaddo *, B)
3013  */
3014  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3015  return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3016  }
3017 
3018  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3019  return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3020  }
3021 
3022  return SDValue();
3023 }
3024 
3025 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3026 // match patterns like:
3027 //
3028 // (uaddo A, B) CarryIn
3029 // | \ |
3030 // | \ |
3031 // PartialSum PartialCarryOutX /
3032 // | | /
3033 // | ____|____________/
3034 // | / |
3035 // (uaddo *, *) \________
3036 // | \ \
3037 // | \ |
3038 // | PartialCarryOutY |
3039 // | \ |
3040 // | \ /
3041 // AddCarrySum | ______/
3042 // | /
3043 // CarryOut = (or *, *)
3044 //
3045 // And generate ADDCARRY (or SUBCARRY) with two result values:
3046 //
3047 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3048 //
3049 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3050 // a single path for carry/borrow out propagation:
3052  const TargetLowering &TLI, SDValue Carry0,
3053  SDValue Carry1, SDNode *N) {
3054  if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3055  return SDValue();
3056  unsigned Opcode = Carry0.getOpcode();
3057  if (Opcode != Carry1.getOpcode())
3058  return SDValue();
3059  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3060  return SDValue();
3061 
3062  // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3063  // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3064  // the above ASCII art.)
3065  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3066  Carry1.getOperand(1) != Carry0.getValue(0))
3067  std::swap(Carry0, Carry1);
3068  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3069  Carry1.getOperand(1) != Carry0.getValue(0))
3070  return SDValue();
3071 
3072  // The carry in value must be on the righthand side for subtraction.
3073  unsigned CarryInOperandNum =
3074  Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3075  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3076  return SDValue();
3077  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3078 
3079  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3080  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3081  return SDValue();
3082 
3083  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3084  // TODO: make getAsCarry() aware of how partial carries are merged.
3085  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3086  return SDValue();
3087  CarryIn = CarryIn.getOperand(0);
3088  if (CarryIn.getValueType() != MVT::i1)
3089  return SDValue();
3090 
3091  SDLoc DL(N);
3092  SDValue Merged =
3093  DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3094  Carry0.getOperand(1), CarryIn);
3095 
3096  // Please note that because we have proven that the result of the UADDO/USUBO
3097  // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3098  // therefore prove that if the first UADDO/USUBO overflows, the second
3099  // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3100  // maximum value.
3101  //
3102  // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3103  // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3104  //
3105  // This is important because it means that OR and XOR can be used to merge
3106  // carry flags; and that AND can return a constant zero.
3107  //
3108  // TODO: match other operations that can merge flags (ADD, etc)
3109  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3110  if (N->getOpcode() == ISD::AND)
3111  return DAG.getConstant(0, DL, MVT::i1);
3112  return Merged.getValue(1);
3113 }
3114 
3115 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3116  SDNode *N) {
3117  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3118  if (isBitwiseNot(N0))
3119  if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3120  SDLoc DL(N);
3121  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3122  N0.getOperand(0), NotC);
3123  return CombineTo(
3124  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3125  }
3126 
3127  // Iff the flag result is dead:
3128  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3129  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3130  // or the dependency between the instructions.
3131  if ((N0.getOpcode() == ISD::ADD ||
3132  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3133  N0.getValue(1) != CarryIn)) &&
3134  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3135  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3136  N0.getOperand(0), N0.getOperand(1), CarryIn);
3137 
3138  /**
3139  * When one of the addcarry argument is itself a carry, we may be facing
3140  * a diamond carry propagation. In which case we try to transform the DAG
3141  * to ensure linear carry propagation if that is possible.
3142  */
3143  if (auto Y = getAsCarry(TLI, N1)) {
3144  // Because both are carries, Y and Z can be swapped.
3145  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3146  return R;
3147  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3148  return R;
3149  }
3150 
3151  return SDValue();
3152 }
3153 
3154 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3155 // clamp/truncation if necessary.
3156 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3157  SDValue RHS, SelectionDAG &DAG,
3158  const SDLoc &DL) {
3159  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3160  "Illegal truncation");
3161 
3162  if (DstVT == SrcVT)
3163  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3164 
3165  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3166  // clamping RHS.
3167  APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3168  DstVT.getScalarSizeInBits());
3169  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3170  return SDValue();
3171 
3172  SDValue SatLimit =
3174  DstVT.getScalarSizeInBits()),
3175  DL, SrcVT);
3176  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3177  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3178  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3179  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3180 }
3181 
3182 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3183 // usubsat(a,b), optionally as a truncated type.
3184 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3185  if (N->getOpcode() != ISD::SUB ||
3186  !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3187  return SDValue();
3188 
3189  EVT SubVT = N->getValueType(0);
3190  SDValue Op0 = N->getOperand(0);
3191  SDValue Op1 = N->getOperand(1);
3192 
3193  // Try to find umax(a,b) - b or a - umin(a,b) patterns
3194  // they may be converted to usubsat(a,b).
3195  if (Op0.getOpcode() == ISD::UMAX) {
3196  SDValue MaxLHS = Op0.getOperand(0);
3197  SDValue MaxRHS = Op0.getOperand(1);
3198  if (MaxLHS == Op1)
3199  return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3200  if (MaxRHS == Op1)
3201  return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3202  }
3203 
3204  if (Op1.getOpcode() == ISD::UMIN) {
3205  SDValue MinLHS = Op1.getOperand(0);
3206  SDValue MinRHS = Op1.getOperand(1);
3207  if (MinLHS == Op0)
3208  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3209  if (MinRHS == Op0)
3210  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3211  }
3212 
3213  // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3214  if (Op1.getOpcode() == ISD::TRUNCATE &&
3215  Op1.getOperand(0).getOpcode() == ISD::UMIN) {
3216  SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3217  SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3218  if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3219  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3220  DAG, SDLoc(N));
3221  if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3222  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3223  DAG, SDLoc(N));
3224  }
3225 
3226  return SDValue();
3227 }
3228 
3229 // Since it may not be valid to emit a fold to zero for vector initializers
3230 // check if we can before folding.
3231 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3232  SelectionDAG &DAG, bool LegalOperations) {
3233  if (!VT.isVector())
3234  return DAG.getConstant(0, DL, VT);
3235  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3236  return DAG.getConstant(0, DL, VT);
3237  return SDValue();
3238 }
3239 
3240 SDValue DAGCombiner::visitSUB(SDNode *N) {
3241  SDValue N0 = N->getOperand(0);
3242  SDValue N1 = N->getOperand(1);
3243  EVT VT = N0.getValueType();
3244  SDLoc DL(N);
3245 
3246  // fold vector ops
3247  if (VT.isVector()) {
3248  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3249  return FoldedVOp;
3250 
3251  // fold (sub x, 0) -> x, vector edition
3253  return N0;
3254  }
3255 
3256  // fold (sub x, x) -> 0
3257  // FIXME: Refactor this and xor and other similar operations together.
3258  if (N0 == N1)
3259  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3260 
3261  // fold (sub c1, c2) -> c3
3262  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3263  return C;
3264 
3265  if (SDValue NewSel = foldBinOpIntoSelect(N))
3266  return NewSel;
3267 
3269 
3270  // fold (sub x, c) -> (add x, -c)
3271  if (N1C) {
3272  return DAG.getNode(ISD::ADD, DL, VT, N0,
3273  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3274  }
3275 
3276  if (isNullOrNullSplat(N0)) {
3277  unsigned BitWidth = VT.getScalarSizeInBits();
3278  // Right-shifting everything out but the sign bit followed by negation is
3279  // the same as flipping arithmetic/logical shift type without the negation:
3280  // -(X >>u 31) -> (X >>s 31)
3281  // -(X >>s 31) -> (X >>u 31)
3282  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3283  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3284  if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3285  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3286  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3287  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3288  }
3289  }
3290 
3291  // 0 - X --> 0 if the sub is NUW.
3292  if (N->getFlags().hasNoUnsignedWrap())
3293  return N0;
3294 
3296  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3297  // N1 must be 0 because negating the minimum signed value is undefined.
3298  if (N->getFlags().hasNoSignedWrap())
3299  return N0;
3300 
3301  // 0 - X --> X if X is 0 or the minimum signed value.
3302  return N1;
3303  }
3304 
3305  // Convert 0 - abs(x).
3306  SDValue Result;
3307  if (N1->getOpcode() == ISD::ABS &&
3308  !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3309  TLI.expandABS(N1.getNode(), Result, DAG, true))
3310  return Result;
3311  }
3312 
3313  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3314  if (isAllOnesOrAllOnesSplat(N0))
3315  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3316 
3317  // fold (A - (0-B)) -> A+B
3318  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3319  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3320 
3321  // fold A-(A-B) -> B
3322  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3323  return N1.getOperand(1);
3324 
3325  // fold (A+B)-A -> B
3326  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3327  return N0.getOperand(1);
3328 
3329  // fold (A+B)-B -> A
3330  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3331  return N0.getOperand(0);
3332 
3333  // fold (A+C1)-C2 -> A+(C1-C2)
3334  if (N0.getOpcode() == ISD::ADD &&
3335  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3336  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3337  SDValue NewC =
3338  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3339  assert(NewC && "Constant folding failed");
3340  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3341  }
3342 
3343  // fold C2-(A+C1) -> (C2-C1)-A
3344  if (N1.getOpcode() == ISD::ADD) {
3345  SDValue N11 = N1.getOperand(1);
3346  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3347  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3348  SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3349  assert(NewC && "Constant folding failed");
3350  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3351  }
3352  }
3353 
3354  // fold (A-C1)-C2 -> A-(C1+C2)
3355  if (N0.getOpcode() == ISD::SUB &&
3356  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3357  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3358  SDValue NewC =
3359  DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3360  assert(NewC && "Constant folding failed");
3361  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3362  }
3363 
3364  // fold (c1-A)-c2 -> (c1-c2)-A
3365  if (N0.getOpcode() == ISD::SUB &&
3366  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3367  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3368  SDValue NewC =
3369  DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3370  assert(NewC && "Constant folding failed");
3371  return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3372  }
3373 
3374  // fold ((A+(B+or-C))-B) -> A+or-C
3375  if (N0.getOpcode() == ISD::ADD &&
3376  (N0.getOperand(1).getOpcode() == ISD::SUB ||
3377  N0.getOperand(1).getOpcode() == ISD::ADD) &&
3378  N0.getOperand(1).getOperand(0) == N1)
3379  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3380  N0.getOperand(1).getOperand(1));
3381 
3382  // fold ((A+(C+B))-B) -> A+C
3383  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3384  N0.getOperand(1).getOperand(1) == N1)
3385  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3386  N0.getOperand(1).getOperand(0));
3387 
3388  // fold ((A-(B-C))-C) -> A-B
3389  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3390  N0.getOperand(1).getOperand(1) == N1)
3391  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3392  N0.getOperand(1).getOperand(0));
3393 
3394  // fold (A-(B-C)) -> A+(C-B)
3395  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3396  return DAG.getNode(ISD::ADD, DL, VT, N0,
3397  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3398  N1.getOperand(0)));
3399 
3400  // A - (A & B) -> A & (~B)
3401  if (N1.getOpcode() == ISD::AND) {
3402  SDValue A = N1.getOperand(0);
3403  SDValue B = N1.getOperand(1);
3404  if (A != N0)
3405  std::swap(A, B);
3406  if (A == N0 &&
3407  (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3408  SDValue InvB =
3409  DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3410  return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3411  }
3412  }
3413 
3414  // fold (X - (-Y * Z)) -> (X + (Y * Z))
3415  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3416  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3418  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3419  N1.getOperand(0).getOperand(1),
3420  N1.getOperand(1));
3421  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3422  }
3423  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3425  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3426  N1.getOperand(0),
3427  N1.getOperand(1).getOperand(1));
3428  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3429  }
3430  }
3431 
3432  // If either operand of a sub is undef, the result is undef
3433  if (N0.isUndef())
3434  return N0;
3435  if (N1.isUndef())
3436  return N1;
3437 
3438  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3439  return V;
3440 
3441  if (SDValue V = foldAddSubOfSignBit(N, DAG))
3442  return V;
3443 
3444  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3445  return V;
3446 
3447  if (SDValue V = foldSubToUSubSat(VT, N))
3448  return V;
3449 
3450  // (x - y) - 1 -> add (xor y, -1), x
3451  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3452  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3453  DAG.getAllOnesConstant(DL, VT));
3454  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3455  }
3456 
3457  // Look for:
3458  // sub y, (xor x, -1)
3459  // And if the target does not like this form then turn into:
3460  // add (add x, y), 1
3461  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3462  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3463  return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3464  }
3465 
3466  // Hoist one-use addition by non-opaque constant:
3467  // (x + C) - y -> (x - y) + C
3468  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3469  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3470  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3471  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3472  }
3473  // y - (x + C) -> (y - x) - C
3474  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3475  isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3476  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3477  return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3478  }
3479  // (x - C) - y -> (x - y) - C
3480  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3481  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3482  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3483  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3484  return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3485  }
3486  // (C - x) - y -> C - (x + y)
3487  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3488  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3489  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3490  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3491  }
3492 
3493  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3494  // rather than 'sub 0/1' (the sext should get folded).
3495  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3496  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3497  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3498  TLI.getBooleanContents(VT) ==
3500  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3501  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3502  }
3503 
3504  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3505  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3506  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3507  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3508  SDValue S0 = N1.getOperand(0);
3509  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3511  if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3512  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3513  }
3514  }
3515 
3516  // If the relocation model supports it, consider symbol offsets.
3517  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3518  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3519  // fold (sub Sym, c) -> Sym-c
3520  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3521  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3522  GA->getOffset() -
3523  (uint64_t)N1C->getSExtValue());
3524  // fold (sub Sym+c1, Sym+c2) -> c1-c2
3525  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3526  if (GA->getGlobal() == GB->getGlobal())
3527  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3528  DL, VT);
3529  }
3530 
3531  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3532  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3533  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3534  if (TN->getVT() == MVT::i1) {
3535  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3536  DAG.getConstant(1, DL, VT));
3537  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3538  }
3539  }
3540 
3541  // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3542  if (N1.getOpcode() == ISD::VSCALE) {
3543  const APInt &IntVal = N1.getConstantOperandAPInt(0);
3544  return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3545  }
3546 
3547  // Prefer an add for more folding potential and possibly better codegen:
3548  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3549  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3550  SDValue ShAmt = N1.getOperand(1);
3551  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3552  if (ShAmtC &&
3553  ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3554  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3555  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3556  }
3557  }
3558 
3559  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3560  // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3561  if (SDValue Carry = getAsCarry(TLI, N0)) {
3562  SDValue X = N1;
3563  SDValue Zero = DAG.getConstant(0, DL, VT);
3564  SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3565  return DAG.getNode(ISD::ADDCARRY, DL,
3566  DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3567  Carry);
3568  }
3569  }
3570 
3571  return SDValue();
3572 }
3573 
3574 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3575  SDValue N0 = N->getOperand(0);
3576  SDValue N1 = N->getOperand(1);
3577  EVT VT = N0.getValueType();
3578  SDLoc DL(N);
3579 
3580  // fold vector ops
3581  if (VT.isVector()) {
3582  // TODO SimplifyVBinOp
3583 
3584  // fold (sub_sat x, 0) -> x, vector edition
3586  return N0;
3587  }
3588 
3589  // fold (sub_sat x, undef) -> 0
3590  if (N0.isUndef() || N1.isUndef())
3591  return DAG.getConstant(0, DL, VT);
3592 
3593  // fold (sub_sat x, x) -> 0
3594  if (N0 == N1)
3595  return DAG.getConstant(0, DL, VT);
3596 
3597  // fold (sub_sat c1, c2) -> c3
3598  if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3599  return C;
3600 
3601  // fold (sub_sat x, 0) -> x
3602  if (isNullConstant(N1))
3603  return N0;
3604 
3605  return SDValue();
3606 }
3607 
3608 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3609  SDValue N0 = N->getOperand(0);
3610  SDValue N1 = N->getOperand(1);
3611  EVT VT = N0.getValueType();
3612  SDLoc DL(N);
3613 
3614  // If the flag result is dead, turn this into an SUB.
3615  if (!N->hasAnyUseOfValue(1))
3616  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3618 
3619  // fold (subc x, x) -> 0 + no borrow
3620  if (N0 == N1)
3621  return CombineTo(N, DAG.getConstant(0, DL, VT),
3623 
3624  // fold (subc x, 0) -> x + no borrow
3625  if (isNullConstant(N1))
3626  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3627 
3628  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3629  if (isAllOnesConstant(N0))
3630  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3632 
3633  return SDValue();
3634 }
3635 
3636 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3637  SDValue N0 = N->getOperand(0);
3638  SDValue N1 = N->getOperand(1);
3639  EVT VT = N0.getValueType();
3640  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3641 
3642  EVT CarryVT = N->getValueType(1);
3643  SDLoc DL(N);
3644 
3645  // If the flag result is dead, turn this into an SUB.
3646  if (!N->hasAnyUseOfValue(1))
3647  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3648  DAG.getUNDEF(CarryVT));
3649 
3650  // fold (subo x, x) -> 0 + no borrow
3651  if (N0 == N1)
3652  return CombineTo(N, DAG.getConstant(0, DL, VT),
3653  DAG.getConstant(0, DL, CarryVT));
3654 
3656 
3657  // fold (subox, c) -> (addo x, -c)
3658  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3659  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3660  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3661  }
3662 
3663  // fold (subo x, 0) -> x + no borrow
3664  if (isNullOrNullSplat(N1))
3665  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3666 
3667  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3668  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3669  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3670  DAG.getConstant(0, DL, CarryVT));
3671 
3672  return SDValue();
3673 }
3674 
3675 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3676  SDValue N0 = N->getOperand(0);
3677  SDValue N1 = N->getOperand(1);
3678  SDValue CarryIn = N->getOperand(2);
3679 
3680  // fold (sube x, y, false) -> (subc x, y)
3681  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3682  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3683 
3684  return SDValue();
3685 }
3686 
3687 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3688  SDValue N0 = N->getOperand(0);
3689  SDValue N1 = N->getOperand(1);
3690  SDValue CarryIn = N->getOperand(2);
3691 
3692  // fold (subcarry x, y, false) -> (usubo x, y)
3693  if (isNullConstant(CarryIn)) {
3694  if (!LegalOperations ||
3695  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3696  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3697  }
3698 
3699  return SDValue();
3700 }
3701 
3702 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3703  SDValue N0 = N->getOperand(0);
3704  SDValue N1 = N->getOperand(1);
3705  SDValue CarryIn = N->getOperand(2);
3706 
3707  // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3708  if (isNullConstant(CarryIn)) {
3709  if (!LegalOperations ||
3710  TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3711  return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3712  }
3713 
3714  return SDValue();
3715 }
3716 
3717 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3718 // UMULFIXSAT here.
3719 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3720  SDValue N0 = N->getOperand(0);
3721  SDValue N1 = N->getOperand(1);
3722  SDValue Scale = N->getOperand(2);
3723  EVT VT = N0.getValueType();
3724 
3725  // fold (mulfix x, undef, scale) -> 0
3726  if (N0.isUndef() || N1.isUndef())
3727  return DAG.getConstant(0, SDLoc(N), VT);
3728 
3729  // Canonicalize constant to RHS (vector doesn't have to splat)
3732  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3733 
3734  // fold (mulfix x, 0, scale) -> 0
3735  if (isNullConstant(N1))
3736  return DAG.getConstant(0, SDLoc(N), VT);
3737 
3738  return SDValue();
3739 }
3740 
3741 SDValue DAGCombiner::visitMUL(SDNode *N) {
3742  SDValue N0 = N->getOperand(0);
3743  SDValue N1 = N->getOperand(1);
3744  EVT VT = N0.getValueType();
3745 
3746  // fold (mul x, undef) -> 0
3747  if (N0.isUndef() || N1.isUndef())
3748  return DAG.getConstant(0, SDLoc(N), VT);
3749 
3750  bool N1IsConst = false;
3751  bool N1IsOpaqueConst = false;
3752  APInt ConstValue1;
3753 
3754  // fold vector ops
3755  if (VT.isVector()) {
3756  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3757  return FoldedVOp;
3758 
3759  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3760  assert((!N1IsConst ||
3761  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3762  "Splat APInt should be element width");
3763  } else {
3764  N1IsConst = isa<ConstantSDNode>(N1);
3765  if (N1IsConst) {
3766  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3767  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3768  }
3769  }
3770 
3771  // fold (mul c1, c2) -> c1*c2
3772  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3773  return C;
3774 
3775  // canonicalize constant to RHS (vector doesn't have to splat)
3778  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3779 
3780  // fold (mul x, 0) -> 0
3781  if (N1IsConst && ConstValue1.isNullValue())
3782  return N1;
3783 
3784  // fold (mul x, 1) -> x
3785  if (N1IsConst && ConstValue1.isOneValue())
3786  return N0;
3787 
3788  if (SDValue NewSel = foldBinOpIntoSelect(N))
3789  return NewSel;
3790 
3791  // fold (mul x, -1) -> 0-x
3792  if (N1IsConst && ConstValue1.isAllOnesValue()) {
3793  SDLoc DL(N);
3794  return DAG.getNode(ISD::SUB, DL, VT,
3795  DAG.getConstant(0, DL, VT), N0);
3796  }
3797 
3798  // fold (mul x, (1 << c)) -> x << c
3799  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3800  DAG.isKnownToBeAPowerOfTwo(N1) &&
3801  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3802  SDLoc DL(N);
3803  SDValue LogBase2 = BuildLogBase2(N1, DL);
3804  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3805  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3806  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3807  }
3808 
3809  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3810  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3811  unsigned Log2Val = (-ConstValue1).logBase2();
3812  SDLoc DL(N);
3813  // FIXME: If the input is something that is easily negated (e.g. a
3814  // single-use add), we should put the negate there.
3815  return DAG.getNode(ISD::SUB, DL, VT,
3816  DAG.getConstant(0, DL, VT),
3817  DAG.getNode(ISD::SHL, DL, VT, N0,
3818  DAG.getConstant(Log2Val, DL,
3819  getShiftAmountTy(N0.getValueType()))));
3820  }
3821 
3822  // Try to transform:
3823  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3824  // mul x, (2^N + 1) --> add (shl x, N), x
3825  // mul x, (2^N - 1) --> sub (shl x, N), x
3826  // Examples: x * 33 --> (x << 5) + x
3827  // x * 15 --> (x << 4) - x
3828  // x * -33 --> -((x << 5) + x)
3829  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3830  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3831  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3832  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3833  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3834  // x * 0xf800 --> (x << 16) - (x << 11)
3835  // x * -0x8800 --> -((x << 15) + (x << 11))
3836  // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3837  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3838  // TODO: We could handle more general decomposition of any constant by
3839  // having the target set a limit on number of ops and making a
3840  // callback to determine that sequence (similar to sqrt expansion).
3841  unsigned MathOp = ISD::DELETED_NODE;
3842  APInt MulC = ConstValue1.abs();
3843  // The constant `2` should be treated as (2^0 + 1).
3844  unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3845  MulC.lshrInPlace(TZeros);
3846  if ((MulC - 1).isPowerOf2())
3847  MathOp = ISD::ADD;
3848  else if ((MulC + 1).isPowerOf2())
3849  MathOp = ISD::SUB;
3850 
3851  if (MathOp != ISD::DELETED_NODE) {
3852  unsigned ShAmt =
3853  MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3854  ShAmt += TZeros;
3855  assert(ShAmt < VT.getScalarSizeInBits() &&
3856  "multiply-by-constant generated out of bounds shift");
3857  SDLoc DL(N);
3858  SDValue Shl =
3859  DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3860  SDValue R =
3861  TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3862  DAG.getNode(ISD::SHL, DL, VT, N0,
3863  DAG.getConstant(TZeros, DL, VT)))
3864  : DAG.getNode(MathOp, DL, VT, Shl, N0);
3865  if (ConstValue1.isNegative())
3866  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3867  return R;
3868  }
3869  }
3870 
3871  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3872  if (N0.getOpcode() == ISD::SHL &&
3873  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3874  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3875  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3877  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3878  }
3879 
3880  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3881  // use.
3882  {
3883  SDValue Sh(nullptr, 0), Y(nullptr, 0);
3884 
3885  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3886  if (N0.getOpcode() == ISD::SHL &&
3888  N0.getNode()->hasOneUse()) {
3889  Sh = N0; Y = N1;
3890  } else if (N1.getOpcode() == ISD::SHL &&
3892  N1.getNode()->hasOneUse()) {
3893  Sh = N1; Y = N0;
3894  }
3895 
3896  if (Sh.getNode()) {
3897  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3898  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3899  }
3900  }
3901 
3902  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3904  N0.getOpcode() == ISD::ADD &&
3906  isMulAddWithConstProfitable(N, N0, N1))
3907  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3908  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3909  N0.getOperand(0), N1),
3910  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3911  N0.getOperand(1), N1));
3912 
3913  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3914  if (N0.getOpcode() == ISD::VSCALE)
3915  if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3916  const APInt &C0 = N0.getConstantOperandAPInt(0);
3917  const APInt &C1 = NC1->getAPIntValue();
3918  return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3919  }
3920 
3921  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3922  APInt MulVal;
3923  if (N0.getOpcode() == ISD::STEP_VECTOR)
3924  if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3925  const APInt &C0 = N0.getConstantOperandAPInt(0);
3926  EVT SVT = N0.getOperand(0).getValueType();
3927  SDValue NewStep = DAG.getConstant(
3928  C0 * MulVal.sextOrTrunc(SVT.getSizeInBits()), SDLoc(N), SVT);
3929  return DAG.getStepVector(SDLoc(N), VT, NewStep);
3930  }
3931 
3932  // Fold ((mul x, 0/undef) -> 0,
3933  // (mul x, 1) -> x) -> x)
3934  // -> and(x, mask)
3935  // We can replace vectors with '0' and '1' factors with a clearing mask.
3936  if (VT.isFixedLengthVector()) {
3937  unsigned NumElts = VT.getVectorNumElements();
3938  SmallBitVector ClearMask;
3939  ClearMask.reserve(NumElts);
3940  auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3941  if (!V || V->isNullValue()) {
3942  ClearMask.push_back(true);
3943  return true;
3944  }
3945  ClearMask.push_back(false);
3946  return V->isOne();
3947  };
3948  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3949  ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3950  assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3951  SDLoc DL(N);
3952  EVT LegalSVT = N1.getOperand(0).getValueType();
3953  SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3954  SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3955  SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3956  for (unsigned I = 0; I != NumElts; ++I)
3957  if (ClearMask[I])
3958  Mask[I] = Zero;
3959  return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3960  }
3961  }
3962 
3963  // reassociate mul
3964  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3965  return RMUL;
3966 
3967  return SDValue();
3968 }
3969 
3970 /// Return true if divmod libcall is available.
3971 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3972  const TargetLowering &TLI) {
3973  RTLIB::Libcall LC;
3974  EVT NodeType = Node->getValueType(0);
3975  if (!NodeType.isSimple())
3976  return false;
3977  switch (NodeType.getSimpleVT().SimpleTy) {
3978  default: return false; // No libcall for vector types.
3979  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3980  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3981  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3982  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3983  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3984  }
3985 
3986  return TLI.getLibcallName(LC) != nullptr;
3987 }
3988 
3989 /// Issue divrem if both quotient and remainder are needed.
3990 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3991  if (Node->use_empty())
3992  return SDValue(); // This is a dead node, leave it alone.
3993 
3994  unsigned Opcode = Node->getOpcode();
3995  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3996  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3997 
3998  // DivMod lib calls can still work on non-legal types if using lib-calls.
3999  EVT VT = Node->getValueType(0);
4000  if (VT.isVector() || !VT.isInteger())
4001  return SDValue();
4002 
4003  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4004  return SDValue();
4005 
4006  // If DIVREM is going to get expanded into a libcall,
4007  // but there is no libcall available, then don't combine.
4008  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4009  !isDivRemLibcallAvailable(Node, isSigned, TLI))
4010  return SDValue();
4011 
4012  // If div is legal, it's better to do the normal expansion
4013  unsigned OtherOpcode = 0;
4014  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4015  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4016  if (TLI.isOperationLegalOrCustom(Opcode, VT))
4017  return SDValue();
4018  } else {
4019  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4020  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4021  return SDValue();
4022  }
4023 
4024  SDValue Op0 = Node->getOperand(0);
4025  SDValue Op1 = Node->getOperand(1);
4026  SDValue combined;
4027  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
4028  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
4029  SDNode *User = *UI;
4030  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4031  User->use_empty())
4032  continue;
4033  // Convert the other matching node(s), too;
4034  // otherwise, the DIVREM may get target-legalized into something
4035  // target-specific that we won't be able to recognize.
4036  unsigned UserOpc = User->getOpcode();
4037  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4038  User->getOperand(0) == Op0 &&
4039  User->getOperand(1) == Op1) {
4040  if (!combined) {
4041  if (UserOpc == OtherOpcode) {
4042  SDVTList VTs = DAG.getVTList(VT, VT);
4043  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4044  } else if (UserOpc == DivRemOpc) {
4045  combined = SDValue(User, 0);
4046  } else {
4047  assert(UserOpc == Opcode);
4048  continue;
4049  }
4050  }
4051  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4052  CombineTo(User, combined);
4053  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4054  CombineTo(User, combined.getValue(1));
4055  }
4056  }
4057  return combined;
4058 }
4059 
4061  SDValue N0 = N->getOperand(0);
4062  SDValue N1 = N->getOperand(1);
4063  EVT VT = N->getValueType(0);
4064  SDLoc DL(N);
4065 
4066  unsigned Opc = N->getOpcode();
4067  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4069 
4070  // X / undef -> undef
4071  // X % undef -> undef
4072  // X / 0 -> undef
4073  // X % 0 -> undef
4074  // NOTE: This includes vectors where any divisor element is zero/undef.
4075  if (DAG.isUndef(Opc, {N0, N1}))
4076  return DAG.getUNDEF(VT);
4077 
4078  // undef / X -> 0
4079  // undef % X -> 0
4080  if (N0.isUndef())
4081  return DAG.getConstant(0, DL, VT);
4082 
4083  // 0 / X -> 0
4084  // 0 % X -> 0
4086  if (N0C && N0C->isNullValue())
4087  return N0;
4088 
4089  // X / X -> 1
4090  // X % X -> 0
4091  if (N0 == N1)
4092  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4093 
4094  // X / 1 -> X
4095  // X % 1 -> 0
4096  // If this is a boolean op (single-bit element type), we can't have
4097  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4098  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4099  // it's a 1.
4100  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4101  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4102 
4103  return SDValue();
4104 }
4105 
4106 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4107  SDValue N0 = N->getOperand(0);
4108  SDValue N1 = N->getOperand(1);
4109  EVT VT = N->getValueType(0);
4110  EVT CCVT = getSetCCResultType(VT);
4111 
4112  // fold vector ops
4113  if (VT.isVector())
4114  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4115  return FoldedVOp;
4116 
4117  SDLoc DL(N);
4118 
4119  // fold (sdiv c1, c2) -> c1/c2
4121  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4122  return C;
4123 
4124  // fold (sdiv X, -1) -> 0-X
4125  if (N1C && N1C->isAllOnesValue())
4126  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4127 
4128  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4129  if (N1C && N1C->getAPIntValue().isMinSignedValue())
4130  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4131  DAG.getConstant(1, DL, VT),
4132  DAG.getConstant(0, DL, VT));
4133 
4134  if (SDValue V = simplifyDivRem(N, DAG))
4135  return V;
4136 
4137  if (SDValue NewSel = foldBinOpIntoSelect(N))
4138  return NewSel;
4139 
4140  // If we know the sign bits of both operands are zero, strength reduce to a
4141  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4142  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4143  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4144 
4145  if (SDValue V = visitSDIVLike(N0, N1, N)) {
4146  // If the corresponding remainder node exists, update its users with
4147  // (Dividend - (Quotient * Divisor).
4148  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4149  { N0, N1 })) {
4150  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4151  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4152  AddToWorklist(Mul.getNode());
4153  AddToWorklist(Sub.getNode());
4154  CombineTo(RemNode, Sub);
4155  }
4156  return V;
4157  }
4158 
4159  // sdiv, srem -> sdivrem
4160  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4161  // true. Otherwise, we break the simplification logic in visitREM().
4163  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4164  if (SDValue DivRem = useDivRem(N))
4165  return DivRem;
4166 
4167  return SDValue();
4168 }
4169 
4170 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4171  SDLoc DL(N);
4172  EVT VT = N->getValueType(0);
4173  EVT CCVT = getSetCCResultType(VT);
4174  unsigned BitWidth = VT.getScalarSizeInBits();
4175 
4176  // Helper for determining whether a value is a power-2 constant scalar or a
4177  // vector of such elements.
4178  auto IsPowerOfTwo = [](ConstantSDNode *C) {
4179  if (C->isNullValue() || C->isOpaque())
4180  return false;
4181  if (C->getAPIntValue().isPowerOf2())
4182  return true;
4183  if ((-C->getAPIntValue()).isPowerOf2())
4184  return true;
4185  return false;
4186  };
4187 
4188  // fold (sdiv X, pow2) -> simple ops after legalize
4189  // FIXME: We check for the exact bit here because the generic lowering gives
4190  // better results in that case. The target-specific lowering should learn how
4191  // to handle exact sdivs efficiently.
4192  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4193  // Target-specific implementation of sdiv x, pow2.
4194  if (SDValue Res = BuildSDIVPow2(N))
4195  return Res;
4196 
4197  // Create constants that are functions of the shift amount value.
4198  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4199  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4200  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4201  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4202  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4203  if (!isConstantOrConstantVector(Inexact))
4204  return SDValue();
4205 
4206  // Splat the sign bit into the register
4207  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4208  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4209  AddToWorklist(Sign.getNode());
4210 
4211  // Add (N0 < 0) ? abs2 - 1 : 0;
4212  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4213  AddToWorklist(Srl.getNode());
4214  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4215  AddToWorklist(Add.getNode());
4216  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4217  AddToWorklist(Sra.getNode());
4218 
4219  // Special case: (sdiv X, 1) -> X
4220  // Special Case: (sdiv X, -1) -> 0-X
4221  SDValue One = DAG.getConstant(1, DL, VT);
4222  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4223  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4224  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4225  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4226  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4227 
4228  // If dividing by a positive value, we're done. Otherwise, the result must
4229  // be negated.
4230  SDValue Zero = DAG.getConstant(0, DL, VT);
4231  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4232 
4233  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4234  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4235  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4236  return Res;
4237  }
4238 
4239  // If integer divide is expensive and we satisfy the requirements, emit an
4240  // alternate sequence. Targets may check function attributes for size/speed
4241  // trade-offs.
4243  if (isConstantOrConstantVector(N1) &&
4244  !TLI.isIntDivCheap(N->getValueType(0), Attr))
4245  if (SDValue Op = BuildSDIV(N))
4246  return Op;
4247 
4248  return SDValue();
4249 }
4250 
4251 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4252  SDValue N0 = N->getOperand(0);
4253  SDValue N1 = N->getOperand(1);
4254  EVT VT = N->getValueType(0);
4255  EVT CCVT = getSetCCResultType(VT);
4256 
4257  // fold vector ops
4258  if (VT.isVector())
4259  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4260  return FoldedVOp;
4261 
4262  SDLoc DL(N);
4263 
4264  // fold (udiv c1, c2) -> c1/c2
4266  if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4267  return C;
4268 
4269  // fold (udiv X, -1) -> select(X == -1, 1, 0)
4270  if (N1C && N1C->getAPIntValue().isAllOnesValue())
4271  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4272  DAG.getConstant(1, DL, VT),
4273  DAG.getConstant(0, DL, VT));
4274 
4275  if (SDValue V = simplifyDivRem(N, DAG))
4276  return V;
4277 
4278  if (SDValue NewSel = foldBinOpIntoSelect(N))
4279  return NewSel;
4280 
4281  if (SDValue V = visitUDIVLike(N0, N1, N)) {
4282  // If the corresponding remainder node exists, update its users with
4283  // (Dividend - (Quotient * Divisor).
4284  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4285  { N0, N1 })) {
4286  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4287  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4288  AddToWorklist(Mul.getNode());
4289  AddToWorklist(Sub.getNode());
4290  CombineTo(RemNode, Sub);
4291  }
4292  return V;
4293  }
4294 
4295  // sdiv, srem -> sdivrem
4296  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4297  // true. Otherwise, we break the simplification logic in visitREM().
4299  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4300  if (SDValue DivRem = useDivRem(N))
4301  return DivRem;
4302 
4303  return SDValue();
4304 }
4305 
4306 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4307  SDLoc DL(N);
4308  EVT VT = N->getValueType(0);
4309 
4310  // fold (udiv x, (1 << c)) -> x >>u c
4311  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4312  DAG.isKnownToBeAPowerOfTwo(N1)) {
4313  SDValue LogBase2 = BuildLogBase2(N1, DL);
4314  AddToWorklist(LogBase2.getNode());
4315 
4316  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4317  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4318  AddToWorklist(Trunc.getNode());
4319  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4320  }
4321 
4322  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4323  if (N1.getOpcode() == ISD::SHL) {
4324  SDValue N10 = N1.getOperand(0);
4325  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4326  DAG.isKnownToBeAPowerOfTwo(N10)) {
4327  SDValue LogBase2 = BuildLogBase2(N10, DL);
4328  AddToWorklist(LogBase2.getNode());
4329 
4330  EVT ADDVT = N1.getOperand(1).getValueType();
4331  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4332  AddToWorklist(Trunc.getNode());
4333  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4334  AddToWorklist(Add.getNode());
4335  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4336  }
4337  }
4338 
4339  // fold (udiv x, c) -> alternate
4341  if (isConstantOrConstantVector(N1) &&
4342  !TLI.isIntDivCheap(N->getValueType(0), Attr))
4343  if (SDValue Op = BuildUDIV(N))
4344  return Op;
4345 
4346  return SDValue();
4347 }
4348 
4349 // handles ISD::SREM and ISD::UREM
4350 SDValue DAGCombiner::visitREM(SDNode *N) {
4351  unsigned Opcode = N->getOpcode();
4352  SDValue N0 = N->getOperand(0);
4353  SDValue N1 = N->getOperand(1);
4354  EVT VT = N->getValueType(0);
4355  EVT CCVT = getSetCCResultType(VT);
4356 
4357  bool isSigned = (Opcode == ISD::SREM);
4358  SDLoc DL(N);
4359 
4360  // fold (rem c1, c2) -> c1%c2
4362  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4363  return C;
4364 
4365  // fold (urem X, -1) -> select(X == -1, 0, x)
4366  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4367  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4368  DAG.getConstant(0, DL, VT), N0);
4369 
4370  if (SDValue V = simplifyDivRem(N, DAG))
4371  return V;
4372 
4373  if (SDValue NewSel = foldBinOpIntoSelect(N))
4374  return NewSel;
4375 
4376  if (isSigned) {
4377  // If we know the sign bits of both operands are zero, strength reduce to a
4378  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4379  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4380  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4381  } else {
4382  if (DAG.