DAGCombiner.cpp
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
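// Annotation added for illustration (not part of the original file): a few
// representative folds this combiner performs, written as DAG rewrites:
//   (add x, 0)            -> x
//   (add (add x, c1), c2) -> (add x, c1+c2)   // see reassociateOps below
//   (zext (load p))       -> (zextload p)     // when the target supports it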
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 #include <variant>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "dagcombine"
80 
81 STATISTIC(NodesCombined , "Number of dag nodes combined");
82 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
83 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
84 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
85 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
86 STATISTIC(SlicedLoads, "Number of load sliced");
87 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
88 
89 static cl::opt<bool>
90 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
91  cl::desc("Enable DAG combiner's use of IR alias analysis"));
92 
93 static cl::opt<bool>
94 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
95  cl::desc("Enable DAG combiner's use of TBAA"));
96 
97 #ifndef NDEBUG
98 static cl::opt<std::string>
99 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
100  cl::desc("Only use DAG-combiner alias analysis in this"
101  " function"));
102 #endif
103 
104 /// Hidden option to stress test load slicing, i.e., when this option
105 /// is enabled, load slicing bypasses most of its profitability guards.
106 static cl::opt<bool>
107 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
108  cl::desc("Bypass the profitability model of load slicing"),
109  cl::init(false));
110 
111 static cl::opt<bool>
112  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
113  cl::desc("DAG combiner may split indexing from loads"));
114 
115 static cl::opt<bool>
116  EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
117  cl::desc("DAG combiner enable merging multiple stores "
118  "into a wider store"));
119 
120 static cl::opt<unsigned> TokenFactorInlineLimit(
121  "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
122  cl::desc("Limit the number of operands to inline for Token Factors"));
123 
124 static cl::opt<unsigned> StoreMergeDependenceLimit(
125  "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
126  cl::desc("Limit the number of times for the same StoreNode and RootNode "
127  "to bail out in store merging dependence check"));
128 
129 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
130  "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
131  cl::desc("DAG combiner enable reducing the width of load/op/store "
132  "sequence"));
133 
134 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
135  "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
136  cl::desc("DAG combiner enable load/<replace bytes>/store with "
137  "a narrower store"));
138 
139 static cl::opt<bool> EnableVectorFCopySignExtendRound(
140  "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
141  cl::desc(
142  "Enable merging extends and rounds into FCOPYSIGN on vector types"));
143 
144 namespace {
145 
146  class DAGCombiner {
147  SelectionDAG &DAG;
148  const TargetLowering &TLI;
149  const SelectionDAGTargetInfo *STI;
150  CombineLevel Level = BeforeLegalizeTypes;
151  CodeGenOpt::Level OptLevel;
152  bool LegalDAG = false;
153  bool LegalOperations = false;
154  bool LegalTypes = false;
155  bool ForCodeSize;
156  bool DisableGenericCombines;
157 
158  /// Worklist of all of the nodes that need to be simplified.
159  ///
160  /// This must behave as a stack -- new nodes to process are pushed onto the
161  /// back and when processing we pop off of the back.
162  ///
163  /// The worklist will not contain duplicates but may contain null entries
164  /// due to nodes being deleted from the underlying DAG.
165  SmallVector<SDNode *, 64> Worklist;
166 
167  /// Mapping from an SDNode to its position on the worklist.
168  ///
169  /// This is used to find and remove nodes from the worklist (by nulling
170  /// them) when they are deleted from the underlying DAG. It relies on
171  /// stable indices of nodes within the worklist.
172  DenseMap<SDNode *, unsigned> WorklistMap;
173  /// This records all nodes attempted to be added to the worklist since we
174  /// last considered a new worklist entry. Because we do not add duplicate
175  /// nodes to the worklist, this is different from the tail of the worklist.
176  SmallSetVector<SDNode *, 32> PruningList;
177 
178  /// Set of nodes which have been combined (at least once).
179  ///
180  /// This is used to allow us to reliably add any operands of a DAG node
181  /// which have not yet been combined to the worklist.
182  SmallPtrSet<SDNode *, 32> CombinedNodes;
183 
184  /// Map from candidate StoreNode to the pair of RootNode and count.
185  /// The count is used to track how many times we have seen the StoreNode
186  /// with the same RootNode bail out in dependence check. If we have seen
187  /// the bail out for the same pair many times over a limit, we won't
188  /// consider the StoreNode with the same RootNode as store merging
189  /// candidate again.
190  DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
191 
192  // AA - Used for DAG load/store alias analysis.
193  AliasAnalysis *AA;
194 
195  /// When an instruction is simplified, add all users of the instruction to
196  /// the work lists because they might get more simplified now.
197  void AddUsersToWorklist(SDNode *N) {
198  for (SDNode *Node : N->uses())
199  AddToWorklist(Node);
200  }
201 
202  /// Convenient shorthand to add a node and all of its users to the worklist.
203  void AddToWorklistWithUsers(SDNode *N) {
204  AddUsersToWorklist(N);
205  AddToWorklist(N);
206  }
207 
208  // Prune potentially dangling nodes. This is called after
209  // any visit to a node, but should also be called during a visit after any
210  // failed combine which may have created a DAG node.
211  void clearAddedDanglingWorklistEntries() {
212  // Check any nodes added to the worklist to see if they are prunable.
213  while (!PruningList.empty()) {
214  auto *N = PruningList.pop_back_val();
215  if (N->use_empty())
216  recursivelyDeleteUnusedNodes(N);
217  }
218  }
219 
220  SDNode *getNextWorklistEntry() {
221  // Before we do any work, remove nodes that are not in use.
222  clearAddedDanglingWorklistEntries();
223  SDNode *N = nullptr;
224  // The Worklist holds the SDNodes in order, but it may contain null
225  // entries.
226  while (!N && !Worklist.empty()) {
227  N = Worklist.pop_back_val();
228  }
229 
230  if (N) {
231  bool GoodWorklistEntry = WorklistMap.erase(N);
232  (void)GoodWorklistEntry;
233  assert(GoodWorklistEntry &&
234  "Found a worklist entry without a corresponding map entry!");
235  }
236  return N;
237  }
238 
239  /// Call the node-specific routine that folds each particular type of node.
240  SDValue visit(SDNode *N);
241 
242  public:
243  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
244  : DAG(D), TLI(D.getTargetLoweringInfo()),
245  STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
246  ForCodeSize = DAG.shouldOptForSize();
247  DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
248 
249  MaximumLegalStoreInBits = 0;
250  // We use the minimum store size here, since that's all we can guarantee
251  // for the scalable vector types.
252  for (MVT VT : MVT::all_valuetypes())
253  if (EVT(VT).isSimple() && VT != MVT::Other &&
254  TLI.isTypeLegal(EVT(VT)) &&
255  VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
256  MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
257  }
258 
259  void ConsiderForPruning(SDNode *N) {
260  // Mark this for potential pruning.
261  PruningList.insert(N);
262  }
263 
264  /// Add to the worklist, making sure its instance is at the back (next to be
265  /// processed).
266  void AddToWorklist(SDNode *N) {
267  assert(N->getOpcode() != ISD::DELETED_NODE &&
268  "Deleted Node added to Worklist");
269 
270  // Skip handle nodes as they can't usefully be combined and confuse the
271  // zero-use deletion strategy.
272  if (N->getOpcode() == ISD::HANDLENODE)
273  return;
274 
275  ConsiderForPruning(N);
276 
277  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
278  Worklist.push_back(N);
279  }
280 
281  /// Remove all instances of N from the worklist.
282  void removeFromWorklist(SDNode *N) {
283  CombinedNodes.erase(N);
284  PruningList.remove(N);
285  StoreRootCountMap.erase(N);
286 
287  auto It = WorklistMap.find(N);
288  if (It == WorklistMap.end())
289  return; // Not in the worklist.
290 
291  // Null out the entry rather than erasing it to avoid a linear operation.
292  Worklist[It->second] = nullptr;
293  WorklistMap.erase(It);
294  }
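// Annotation (illustrative, not in the original file): how the worklist
// bookkeeping above behaves.
//   AddToWorklist(A); AddToWorklist(B); AddToWorklist(A); // A is not re-added
//   removeFromWorklist(A);  // A's Worklist slot is nulled out in O(1)
//   getNextWorklistEntry(); // pops B; the null slot left by A is skipped later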
295 
296  void deleteAndRecombine(SDNode *N);
297  bool recursivelyDeleteUnusedNodes(SDNode *N);
298 
299  /// Replaces all uses of the results of one DAG node with new values.
300  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
301  bool AddTo = true);
302 
303  /// Replaces all uses of the results of one DAG node with new values.
304  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
305  return CombineTo(N, &Res, 1, AddTo);
306  }
307 
308  /// Replaces all uses of the results of one DAG node with new values.
309  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
310  bool AddTo = true) {
311  SDValue To[] = { Res0, Res1 };
312  return CombineTo(N, To, 2, AddTo);
313  }
314 
315  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
316 
317  private:
318  unsigned MaximumLegalStoreInBits;
319 
320  /// Check the specified integer node value to see if it can be simplified or
321  /// if things it uses can be simplified by bit propagation.
322  /// If so, return true.
323  bool SimplifyDemandedBits(SDValue Op) {
324  unsigned BitWidth = Op.getScalarValueSizeInBits();
325  APInt DemandedBits = APInt::getAllOnes(BitWidth);
326  return SimplifyDemandedBits(Op, DemandedBits);
327  }
328 
329  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
330  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
331  KnownBits Known;
332  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
333  return false;
334 
335  // Revisit the node.
336  AddToWorklist(Op.getNode());
337 
338  CommitTargetLoweringOpt(TLO);
339  return true;
340  }
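// Annotation (illustrative, not in the original file): a typical demanded-bits
// simplification. For (and (or x, 0xFF00), 0x00FF) only the low 8 bits of the
// OR are demanded, so the OR contributes nothing and the expression can be
// rewritten as (and x, 0x00FF).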
341 
342  /// Check the specified vector node value to see if it can be simplified or
343  /// if things it uses can be simplified as it only uses some of the
344  /// elements. If so, return true.
345  bool SimplifyDemandedVectorElts(SDValue Op) {
346  // TODO: For now just pretend it cannot be simplified.
347  if (Op.getValueType().isScalableVector())
348  return false;
349 
350  unsigned NumElts = Op.getValueType().getVectorNumElements();
351  APInt DemandedElts = APInt::getAllOnes(NumElts);
352  return SimplifyDemandedVectorElts(Op, DemandedElts);
353  }
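// Annotation (illustrative, not in the original file): a demanded-elements
// example. For (extract_vector_elt (add v1, v2), 0) only lane 0 of the vector
// add is demanded, which may let the operands feeding the other lanes be
// simplified or dropped.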
354 
355  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
356  const APInt &DemandedElts,
357  bool AssumeSingleUse = false);
358  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
359  bool AssumeSingleUse = false);
360 
361  bool CombineToPreIndexedLoadStore(SDNode *N);
362  bool CombineToPostIndexedLoadStore(SDNode *N);
363  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
364  bool SliceUpLoad(SDNode *N);
365 
366  // Scalars have size 0 to distinguish from singleton vectors.
367  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
368  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
369  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
370 
371  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
372  /// load.
373  ///
374  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
375  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
376  /// \param EltNo index of the vector element to load.
377  /// \param OriginalLoad load that EVE came from to be replaced.
378  /// \returns EVE on success SDValue() on failure.
379  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
380  SDValue EltNo,
381  LoadSDNode *OriginalLoad);
382  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
383  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
384  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
385  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
386  SDValue PromoteIntBinOp(SDValue Op);
387  SDValue PromoteIntShiftOp(SDValue Op);
388  SDValue PromoteExtend(SDValue Op);
389  bool PromoteLoad(SDValue Op);
390 
391  /// Call the node-specific routine that knows how to fold each
392  /// particular type of node. If that doesn't do anything, try the
393  /// target-specific DAG combines.
394  SDValue combine(SDNode *N);
395 
396  // Visitation implementation - Implement dag node combining for different
397  // node types. The semantics are as follows:
398  // Return Value:
399  // SDValue.getNode() == 0 - No change was made
400  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
401  // otherwise - N should be replaced by the returned Operand.
402  //
403  SDValue visitTokenFactor(SDNode *N);
404  SDValue visitMERGE_VALUES(SDNode *N);
405  SDValue visitADD(SDNode *N);
406  SDValue visitADDLike(SDNode *N);
407  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
408  SDValue visitSUB(SDNode *N);
409  SDValue visitADDSAT(SDNode *N);
410  SDValue visitSUBSAT(SDNode *N);
411  SDValue visitADDC(SDNode *N);
412  SDValue visitADDO(SDNode *N);
413  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
414  SDValue visitSUBC(SDNode *N);
415  SDValue visitSUBO(SDNode *N);
416  SDValue visitADDE(SDNode *N);
417  SDValue visitADDCARRY(SDNode *N);
418  SDValue visitSADDO_CARRY(SDNode *N);
419  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
420  SDValue visitSUBE(SDNode *N);
421  SDValue visitSUBCARRY(SDNode *N);
422  SDValue visitSSUBO_CARRY(SDNode *N);
423  SDValue visitMUL(SDNode *N);
424  SDValue visitMULFIX(SDNode *N);
425  SDValue useDivRem(SDNode *N);
426  SDValue visitSDIV(SDNode *N);
427  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
428  SDValue visitUDIV(SDNode *N);
429  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
430  SDValue visitREM(SDNode *N);
431  SDValue visitMULHU(SDNode *N);
432  SDValue visitMULHS(SDNode *N);
433  SDValue visitAVG(SDNode *N);
434  SDValue visitSMUL_LOHI(SDNode *N);
435  SDValue visitUMUL_LOHI(SDNode *N);
436  SDValue visitMULO(SDNode *N);
437  SDValue visitIMINMAX(SDNode *N);
438  SDValue visitAND(SDNode *N);
439  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
440  SDValue visitOR(SDNode *N);
441  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
442  SDValue visitXOR(SDNode *N);
443  SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
444  SDValue visitSHL(SDNode *N);
445  SDValue visitSRA(SDNode *N);
446  SDValue visitSRL(SDNode *N);
447  SDValue visitFunnelShift(SDNode *N);
448  SDValue visitSHLSAT(SDNode *N);
449  SDValue visitRotate(SDNode *N);
450  SDValue visitABS(SDNode *N);
451  SDValue visitBSWAP(SDNode *N);
452  SDValue visitBITREVERSE(SDNode *N);
453  SDValue visitCTLZ(SDNode *N);
454  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
455  SDValue visitCTTZ(SDNode *N);
456  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
457  SDValue visitCTPOP(SDNode *N);
458  SDValue visitSELECT(SDNode *N);
459  SDValue visitVSELECT(SDNode *N);
460  SDValue visitSELECT_CC(SDNode *N);
461  SDValue visitSETCC(SDNode *N);
462  SDValue visitSETCCCARRY(SDNode *N);
463  SDValue visitSIGN_EXTEND(SDNode *N);
464  SDValue visitZERO_EXTEND(SDNode *N);
465  SDValue visitANY_EXTEND(SDNode *N);
466  SDValue visitAssertExt(SDNode *N);
467  SDValue visitAssertAlign(SDNode *N);
468  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
469  SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
470  SDValue visitTRUNCATE(SDNode *N);
471  SDValue visitBITCAST(SDNode *N);
472  SDValue visitFREEZE(SDNode *N);
473  SDValue visitBUILD_PAIR(SDNode *N);
474  SDValue visitFADD(SDNode *N);
475  SDValue visitSTRICT_FADD(SDNode *N);
476  SDValue visitFSUB(SDNode *N);
477  SDValue visitFMUL(SDNode *N);
478  SDValue visitFMA(SDNode *N);
479  SDValue visitFDIV(SDNode *N);
480  SDValue visitFREM(SDNode *N);
481  SDValue visitFSQRT(SDNode *N);
482  SDValue visitFCOPYSIGN(SDNode *N);
483  SDValue visitFPOW(SDNode *N);
484  SDValue visitSINT_TO_FP(SDNode *N);
485  SDValue visitUINT_TO_FP(SDNode *N);
486  SDValue visitFP_TO_SINT(SDNode *N);
487  SDValue visitFP_TO_UINT(SDNode *N);
488  SDValue visitFP_ROUND(SDNode *N);
489  SDValue visitFP_EXTEND(SDNode *N);
490  SDValue visitFNEG(SDNode *N);
491  SDValue visitFABS(SDNode *N);
492  SDValue visitFCEIL(SDNode *N);
493  SDValue visitFTRUNC(SDNode *N);
494  SDValue visitFFLOOR(SDNode *N);
495  SDValue visitFMinMax(SDNode *N);
496  SDValue visitBRCOND(SDNode *N);
497  SDValue visitBR_CC(SDNode *N);
498  SDValue visitLOAD(SDNode *N);
499 
500  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
501  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
502 
503  SDValue visitSTORE(SDNode *N);
504  SDValue visitLIFETIME_END(SDNode *N);
505  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
506  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
507  SDValue visitBUILD_VECTOR(SDNode *N);
508  SDValue visitCONCAT_VECTORS(SDNode *N);
509  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
510  SDValue visitVECTOR_SHUFFLE(SDNode *N);
511  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
512  SDValue visitINSERT_SUBVECTOR(SDNode *N);
513  SDValue visitMLOAD(SDNode *N);
514  SDValue visitMSTORE(SDNode *N);
515  SDValue visitMGATHER(SDNode *N);
516  SDValue visitMSCATTER(SDNode *N);
517  SDValue visitVPGATHER(SDNode *N);
518  SDValue visitVPSCATTER(SDNode *N);
519  SDValue visitFP_TO_FP16(SDNode *N);
520  SDValue visitFP16_TO_FP(SDNode *N);
521  SDValue visitFP_TO_BF16(SDNode *N);
522  SDValue visitVECREDUCE(SDNode *N);
523  SDValue visitVPOp(SDNode *N);
524 
525  SDValue visitFADDForFMACombine(SDNode *N);
526  SDValue visitFSUBForFMACombine(SDNode *N);
527  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
528 
529  SDValue XformToShuffleWithZero(SDNode *N);
530  bool reassociationCanBreakAddressingModePattern(unsigned Opc,
531  const SDLoc &DL,
532  SDNode *N,
533  SDValue N0,
534  SDValue N1);
535  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
536  SDValue N1);
537  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
538  SDValue N1, SDNodeFlags Flags);
539 
540  SDValue visitShiftByConstant(SDNode *N);
541 
542  SDValue foldSelectOfConstants(SDNode *N);
543  SDValue foldVSelectOfConstants(SDNode *N);
544  SDValue foldBinOpIntoSelect(SDNode *BO);
545  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
546  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
547  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
548  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
549  SDValue N2, SDValue N3, ISD::CondCode CC,
550  bool NotExtCompare = false);
551  SDValue convertSelectOfFPConstantsToLoadOffset(
552  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
553  ISD::CondCode CC);
554  SDValue foldSignChangeInBitcast(SDNode *N);
555  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
556  SDValue N2, SDValue N3, ISD::CondCode CC);
557  SDValue foldSelectOfBinops(SDNode *N);
558  SDValue foldSextSetcc(SDNode *N);
559  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
560  const SDLoc &DL);
561  SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
562  SDValue unfoldMaskedMerge(SDNode *N);
563  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
564  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
565  const SDLoc &DL, bool foldBooleans);
566  SDValue rebuildSetCC(SDValue N);
567 
568  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
569  SDValue &CC, bool MatchStrict = false) const;
570  bool isOneUseSetCC(SDValue N) const;
571 
572  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
573  unsigned HiOp);
574  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
575  SDValue CombineExtLoad(SDNode *N);
576  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
577  SDValue combineRepeatedFPDivisors(SDNode *N);
578  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
579  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
580  SDValue BuildSDIV(SDNode *N);
581  SDValue BuildSDIVPow2(SDNode *N);
582  SDValue BuildUDIV(SDNode *N);
583  SDValue BuildSREMPow2(SDNode *N);
584  SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
585  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
586  SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
587  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
588  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
589  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
590  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
591  SDNodeFlags Flags, bool Reciprocal);
592  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
593  SDNodeFlags Flags, bool Reciprocal);
594  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
595  bool DemandHighBits = true);
596  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
597  SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
598  SDValue InnerPos, SDValue InnerNeg, bool HasPos,
599  unsigned PosOpcode, unsigned NegOpcode,
600  const SDLoc &DL);
601  SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
602  SDValue InnerPos, SDValue InnerNeg, bool HasPos,
603  unsigned PosOpcode, unsigned NegOpcode,
604  const SDLoc &DL);
605  SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
606  SDValue MatchLoadCombine(SDNode *N);
607  SDValue mergeTruncStores(StoreSDNode *N);
608  SDValue reduceLoadWidth(SDNode *N);
609  SDValue ReduceLoadOpStoreWidth(SDNode *N);
610  SDValue splitMergedValStore(StoreSDNode *ST);
611  SDValue TransformFPLoadStorePair(SDNode *N);
612  SDValue convertBuildVecZextToZext(SDNode *N);
613  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
614  SDValue reduceBuildVecTruncToBitCast(SDNode *N);
615  SDValue reduceBuildVecToShuffle(SDNode *N);
616  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
617  ArrayRef<int> VectorMask, SDValue VecIn1,
618  SDValue VecIn2, unsigned LeftIdx,
619  bool DidSplitVec);
620  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
621 
622  /// Walk up chain skipping non-aliasing memory nodes,
623  /// looking for aliasing nodes and adding them to the Aliases vector.
624  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
625  SmallVectorImpl<SDValue> &Aliases);
626 
627  /// Return true if there is any possibility that the two addresses overlap.
628  bool mayAlias(SDNode *Op0, SDNode *Op1) const;
629 
630  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
631  /// chain (aliasing node.)
632  SDValue FindBetterChain(SDNode *N, SDValue Chain);
633 
634  /// Try to replace a store and any possibly adjacent stores on
635  /// consecutive chains with better chains. Return true only if St is
636  /// replaced.
637  ///
638  /// Notice that other chains may still be replaced even if the function
639  /// returns false.
640  bool findBetterNeighborChains(StoreSDNode *St);
641 
642  // Helper for findBetterNeighborChains. Walk up the store chain and add
643  // additional chained stores that do not overlap and can be parallelized.
644  bool parallelizeChainedStores(StoreSDNode *St);
645 
646  /// Holds a pointer to an LSBaseSDNode as well as information on where it
647  /// is located in a sequence of memory operations connected by a chain.
648  struct MemOpLink {
649  // Ptr to the mem node.
650  LSBaseSDNode *MemNode;
651 
652  // Offset from the base ptr.
653  int64_t OffsetFromBase;
654 
655  MemOpLink(LSBaseSDNode *N, int64_t Offset)
656  : MemNode(N), OffsetFromBase(Offset) {}
657  };
658 
659  // Classify the origin of a stored value.
660  enum class StoreSource { Unknown, Constant, Extract, Load };
661  StoreSource getStoreSource(SDValue StoreVal) {
662  switch (StoreVal.getOpcode()) {
663  case ISD::Constant:
664  case ISD::ConstantFP:
665  return StoreSource::Constant;
666  case ISD::EXTRACT_VECTOR_ELT:
667  case ISD::EXTRACT_SUBVECTOR:
668  return StoreSource::Extract;
669  case ISD::LOAD:
670  return StoreSource::Load;
671  default:
672  return StoreSource::Unknown;
673  }
674  }
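// Annotation (illustrative, not in the original file): for a store such as
// (store (extract_vector_elt v, 0), p) the stored value classifies as
// StoreSource::Extract, which is what later lets mergeConsecutiveStores turn a
// run of per-lane scalar stores into a single vector store.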
675 
676  /// This is a helper function for visitMUL to check the profitability
677  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
678  /// MulNode is the original multiply, AddNode is (add x, c1),
679  /// and ConstNode is c2.
680  bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
681  SDValue ConstNode);
682 
683  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
684  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
685  /// the type of the loaded value to be extended.
686  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
687  EVT LoadResultTy, EVT &ExtVT);
688 
689  /// Helper function to calculate whether the given Load/Store can have its
690  /// width reduced to ExtVT.
691  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
692  EVT &MemVT, unsigned ShAmt = 0);
693 
694  /// Used by BackwardsPropagateMask to find suitable loads.
695  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
696  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
697  ConstantSDNode *Mask, SDNode *&NodeToMask);
698  /// Attempt to propagate a given AND node back to load leaves so that they
699  /// can be combined into narrow loads.
700  bool BackwardsPropagateMask(SDNode *N);
701 
702  /// Helper function for mergeConsecutiveStores which merges the component
703  /// store chains.
704  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
705  unsigned NumStores);
706 
707  /// This is a helper function for mergeConsecutiveStores. When the source
708  /// elements of the consecutive stores are all constants or all extracted
709  /// vector elements, try to merge them into one larger store introducing
710  /// bitcasts if necessary. \return True if a merged store was created.
711  bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
712  EVT MemVT, unsigned NumStores,
713  bool IsConstantSrc, bool UseVector,
714  bool UseTrunc);
715 
716  /// This is a helper function for mergeConsecutiveStores. Stores that
717  /// potentially may be merged with St are placed in StoreNodes. RootNode is
718  /// a chain predecessor to all store candidates.
719  void getStoreMergeCandidates(StoreSDNode *St,
720  SmallVectorImpl<MemOpLink> &StoreNodes,
721  SDNode *&Root);
722 
723  /// Helper function for mergeConsecutiveStores. Checks if candidate stores
724  /// have indirect dependency through their operands. RootNode is the
725  /// predecessor to all stores calculated by getStoreMergeCandidates and is
726  /// used to prune the dependency check. \return True if safe to merge.
727  bool checkMergeStoreCandidatesForDependencies(
728  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
729  SDNode *RootNode);
730 
731  /// This is a helper function for mergeConsecutiveStores. Given a list of
732  /// store candidates, find the first N that are consecutive in memory.
733  /// Returns 0 if there are not at least 2 consecutive stores to try merging.
734  unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
735  int64_t ElementSizeBytes) const;
736 
737  /// This is a helper function for mergeConsecutiveStores. It is used for
738  /// store chains that are composed entirely of constant values.
739  bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
740  unsigned NumConsecutiveStores,
741  EVT MemVT, SDNode *Root, bool AllowVectors);
742 
743  /// This is a helper function for mergeConsecutiveStores. It is used for
744  /// store chains that are composed entirely of extracted vector elements.
745  /// When extracting multiple vector elements, try to store them in one
746  /// vector store rather than a sequence of scalar stores.
747  bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
748  unsigned NumConsecutiveStores, EVT MemVT,
749  SDNode *Root);
750 
751  /// This is a helper function for mergeConsecutiveStores. It is used for
752  /// store chains that are composed entirely of loaded values.
753  bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
754  unsigned NumConsecutiveStores, EVT MemVT,
755  SDNode *Root, bool AllowVectors,
756  bool IsNonTemporalStore, bool IsNonTemporalLoad);
757 
758  /// Merge consecutive store operations into a wide store.
759  /// This optimization uses wide integers or vectors when possible.
760  /// \return true if stores were merged.
761  bool mergeConsecutiveStores(StoreSDNode *St);
762 
763  /// Try to transform a truncation where C is a constant:
764  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
765  ///
766  /// \p N needs to be a truncation and its first operand an AND. Other
767  /// requirements are checked by the function (e.g. that trunc is
768  /// single-use) and if missed an empty SDValue is returned.
769  SDValue distributeTruncateThroughAnd(SDNode *N);
770 
771  /// Helper function to determine whether the target supports operation
772  /// given by \p Opcode for type \p VT, that is, whether the operation
773  /// is legal or custom before legalizing operations, and whether is
774  /// legal (but not custom) after legalization.
775  bool hasOperation(unsigned Opcode, EVT VT) {
776  return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
777  }
778 
779  public:
780  /// Runs the dag combiner on all nodes in the work list
781  void Run(CombineLevel AtLevel);
782 
783  SelectionDAG &getDAG() const { return DAG; }
784 
785  /// Returns a type large enough to hold any valid shift amount - before type
786  /// legalization these can be huge.
787  EVT getShiftAmountTy(EVT LHSTy) {
788  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
789  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
790  }
791 
792  /// This method returns true if we are running before type legalization or
793  /// if the specified VT is legal.
794  bool isTypeLegal(const EVT &VT) {
795  if (!LegalTypes) return true;
796  return TLI.isTypeLegal(VT);
797  }
798 
799  /// Convenience wrapper around TargetLowering::getSetCCResultType
800  EVT getSetCCResultType(EVT VT) const {
801  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
802  }
803 
804  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
805  SDValue OrigLoad, SDValue ExtLoad,
806  ISD::NodeType ExtType);
807  };
808 
809 /// This class is a DAGUpdateListener that removes any deleted
810 /// nodes from the worklist.
811 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
812  DAGCombiner &DC;
813 
814 public:
815  explicit WorklistRemover(DAGCombiner &dc)
816  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
817 
818  void NodeDeleted(SDNode *N, SDNode *E) override {
819  DC.removeFromWorklist(N);
820  }
821 };
822 
823 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
824  DAGCombiner &DC;
825 
826 public:
827  explicit WorklistInserter(DAGCombiner &dc)
828  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
829 
830  // FIXME: Ideally we could add N to the worklist, but this causes exponential
831  // compile time costs in large DAGs, e.g. Halide.
832  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
833 };
834 
835 } // end anonymous namespace
836 
837 //===----------------------------------------------------------------------===//
838 // TargetLowering::DAGCombinerInfo implementation
839 //===----------------------------------------------------------------------===//
840 
841 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
842  ((DAGCombiner*)DC)->AddToWorklist(N);
843 }
844 
845 SDValue TargetLowering::DAGCombinerInfo::
846 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
847  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
848 }
849 
850 SDValue TargetLowering::DAGCombinerInfo::
851 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
852  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
853 }
854 
855 SDValue TargetLowering::DAGCombinerInfo::
856 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
857  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
858 }
859 
860 bool TargetLowering::DAGCombinerInfo::
861 recursivelyDeleteUnusedNodes(SDNode *N) {
862  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
863 }
864 
865 void TargetLowering::DAGCombinerInfo::
866 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
867  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
868 }
869 
870 //===----------------------------------------------------------------------===//
871 // Helper Functions
872 //===----------------------------------------------------------------------===//
873 
874 void DAGCombiner::deleteAndRecombine(SDNode *N) {
875  removeFromWorklist(N);
876 
877  // If the operands of this node are only used by the node, they will now be
878  // dead. Make sure to re-visit them and recursively delete dead nodes.
879  for (const SDValue &Op : N->ops())
880  // For an operand generating multiple values, one of the values may
881  // become dead allowing further simplification (e.g. split index
882  // arithmetic from an indexed load).
883  if (Op->hasOneUse() || Op->getNumValues() > 1)
884  AddToWorklist(Op.getNode());
885 
886  DAG.DeleteNode(N);
887 }
888 
889 // APInts must be the same size for most operations; this helper
890 // function zero extends the shorter of the pair so that they match.
891 // We provide an Offset so that we can create bitwidths that won't overflow.
892 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
893  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
894  LHS = LHS.zext(Bits);
895  RHS = RHS.zext(Bits);
896 }
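// Annotation (illustrative, not in the original file): with LHS an 8-bit
// APInt, RHS a 16-bit APInt and Offset = 1, Bits = 1 + max(8, 16) = 17, so
// both values are zero-extended to 17 bits before they are combined.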
897 
898 // Return true if this node is a setcc, or is a select_cc
899 // that selects between the target values used for true and false, making it
900 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
901 // the appropriate nodes based on the type of node we are checking. This
902 // simplifies life a bit for the callers.
903 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
904  SDValue &CC, bool MatchStrict) const {
905  if (N.getOpcode() == ISD::SETCC) {
906  LHS = N.getOperand(0);
907  RHS = N.getOperand(1);
908  CC = N.getOperand(2);
909  return true;
910  }
911 
912  if (MatchStrict &&
913  (N.getOpcode() == ISD::STRICT_FSETCC ||
914  N.getOpcode() == ISD::STRICT_FSETCCS)) {
915  LHS = N.getOperand(1);
916  RHS = N.getOperand(2);
917  CC = N.getOperand(3);
918  return true;
919  }
920 
921  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
922  !TLI.isConstFalseVal(N.getOperand(3)))
923  return false;
924 
925  if (TLI.getBooleanContents(N.getValueType()) ==
926  TargetLowering::UndefinedBooleanContent)
927  return false;
928 
929  LHS = N.getOperand(0);
930  RHS = N.getOperand(1);
931  CC = N.getOperand(4);
932  return true;
933 }
934 
935 /// Return true if this is a SetCC-equivalent operation with only one use.
936 /// If this is true, it allows the users to invert the operation for free when
937 /// it is profitable to do so.
938 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
939  SDValue N0, N1, N2;
940  if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
941  return true;
942  return false;
943 }
944 
945 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
946  if (!ScalarTy.isSimple())
947  return false;
948 
949  uint64_t MaskForTy = 0ULL;
950  switch (ScalarTy.getSimpleVT().SimpleTy) {
951  case MVT::i8:
952  MaskForTy = 0xFFULL;
953  break;
954  case MVT::i16:
955  MaskForTy = 0xFFFFULL;
956  break;
957  case MVT::i32:
958  MaskForTy = 0xFFFFFFFFULL;
959  break;
960  default:
961  return false;
962  break;
963  }
964 
965  APInt Val;
966  if (ISD::isConstantSplatVector(N, Val))
967  return Val.getLimitedValue() == MaskForTy;
968 
969  return false;
970 }
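// Annotation (illustrative, not in the original file): a v4i32 splat whose
// elements are all 0xFFFF matches ScalarTy == MVT::i16 here, since 0xFFFF is
// exactly the all-ones mask for a 16-bit element.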
971 
972 // Determines if it is a constant integer or a splat/build vector of constant
973 // integers (and undefs).
974 // Do not permit build vector implicit truncation.
975 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
976  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
977  return !(Const->isOpaque() && NoOpaques);
978  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
979  return false;
980  unsigned BitWidth = N.getScalarValueSizeInBits();
981  for (const SDValue &Op : N->op_values()) {
982  if (Op.isUndef())
983  continue;
984  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
985  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
986  (Const->isOpaque() && NoOpaques))
987  return false;
988  }
989  return true;
990 }
991 
992 // Determines if a BUILD_VECTOR is composed entirely of constants, possibly
993 // mixed with undefs.
994 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
995  if (V.getOpcode() != ISD::BUILD_VECTOR)
996  return false;
997  return isConstantOrConstantVector(V, NoOpaques) ||
998  ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
999 }
1000 
1001 // Determine if this is an indexed load with an opaque target constant index.
1002 static bool canSplitIdx(LoadSDNode *LD) {
1003  return MaySplitLoadIndex &&
1004  (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1005  !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1006 }
1007 
1008 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1009  const SDLoc &DL,
1010  SDNode *N,
1011  SDValue N0,
1012  SDValue N1) {
1013  // Currently this only tries to ensure we don't undo the GEP splits done by
1014  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1015  // we check if the following transformation would be problematic:
1016  // (load/store (add, (add, x, offset1), offset2)) ->
1017  // (load/store (add, x, offset1+offset2)).
1018 
1019  // (load/store (add, (add, x, y), offset2)) ->
1020  // (load/store (add, (add, x, offset2), y)).
1021 
1022  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1023  return false;
1024 
1025  auto *C2 = dyn_cast<ConstantSDNode>(N1);
1026  if (!C2)
1027  return false;
1028 
1029  const APInt &C2APIntVal = C2->getAPIntValue();
1030  if (C2APIntVal.getSignificantBits() > 64)
1031  return false;
1032 
1033  if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1034  if (N0.hasOneUse())
1035  return false;
1036 
1037  const APInt &C1APIntVal = C1->getAPIntValue();
1038  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1039  if (CombinedValueIntVal.getSignificantBits() > 64)
1040  return false;
1041  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1042 
1043  for (SDNode *Node : N->uses()) {
1044  if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1045  // Is x[offset2] already not a legal addressing mode? If so then
1046  // reassociating the constants breaks nothing (we test offset2 because
1047  // that's the one we hope to fold into the load or store).
1048  TargetLoweringBase::AddrMode AM;
1049  AM.HasBaseReg = true;
1050  AM.BaseOffs = C2APIntVal.getSExtValue();
1051  EVT VT = LoadStore->getMemoryVT();
1052  unsigned AS = LoadStore->getAddressSpace();
1053  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1054  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1055  continue;
1056 
1057  // Would x[offset1+offset2] still be a legal addressing mode?
1058  AM.BaseOffs = CombinedValue;
1059  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1060  return true;
1061  }
1062  }
1063  } else {
1064  if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1065  if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1066  return false;
1067 
1068  for (SDNode *Node : N->uses()) {
1069  auto *LoadStore = dyn_cast<MemSDNode>(Node);
1070  if (!LoadStore)
1071  return false;
1072 
1073  // Is x[offset2] a legal addressing mode? If so then
1074  // reassociating the constants breaks address pattern
1075  TargetLoweringBase::AddrMode AM;
1076  AM.HasBaseReg = true;
1077  AM.BaseOffs = C2APIntVal.getSExtValue();
1078  EVT VT = LoadStore->getMemoryVT();
1079  unsigned AS = LoadStore->getAddressSpace();
1080  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1081  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1082  return false;
1083  }
1084  return true;
1085  }
1086 
1087  return false;
1088 }
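// Annotation (illustrative, not in the original file): suppose a target only
// folds load/store offsets in [-256, 255]. For a multi-use
// (add (add x, 200), 100) feeding a load, offset 100 is legal but the combined
// offset 300 is not, so the function returns true and the reassociation is
// blocked to preserve the addressing mode.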
1089 
1090 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1091 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1092 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1093  SDValue N0, SDValue N1) {
1094  EVT VT = N0.getValueType();
1095 
1096  if (N0.getOpcode() != Opc)
1097  return SDValue();
1098 
1099  SDValue N00 = N0.getOperand(0);
1100  SDValue N01 = N0.getOperand(1);
1101 
1102  if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1103  if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1104  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1105  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1106  return DAG.getNode(Opc, DL, VT, N00, OpNode);
1107  return SDValue();
1108  }
1109  if (TLI.isReassocProfitable(DAG, N0, N1)) {
1110  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1111  // iff (op x, c1) has one use
1112  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
1113  return DAG.getNode(Opc, DL, VT, OpNode, N01);
1114  }
1115  }
1116 
1117  // Check for repeated operand logic simplifications.
1118  if (Opc == ISD::AND || Opc == ISD::OR) {
1119  // (N00 & N01) & N00 --> N00 & N01
1120  // (N00 & N01) & N01 --> N00 & N01
1121  // (N00 | N01) | N00 --> N00 | N01
1122  // (N00 | N01) | N01 --> N00 | N01
1123  if (N1 == N00 || N1 == N01)
1124  return N0;
1125  }
1126  if (Opc == ISD::XOR) {
1127  // (N00 ^ N01) ^ N00 --> N01
1128  if (N1 == N00)
1129  return N01;
1130  // (N00 ^ N01) ^ N01 --> N00
1131  if (N1 == N01)
1132  return N00;
1133  }
1134 
1135  if (TLI.isReassocProfitable(DAG, N0, N1)) {
1136  if (N1 != N01) {
1137  // Reassociate if (op N00, N1) already exists
1138  if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1139  // If (op (op N00, N1), N01) already exists, we need to stop
1140  // reassociating to avoid an infinite loop.
1141  if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1142  return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1143  }
1144  }
1145 
1146  if (N1 != N00) {
1147  // Reassociate if (op N01, N1) already exists
1148  if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1149  // If (op (op N01, N1), N00) already exists, we need to stop
1150  // reassociating to avoid an infinite loop.
1151  if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1152  return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1153  }
1154  }
1155  }
1156 
1157  return SDValue();
1158 }
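// Annotation (illustrative, not in the original file): with Opc == ISD::ADD,
// N0 == (add x, 8) and N1 == 16, the constant-constant path above folds the
// immediates and returns (add x, 24); with Opc == ISD::AND, N0 == (and x, y)
// and N1 == x, the repeated-operand check simply returns (and x, y).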
1159 
1160 // Try to reassociate commutative binops.
1161 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1162  SDValue N1, SDNodeFlags Flags) {
1163  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1164 
1165  // Floating-point reassociation is not allowed without loose FP math.
1166  if (N0.getValueType().isFloatingPoint() ||
1167  N1.getValueType().isFloatingPoint())
1168  if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1169  return SDValue();
1170 
1171  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1172  return Combined;
1173  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1174  return Combined;
1175  return SDValue();
1176 }
1177 
1178 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1179  bool AddTo) {
1180  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1181  ++NodesCombined;
1182  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1183  To[0].dump(&DAG);
1184  dbgs() << " and " << NumTo - 1 << " other values\n");
1185  for (unsigned i = 0, e = NumTo; i != e; ++i)
1186  assert((!To[i].getNode() ||
1187  N->getValueType(i) == To[i].getValueType()) &&
1188  "Cannot combine value to value of different type!");
1189 
1190  WorklistRemover DeadNodes(*this);
1191  DAG.ReplaceAllUsesWith(N, To);
1192  if (AddTo) {
1193  // Push the new nodes and any users onto the worklist
1194  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1195  if (To[i].getNode())
1196  AddToWorklistWithUsers(To[i].getNode());
1197  }
1198  }
1199 
1200  // Finally, if the node is now dead, remove it from the graph. The node
1201  // may not be dead if the replacement process recursively simplified to
1202  // something else needing this node.
1203  if (N->use_empty())
1204  deleteAndRecombine(N);
1205  return SDValue(N, 0);
1206 }
1207 
1208 void DAGCombiner::
1209 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1210  // Replace the old value with the new one.
1211  ++NodesCombined;
1212  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1213  dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1214 
1215  // Replace all uses.
1216  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1217 
1218  // Push the new node and any (possibly new) users onto the worklist.
1219  AddToWorklistWithUsers(TLO.New.getNode());
1220 
1221  // Finally, if the node is now dead, remove it from the graph.
1222  recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1223 }
1224 
1225 /// Check the specified integer node value to see if it can be simplified or if
1226 /// things it uses can be simplified by bit propagation. If so, return true.
1227 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1228  const APInt &DemandedElts,
1229  bool AssumeSingleUse) {
1230  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1231  KnownBits Known;
1232  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1233  AssumeSingleUse))
1234  return false;
1235 
1236  // Revisit the node.
1237  AddToWorklist(Op.getNode());
1238 
1239  CommitTargetLoweringOpt(TLO);
1240  return true;
1241 }
1242 
1243 /// Check the specified vector node value to see if it can be simplified or
1244 /// if things it uses can be simplified as it only uses some of the elements.
1245 /// If so, return true.
1246 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1247  const APInt &DemandedElts,
1248  bool AssumeSingleUse) {
1249  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1250  APInt KnownUndef, KnownZero;
1251  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1252  TLO, 0, AssumeSingleUse))
1253  return false;
1254 
1255  // Revisit the node.
1256  AddToWorklist(Op.getNode());
1257 
1258  CommitTargetLoweringOpt(TLO);
1259  return true;
1260 }
1261 
1262 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1263  SDLoc DL(Load);
1264  EVT VT = Load->getValueType(0);
1265  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1266 
1267  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1268  Trunc.dump(&DAG); dbgs() << '\n');
1269 
1270  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1271  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1272 
1273  AddToWorklist(Trunc.getNode());
1274  recursivelyDeleteUnusedNodes(Load);
1275 }
1276 
1277 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1278  Replace = false;
1279  SDLoc DL(Op);
1280  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1281  LoadSDNode *LD = cast<LoadSDNode>(Op);
1282  EVT MemVT = LD->getMemoryVT();
1283  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1284  : LD->getExtensionType();
1285  Replace = true;
1286  return DAG.getExtLoad(ExtType, DL, PVT,
1287  LD->getChain(), LD->getBasePtr(),
1288  MemVT, LD->getMemOperand());
1289  }
1290 
1291  unsigned Opc = Op.getOpcode();
1292  switch (Opc) {
1293  default: break;
1294  case ISD::AssertSext:
1295  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1296  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1297  break;
1298  case ISD::AssertZext:
1299  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1300  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1301  break;
1302  case ISD::Constant: {
1303  unsigned ExtOpc =
1304  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1305  return DAG.getNode(ExtOpc, DL, PVT, Op);
1306  }
1307  }
1308 
1309  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1310  return SDValue();
1311  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1312 }
1313 
1314 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1315  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1316  return SDValue();
1317  EVT OldVT = Op.getValueType();
1318  SDLoc DL(Op);
1319  bool Replace = false;
1320  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1321  if (!NewOp.getNode())
1322  return SDValue();
1323  AddToWorklist(NewOp.getNode());
1324 
1325  if (Replace)
1326  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1327  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1328  DAG.getValueType(OldVT));
1329 }
1330 
1331 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1332  EVT OldVT = Op.getValueType();
1333  SDLoc DL(Op);
1334  bool Replace = false;
1335  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1336  if (!NewOp.getNode())
1337  return SDValue();
1338  AddToWorklist(NewOp.getNode());
1339 
1340  if (Replace)
1341  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1342  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1343 }
1344 
1345 /// Promote the specified integer binary operation if the target indicates it is
1346 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1347 /// i32 since i16 instructions are longer.
1348 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1349  if (!LegalOperations)
1350  return SDValue();
1351 
1352  EVT VT = Op.getValueType();
1353  if (VT.isVector() || !VT.isInteger())
1354  return SDValue();
1355 
1356  // If operation type is 'undesirable', e.g. i16 on x86, consider
1357  // promoting it.
1358  unsigned Opc = Op.getOpcode();
1359  if (TLI.isTypeDesirableForOp(Opc, VT))
1360  return SDValue();
1361 
1362  EVT PVT = VT;
1363  // Consult target whether it is a good idea to promote this operation and
1364  // what's the right type to promote it to.
1365  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1366  assert(PVT != VT && "Don't know what type to promote to!");
1367 
1368  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1369 
1370  bool Replace0 = false;
1371  SDValue N0 = Op.getOperand(0);
1372  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1373 
1374  bool Replace1 = false;
1375  SDValue N1 = Op.getOperand(1);
1376  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1377  SDLoc DL(Op);
1378 
1379  SDValue RV =
1380  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1381 
1382  // We are always replacing N0/N1's use in N and only need additional
1383  // replacements if there are additional uses.
1384  // Note: We are checking uses of the *nodes* (SDNode) rather than values
1385  // (SDValue) here because the node may reference multiple values
1386  // (for example, the chain value of a load node).
1387  Replace0 &= !N0->hasOneUse();
1388  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1389 
1390  // Combine Op here so it is preserved past replacements.
1391  CombineTo(Op.getNode(), RV);
1392 
1393  // If operands have a use ordering, make sure we deal with
1394  // predecessor first.
1395  if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1396  std::swap(N0, N1);
1397  std::swap(NN0, NN1);
1398  }
1399 
1400  if (Replace0) {
1401  AddToWorklist(NN0.getNode());
1402  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1403  }
1404  if (Replace1) {
1405  AddToWorklist(NN1.getNode());
1406  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1407  }
1408  return Op;
1409  }
1410  return SDValue();
1411 }
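// Annotation (illustrative, not in the original file): on an x86-like target
// where i16 is undesirable, an i16 (add a, b) is rebuilt here as
// (trunc (add (anyext a), (anyext b))) in i32, trading the longer 16-bit
// encodings for ordinary 32-bit instructions.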
1412 
1413 /// Promote the specified integer shift operation if the target indicates it is
1414 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1415 /// i32 since i16 instructions are longer.
1416 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1417  if (!LegalOperations)
1418  return SDValue();
1419 
1420  EVT VT = Op.getValueType();
1421  if (VT.isVector() || !VT.isInteger())
1422  return SDValue();
1423 
1424  // If operation type is 'undesirable', e.g. i16 on x86, consider
1425  // promoting it.
1426  unsigned Opc = Op.getOpcode();
1427  if (TLI.isTypeDesirableForOp(Opc, VT))
1428  return SDValue();
1429 
1430  EVT PVT = VT;
1431  // Consult target whether it is a good idea to promote this operation and
1432  // what's the right type to promote it to.
1433  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1434  assert(PVT != VT && "Don't know what type to promote to!");
1435 
1436  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1437 
1438  bool Replace = false;
1439  SDValue N0 = Op.getOperand(0);
1440  if (Opc == ISD::SRA)
1441  N0 = SExtPromoteOperand(N0, PVT);
1442  else if (Opc == ISD::SRL)
1443  N0 = ZExtPromoteOperand(N0, PVT);
1444  else
1445  N0 = PromoteOperand(N0, PVT, Replace);
1446 
1447  if (!N0.getNode())
1448  return SDValue();
1449 
1450  SDLoc DL(Op);
1451  SDValue N1 = Op.getOperand(1);
1452  SDValue RV =
1453  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1454 
1455  if (Replace)
1456  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1457 
1458  // Deal with Op being deleted.
1459  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1460  return RV;
1461  }
1462  return SDValue();
1463 }
1464 
1465 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1466  if (!LegalOperations)
1467  return SDValue();
1468 
1469  EVT VT = Op.getValueType();
1470  if (VT.isVector() || !VT.isInteger())
1471  return SDValue();
1472 
1473  // If operation type is 'undesirable', e.g. i16 on x86, consider
1474  // promoting it.
1475  unsigned Opc = Op.getOpcode();
1476  if (TLI.isTypeDesirableForOp(Opc, VT))
1477  return SDValue();
1478 
1479  EVT PVT = VT;
1480  // Consult target whether it is a good idea to promote this operation and
1481  // what's the right type to promote it to.
1482  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1483  assert(PVT != VT && "Don't know what type to promote to!");
1484  // fold (aext (aext x)) -> (aext x)
1485  // fold (aext (zext x)) -> (zext x)
1486  // fold (aext (sext x)) -> (sext x)
1487  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1488  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1489  }
1490  return SDValue();
1491 }
1492 
1493 bool DAGCombiner::PromoteLoad(SDValue Op) {
1494  if (!LegalOperations)
1495  return false;
1496 
1497  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1498  return false;
1499 
1500  EVT VT = Op.getValueType();
1501  if (VT.isVector() || !VT.isInteger())
1502  return false;
1503 
1504  // If operation type is 'undesirable', e.g. i16 on x86, consider
1505  // promoting it.
1506  unsigned Opc = Op.getOpcode();
1507  if (TLI.isTypeDesirableForOp(Opc, VT))
1508  return false;
1509 
1510  EVT PVT = VT;
1511  // Consult target whether it is a good idea to promote this operation and
1512  // what's the right type to promote it to.
1513  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1514  assert(PVT != VT && "Don't know what type to promote to!");
1515 
1516  SDLoc DL(Op);
1517  SDNode *N = Op.getNode();
1518  LoadSDNode *LD = cast<LoadSDNode>(N);
1519  EVT MemVT = LD->getMemoryVT();
1520  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1521  : LD->getExtensionType();
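  // Re-create the load at the wider type PVT with the original extension kind
  // and memory operand, then truncate the result back to VT for the users.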
1522  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1523  LD->getChain(), LD->getBasePtr(),
1524  MemVT, LD->getMemOperand());
1525  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1526 
1527  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1528  Result.dump(&DAG); dbgs() << '\n');
1529 
1530  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1531  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1532 
1533  AddToWorklist(Result.getNode());
1534  recursivelyDeleteUnusedNodes(N);
1535  return true;
1536  }
1537 
1538  return false;
1539 }
1540 
1541 /// Recursively delete a node which has no uses and any operands for
1542 /// which it is the only use.
1543 ///
1544 /// Note that this both deletes the nodes and removes them from the worklist.
1545 /// It also adds any nodes that have had a user deleted to the worklist, as they
1546 /// may now have only one use and be subject to other combines.
1547 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1548  if (!N->use_empty())
1549  return false;
1550 
1551  SmallSetVector<SDNode *, 16> Nodes;
1552  Nodes.insert(N);
1553  do {
1554  N = Nodes.pop_back_val();
1555  if (!N)
1556  continue;
1557 
1558  if (N->use_empty()) {
1559  for (const SDValue &ChildN : N->op_values())
1560  Nodes.insert(ChildN.getNode());
1561 
1562  removeFromWorklist(N);
1563  DAG.DeleteNode(N);
1564  } else {
1565  AddToWorklist(N);
1566  }
1567  } while (!Nodes.empty());
1568  return true;
1569 }
1570 
1571 //===----------------------------------------------------------------------===//
1572 // Main DAG Combiner implementation
1573 //===----------------------------------------------------------------------===//
1574 
1575 void DAGCombiner::Run(CombineLevel AtLevel) {
1576  // Set the instance variables so that the various visit routines may use them.
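  // The checks below are cumulative: each flag implies the flags for all
  // earlier legalization stages (types, then vector operations, then the
  // full DAG).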
1577  Level = AtLevel;
1578  LegalDAG = Level >= AfterLegalizeDAG;
1579  LegalOperations = Level >= AfterLegalizeVectorOps;
1580  LegalTypes = Level >= AfterLegalizeTypes;
1581 
1582  WorklistInserter AddNodes(*this);
1583 
1584  // Add all the dag nodes to the worklist.
1585  for (SDNode &Node : DAG.allnodes())
1586  AddToWorklist(&Node);
1587 
1588  // Create a dummy node (which is not added to allnodes), that adds a reference
1589  // to the root node, preventing it from being deleted, and tracking any
1590  // changes of the root.
1591  HandleSDNode Dummy(DAG.getRoot());
1592 
1593  // While we have a valid worklist entry node, try to combine it.
1594  while (SDNode *N = getNextWorklistEntry()) {
1595  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1596  // N is deleted from the DAG, since they too may now be dead or may have a
1597  // reduced number of uses, allowing other xforms.
1598  if (recursivelyDeleteUnusedNodes(N))
1599  continue;
1600 
1601  WorklistRemover DeadNodes(*this);
1602 
1603  // If this combine is running after legalizing the DAG, re-legalize any
1604  // nodes pulled off the worklist.
1605  if (LegalDAG) {
1606  SmallSetVector<SDNode *, 16> UpdatedNodes;
1607  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1608 
1609  for (SDNode *LN : UpdatedNodes)
1610  AddToWorklistWithUsers(LN);
1611 
1612  if (!NIsValid)
1613  continue;
1614  }
1615 
1616  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1617 
1618  // Add any operands of the new node which have not yet been combined to the
1619  // worklist as well. Because the worklist uniques things already, this
1620  // won't repeatedly process the same operand.
1621  CombinedNodes.insert(N);
1622  for (const SDValue &ChildN : N->op_values())
1623  if (!CombinedNodes.count(ChildN.getNode()))
1624  AddToWorklist(ChildN.getNode());
1625 
1626  SDValue RV = combine(N);
1627 
1628  if (!RV.getNode())
1629  continue;
1630 
1631  ++NodesCombined;
1632 
1633  // If we get back the same node we passed in, rather than a new node or
1634  // zero, we know that the node must have defined multiple values and
1635  // CombineTo was used. Since CombineTo takes care of the worklist
1636  // mechanics for us, we have no work to do in this case.
1637  if (RV.getNode() == N)
1638  continue;
1639 
1640  assert(N->getOpcode() != ISD::DELETED_NODE &&
1641  RV.getOpcode() != ISD::DELETED_NODE &&
1642  "Node was deleted but visit returned new node!");
1643 
1644  LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1645 
1646  if (N->getNumValues() == RV->getNumValues())
1647  DAG.ReplaceAllUsesWith(N, RV.getNode());
1648  else {
1649  assert(N->getValueType(0) == RV.getValueType() &&
1650  N->getNumValues() == 1 && "Type mismatch");
1651  DAG.ReplaceAllUsesWith(N, &RV);
1652  }
1653 
1654  // Push the new node and any users onto the worklist. Omit this if the
1655  // new node is the EntryToken (e.g. if a store managed to get optimized
1656  // out), because re-visiting the EntryToken and its users will not uncover
1657  // any additional opportunities, but there may be a large number of such
1658  // users, potentially causing compile time explosion.
1659  if (RV.getOpcode() != ISD::EntryToken) {
1660  AddToWorklist(RV.getNode());
1661  AddUsersToWorklist(RV.getNode());
1662  }
1663 
1664  // Finally, if the node is now dead, remove it from the graph. The node
1665  // may not be dead if the replacement process recursively simplified to
1666  // something else needing this node. This will also take care of adding any
1667  // operands which have lost a user to the worklist.
1668  recursivelyDeleteUnusedNodes(N);
1669  }
1670 
1671  // If the root changed (e.g. it was a dead load), update the root.
1672  DAG.setRoot(Dummy.getValue());
1673  DAG.RemoveDeadNodes();
1674 }
1675 
1676 SDValue DAGCombiner::visit(SDNode *N) {
1677  switch (N->getOpcode()) {
1678  default: break;
1679  case ISD::TokenFactor: return visitTokenFactor(N);
1680  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1681  case ISD::ADD: return visitADD(N);
1682  case ISD::SUB: return visitSUB(N);
1683  case ISD::SADDSAT:
1684  case ISD::UADDSAT: return visitADDSAT(N);
1685  case ISD::SSUBSAT:
1686  case ISD::USUBSAT: return visitSUBSAT(N);
1687  case ISD::ADDC: return visitADDC(N);
1688  case ISD::SADDO:
1689  case ISD::UADDO: return visitADDO(N);
1690  case ISD::SUBC: return visitSUBC(N);
1691  case ISD::SSUBO:
1692  case ISD::USUBO: return visitSUBO(N);
1693  case ISD::ADDE: return visitADDE(N);
1694  case ISD::ADDCARRY: return visitADDCARRY(N);
1695  case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1696  case ISD::SUBE: return visitSUBE(N);
1697  case ISD::SUBCARRY: return visitSUBCARRY(N);
1698  case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1699  case ISD::SMULFIX:
1700  case ISD::SMULFIXSAT:
1701  case ISD::UMULFIX:
1702  case ISD::UMULFIXSAT: return visitMULFIX(N);
1703  case ISD::MUL: return visitMUL(N);
1704  case ISD::SDIV: return visitSDIV(N);
1705  case ISD::UDIV: return visitUDIV(N);
1706  case ISD::SREM:
1707  case ISD::UREM: return visitREM(N);
1708  case ISD::MULHU: return visitMULHU(N);
1709  case ISD::MULHS: return visitMULHS(N);
1710  case ISD::AVGFLOORS:
1711  case ISD::AVGFLOORU:
1712  case ISD::AVGCEILS:
1713  case ISD::AVGCEILU: return visitAVG(N);
1714  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1715  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1716  case ISD::SMULO:
1717  case ISD::UMULO: return visitMULO(N);
1718  case ISD::SMIN:
1719  case ISD::SMAX:
1720  case ISD::UMIN:
1721  case ISD::UMAX: return visitIMINMAX(N);
1722  case ISD::AND: return visitAND(N);
1723  case ISD::OR: return visitOR(N);
1724  case ISD::XOR: return visitXOR(N);
1725  case ISD::SHL: return visitSHL(N);
1726  case ISD::SRA: return visitSRA(N);
1727  case ISD::SRL: return visitSRL(N);
1728  case ISD::ROTR:
1729  case ISD::ROTL: return visitRotate(N);
1730  case ISD::FSHL:
1731  case ISD::FSHR: return visitFunnelShift(N);
1732  case ISD::SSHLSAT:
1733  case ISD::USHLSAT: return visitSHLSAT(N);
1734  case ISD::ABS: return visitABS(N);
1735  case ISD::BSWAP: return visitBSWAP(N);
1736  case ISD::BITREVERSE: return visitBITREVERSE(N);
1737  case ISD::CTLZ: return visitCTLZ(N);
1738  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1739  case ISD::CTTZ: return visitCTTZ(N);
1740  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1741  case ISD::CTPOP: return visitCTPOP(N);
1742  case ISD::SELECT: return visitSELECT(N);
1743  case ISD::VSELECT: return visitVSELECT(N);
1744  case ISD::SELECT_CC: return visitSELECT_CC(N);
1745  case ISD::SETCC: return visitSETCC(N);
1746  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1747  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1748  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1749  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1750  case ISD::AssertSext:
1751  case ISD::AssertZext: return visitAssertExt(N);
1752  case ISD::AssertAlign: return visitAssertAlign(N);
1753  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1754  case ISD::SIGN_EXTEND_VECTOR_INREG:
1755  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1756  case ISD::TRUNCATE: return visitTRUNCATE(N);
1757  case ISD::BITCAST: return visitBITCAST(N);
1758  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1759  case ISD::FADD: return visitFADD(N);
1760  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1761  case ISD::FSUB: return visitFSUB(N);
1762  case ISD::FMUL: return visitFMUL(N);
1763  case ISD::FMA: return visitFMA(N);
1764  case ISD::FDIV: return visitFDIV(N);
1765  case ISD::FREM: return visitFREM(N);
1766  case ISD::FSQRT: return visitFSQRT(N);
1767  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1768  case ISD::FPOW: return visitFPOW(N);
1769  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1770  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1771  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1772  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1773  case ISD::FP_ROUND: return visitFP_ROUND(N);
1774  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1775  case ISD::FNEG: return visitFNEG(N);
1776  case ISD::FABS: return visitFABS(N);
1777  case ISD::FFLOOR: return visitFFLOOR(N);
1778  case ISD::FMINNUM:
1779  case ISD::FMAXNUM:
1780  case ISD::FMINIMUM:
1781  case ISD::FMAXIMUM: return visitFMinMax(N);
1782  case ISD::FCEIL: return visitFCEIL(N);
1783  case ISD::FTRUNC: return visitFTRUNC(N);
1784  case ISD::BRCOND: return visitBRCOND(N);
1785  case ISD::BR_CC: return visitBR_CC(N);
1786  case ISD::LOAD: return visitLOAD(N);
1787  case ISD::STORE: return visitSTORE(N);
1788  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1789  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1790  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1791  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1792  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1793  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1794  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1795  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1796  case ISD::MGATHER: return visitMGATHER(N);
1797  case ISD::MLOAD: return visitMLOAD(N);
1798  case ISD::MSCATTER: return visitMSCATTER(N);
1799  case ISD::MSTORE: return visitMSTORE(N);
1800  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1801  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1802  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1803  case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1804  case ISD::FREEZE: return visitFREEZE(N);
1805  case ISD::VECREDUCE_FADD:
1806  case ISD::VECREDUCE_FMUL:
1807  case ISD::VECREDUCE_ADD:
1808  case ISD::VECREDUCE_MUL:
1809  case ISD::VECREDUCE_AND:
1810  case ISD::VECREDUCE_OR:
1811  case ISD::VECREDUCE_XOR:
1812  case ISD::VECREDUCE_SMAX:
1813  case ISD::VECREDUCE_SMIN:
1814  case ISD::VECREDUCE_UMAX:
1815  case ISD::VECREDUCE_UMIN:
1816  case ISD::VECREDUCE_FMAX:
1817  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1818 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1819 #include "llvm/IR/VPIntrinsics.def"
1820  return visitVPOp(N);
1821  }
1822  return SDValue();
1823 }
1824 
1825 SDValue DAGCombiner::combine(SDNode *N) {
1826  SDValue RV;
1827  if (!DisableGenericCombines)
1828  RV = visit(N);
1829 
1830  // If nothing happened, try a target-specific DAG combine.
1831  if (!RV.getNode()) {
1832  assert(N->getOpcode() != ISD::DELETED_NODE &&
1833  "Node was deleted but visit returned NULL!");
1834 
1835  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1836  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1837 
1838  // Expose the DAG combiner to the target combiner impls.
1839  TargetLowering::DAGCombinerInfo
1840  DagCombineInfo(DAG, Level, false, this);
1841 
1842  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1843  }
1844  }
1845 
1846  // If nothing happened still, try promoting the operation.
1847  if (!RV.getNode()) {
1848  switch (N->getOpcode()) {
1849  default: break;
1850  case ISD::ADD:
1851  case ISD::SUB:
1852  case ISD::MUL:
1853  case ISD::AND:
1854  case ISD::OR:
1855  case ISD::XOR:
1856  RV = PromoteIntBinOp(SDValue(N, 0));
1857  break;
1858  case ISD::SHL:
1859  case ISD::SRA:
1860  case ISD::SRL:
1861  RV = PromoteIntShiftOp(SDValue(N, 0));
1862  break;
1863  case ISD::SIGN_EXTEND:
1864  case ISD::ZERO_EXTEND:
1865  case ISD::ANY_EXTEND:
1866  RV = PromoteExtend(SDValue(N, 0));
1867  break;
1868  case ISD::LOAD:
1869  if (PromoteLoad(SDValue(N, 0)))
1870  RV = SDValue(N, 0);
1871  break;
1872  }
1873  }
1874 
1875  // If N is a commutative binary node, try to eliminate it if the commuted
1876  // version is already present in the DAG.
1877  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
1878  SDValue N0 = N->getOperand(0);
1879  SDValue N1 = N->getOperand(1);
1880 
1881  // Constant operands are canonicalized to RHS.
1882  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1883  SDValue Ops[] = {N1, N0};
1884  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1885  N->getFlags());
1886  if (CSENode)
1887  return SDValue(CSENode, 0);
1888  }
1889  }
1890 
1891  return RV;
1892 }
1893 
1894 /// Given a node, return its input chain if it has one, otherwise return a null
1895 /// sd operand.
1896 static SDValue getInputChainForNode(SDNode *N) {
1897  if (unsigned NumOps = N->getNumOperands()) {
1898  if (N->getOperand(0).getValueType() == MVT::Other)
1899  return N->getOperand(0);
1900  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1901  return N->getOperand(NumOps-1);
1902  for (unsigned i = 1; i < NumOps-1; ++i)
1903  if (N->getOperand(i).getValueType() == MVT::Other)
1904  return N->getOperand(i);
1905  }
1906  return SDValue();
1907 }
1908 
1909 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1910  // If N has two operands, where one has an input chain equal to the other,
1911  // the 'other' chain is redundant.
1912  if (N->getNumOperands() == 2) {
1913  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1914  return N->getOperand(0);
1915  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1916  return N->getOperand(1);
1917  }
1918 
1919  // Don't simplify token factors if optnone.
1920  if (OptLevel == CodeGenOpt::None)
1921  return SDValue();
1922 
1923  // Don't simplify the token factor if the node itself has too many operands.
1924  if (N->getNumOperands() > TokenFactorInlineLimit)
1925  return SDValue();
1926 
1927  // If the sole user is a token factor, we should make sure we have a
1928  // chance to merge them together. This prevents TF chains from inhibiting
1929  // optimizations.
1930  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1931  AddToWorklist(*(N->use_begin()));
1932 
1933  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1934  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1935  SmallPtrSet<SDNode*, 16> SeenOps;
1936  bool Changed = false; // If we should replace this token factor.
1937 
1938  // Start out with this token factor.
1939  TFs.push_back(N);
1940 
1941  // Iterate through token factors. The TFs list grows when new token factors
1942  // are encountered.
1943  for (unsigned i = 0; i < TFs.size(); ++i) {
1944  // Limit number of nodes to inline, to avoid quadratic compile times.
1945  // We have to add the outstanding Token Factors to Ops, otherwise we might
1946  // drop Ops from the resulting Token Factors.
1947  if (Ops.size() > TokenFactorInlineLimit) {
1948  for (unsigned j = i; j < TFs.size(); j++)
1949  Ops.emplace_back(TFs[j], 0);
1950  // Drop unprocessed Token Factors from TFs, so we do not add them to the
1951  // combiner worklist later.
1952  TFs.resize(i);
1953  break;
1954  }
1955 
1956  SDNode *TF = TFs[i];
1957  // Check each of the operands.
1958  for (const SDValue &Op : TF->op_values()) {
1959  switch (Op.getOpcode()) {
1960  case ISD::EntryToken:
1961  // Entry tokens don't need to be added to the list. They are
1962  // redundant.
1963  Changed = true;
1964  break;
1965 
1966  case ISD::TokenFactor:
1967  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1968  // Queue up for processing.
1969  TFs.push_back(Op.getNode());
1970  Changed = true;
1971  break;
1972  }
1973  [[fallthrough]];
1974 
1975  default:
1976  // Only add if it isn't already in the list.
1977  if (SeenOps.insert(Op.getNode()).second)
1978  Ops.push_back(Op);
1979  else
1980  Changed = true;
1981  break;
1982  }
1983  }
1984  }
1985 
1986  // Re-visit inlined Token Factors, to clean them up in case they have been
1987  // removed. Skip the first Token Factor, as this is the current node.
1988  for (unsigned i = 1, e = TFs.size(); i < e; i++)
1989  AddToWorklist(TFs[i]);
1990 
1991  // Remove Nodes that are chained to another node in the list. Do so
1992  // by walking up chains breadth-first, stopping when we've seen
1993  // another operand. In general we must climb to the EntryNode, but we can exit
1994  // early if we find all remaining work is associated with just one operand as
1995  // no further pruning is possible.
1996 
1997  // List of nodes to search through and original Ops from which they originate.
1998  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1999  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2000  SmallPtrSet<SDNode *, 16> SeenChains;
2001  bool DidPruneOps = false;
2002 
2003  unsigned NumLeftToConsider = 0;
2004  for (const SDValue &Op : Ops) {
2005  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2006  OpWorkCount.push_back(1);
2007  }
2008 
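  // Local helper that merges a newly reached chain node into the current
  // operand's search; it shadows DAGCombiner::AddToWorklist for the rest of
  // this function.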
2009  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2010  // If this is an Op, we can remove the op from the list. Re-mark any
2011  // search associated with it as coming from the current OpNumber.
2012  if (SeenOps.contains(Op)) {
2013  Changed = true;
2014  DidPruneOps = true;
2015  unsigned OrigOpNumber = 0;
2016  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2017  OrigOpNumber++;
2018  assert((OrigOpNumber != Ops.size()) &&
2019  "expected to find TokenFactor Operand");
2020  // Re-mark worklist from OrigOpNumber to OpNumber
2021  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2022  if (Worklist[i].second == OrigOpNumber) {
2023  Worklist[i].second = OpNumber;
2024  }
2025  }
2026  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2027  OpWorkCount[OrigOpNumber] = 0;
2028  NumLeftToConsider--;
2029  }
2030  // Add if it's a new chain
2031  if (SeenChains.insert(Op).second) {
2032  OpWorkCount[OpNumber]++;
2033  Worklist.push_back(std::make_pair(Op, OpNumber));
2034  }
2035  };
2036 
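  // The walk is capped at 1024 visited chain nodes, which bounds the work
  // done on very long chains.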
2037  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2038  // We need to consider at least 2 Ops to prune.
2039  if (NumLeftToConsider <= 1)
2040  break;
2041  auto CurNode = Worklist[i].first;
2042  auto CurOpNumber = Worklist[i].second;
2043  assert((OpWorkCount[CurOpNumber] > 0) &&
2044  "Node should not appear in worklist");
2045  switch (CurNode->getOpcode()) {
2046  case ISD::EntryToken:
2047  // Hitting EntryToken is the only way for the search to terminate without
2048  // hitting another operand's search.
2049  // Prevent this operand from being
2050  // marked as considered.
2051  NumLeftToConsider++;
2052  break;
2053  case ISD::TokenFactor:
2054  for (const SDValue &Op : CurNode->op_values())
2055  AddToWorklist(i, Op.getNode(), CurOpNumber);
2056  break;
2057  case ISD::LIFETIME_START:
2058  case ISD::LIFETIME_END:
2059  case ISD::CopyFromReg:
2060  case ISD::CopyToReg:
2061  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2062  break;
2063  default:
2064  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2065  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2066  break;
2067  }
2068  OpWorkCount[CurOpNumber]--;
2069  if (OpWorkCount[CurOpNumber] == 0)
2070  NumLeftToConsider--;
2071  }
2072 
2073  // If we've changed things around then replace token factor.
2074  if (Changed) {
2075  SDValue Result;
2076  if (Ops.empty()) {
2077  // The entry token is the only possible outcome.
2078  Result = DAG.getEntryNode();
2079  } else {
2080  if (DidPruneOps) {
2081  SmallVector<SDValue, 8> PrunedOps;
2082  // Keep only the operands whose chains were not subsumed by another operand.
2083  for (const SDValue &Op : Ops) {
2084  if (SeenChains.count(Op.getNode()) == 0)
2085  PrunedOps.push_back(Op);
2086  }
2087  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2088  } else {
2089  Result = DAG.getTokenFactor(SDLoc(N), Ops);
2090  }
2091  }
2092  return Result;
2093  }
2094  return SDValue();
2095 }
2096 
2097 /// MERGE_VALUES can always be eliminated.
2098 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2099  WorklistRemover DeadNodes(*this);
2100  // Replacing results may cause a different MERGE_VALUES to suddenly
2101  // be CSE'd with N, and carry its uses with it. Iterate until no
2102  // uses remain, to ensure that the node can be safely deleted.
2103  // First add the users of this node to the work list so that they
2104  // can be tried again once they have new operands.
2105  AddUsersToWorklist(N);
2106  do {
2107  // Do as a single replacement to avoid rewalking use lists.
2108  SmallVector<SDValue, 8> Ops;
2109  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2110  Ops.push_back(N->getOperand(i));
2111  DAG.ReplaceAllUsesWith(N, Ops.data());
2112  } while (!N->use_empty());
2113  deleteAndRecombine(N);
2114  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2115 }
2116 
2117 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2118 /// ConstantSDNode pointer else nullptr.
2119 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2120  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2121  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2122 }
2123 
2124 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2125 /// and that N may be folded in the load / store addressing mode.
2126 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2127  const TargetLowering &TLI) {
2128  EVT VT;
2129  unsigned AS;
2130 
2131  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2132  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2133  return false;
2134  VT = LD->getMemoryVT();
2135  AS = LD->getAddressSpace();
2136  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2137  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2138  return false;
2139  VT = ST->getMemoryVT();
2140  AS = ST->getAddressSpace();
2141  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2142  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2143  return false;
2144  VT = LD->getMemoryVT();
2145  AS = LD->getAddressSpace();
2146  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2147  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2148  return false;
2149  VT = ST->getMemoryVT();
2150  AS = ST->getAddressSpace();
2151  } else {
2152  return false;
2153  }
2154 
2155  TargetLowering::AddrMode AM;
2156  if (N->getOpcode() == ISD::ADD) {
2157  AM.HasBaseReg = true;
2158  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2159  if (Offset)
2160  // [reg +/- imm]
2161  AM.BaseOffs = Offset->getSExtValue();
2162  else
2163  // [reg +/- reg]
2164  AM.Scale = 1;
2165  } else if (N->getOpcode() == ISD::SUB) {
2166  AM.HasBaseReg = true;
2167  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2168  if (Offset)
2169  // [reg +/- imm]
2170  AM.BaseOffs = -Offset->getSExtValue();
2171  else
2172  // [reg +/- reg]
2173  AM.Scale = 1;
2174  } else {
2175  return false;
2176  }
2177 
2178  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2179  VT.getTypeForEVT(*DAG.getContext()), AS);
2180 }
2181 
2182 /// This inverts a canonicalization in IR that replaces a variable select arm
2183 /// with an identity constant. Codegen improves if we re-use the variable
2184 /// operand rather than load a constant. This can also be converted into a
2185 /// masked vector operation if the target supports it.
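/// For example: (add X, (vselect Cond, 0, Y))
/// --> (vselect Cond, freeze(X), (add freeze(X), Y)).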
2186 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2187  bool ShouldCommuteOperands) {
2188  // Match a select as operand 1. The identity constant that we are looking for
2189  // is only valid as operand 1 of a non-commutative binop.
2190  SDValue N0 = N->getOperand(0);
2191  SDValue N1 = N->getOperand(1);
2192  if (ShouldCommuteOperands)
2193  std::swap(N0, N1);
2194 
2195  // TODO: Should this apply to scalar select too?
2196  if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2197  return SDValue();
2198 
2199  // We can't hoist div/rem because of immediate UB (not speculatable).
2200  unsigned Opcode = N->getOpcode();
2201  if (!DAG.isSafeToSpeculativelyExecute(Opcode))
2202  return SDValue();
2203 
2204  EVT VT = N->getValueType(0);
2205  SDValue Cond = N1.getOperand(0);
2206  SDValue TVal = N1.getOperand(1);
2207  SDValue FVal = N1.getOperand(2);
2208 
2209  // This transform increases uses of N0, so freeze it to be safe.
2210  // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2211  unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2212  if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2213  SDValue F0 = DAG.getFreeze(N0);
2214  SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2215  return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2216  }
2217  // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2218  if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2219  SDValue F0 = DAG.getFreeze(N0);
2220  SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2221  return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2222  }
2223 
2224  return SDValue();
2225 }
2226 
2227 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2228  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2229  "Unexpected binary operator");
2230 
2231  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2232  auto BinOpcode = BO->getOpcode();
2233  EVT VT = BO->getValueType(0);
2234  if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2235  if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2236  return Sel;
2237 
2238  if (TLI.isCommutativeBinOp(BO->getOpcode()))
2239  if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2240  return Sel;
2241  }
2242 
2243  // Don't do this unless the old select is going away. We want to eliminate the
2244  // binary operator, not replace a binop with a select.
2245  // TODO: Handle ISD::SELECT_CC.
2246  unsigned SelOpNo = 0;
2247  SDValue Sel = BO->getOperand(0);
2248  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2249  SelOpNo = 1;
2250  Sel = BO->getOperand(1);
2251  }
2252 
2253  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2254  return SDValue();
2255 
2256  SDValue CT = Sel.getOperand(1);
2257  if (!isConstantOrConstantVector(CT, true) &&
2258  !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2259  return SDValue();
2260 
2261  SDValue CF = Sel.getOperand(2);
2262  if (!isConstantOrConstantVector(CF, true) &&
2263  !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2264  return SDValue();
2265 
2266  // Bail out if any constants are opaque because we can't constant fold those.
2267  // The exception is "and" and "or" with either 0 or -1, in which case we can
2268  // propagate non-constant operands into the select. I.e.:
2269  // and (select Cond, 0, -1), X --> select Cond, 0, X
2270  // or X, (select Cond, -1, 0) --> select Cond, -1, X
2271  bool CanFoldNonConst =
2272  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2273  ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2274  (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2275 
2276  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2277  if (!CanFoldNonConst &&
2278  !isConstantOrConstantVector(CBO, true) &&
2279  !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2280  return SDValue();
2281 
2282  SDLoc DL(Sel);
2283  SDValue NewCT, NewCF;
2284 
2285  if (CanFoldNonConst) {
2286  // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2287  if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2288  (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2289  NewCT = CT;
2290  else
2291  NewCT = CBO;
2292 
2293  if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2294  (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2295  NewCF = CF;
2296  else
2297  NewCF = CBO;
2298  } else {
2299  // We have a select-of-constants followed by a binary operator with a
2300  // constant. Eliminate the binop by pulling the constant math into the
2301  // select. Example:
2302  // add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2303  NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2304  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2305  if (!CanFoldNonConst && !NewCT.isUndef() &&
2306  !isConstantOrConstantVector(NewCT, true) &&
2307  !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2308  return SDValue();
2309 
2310  NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2311  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2312  if (!CanFoldNonConst && !NewCF.isUndef() &&
2313  !isConstantOrConstantVector(NewCF, true) &&
2314  !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2315  return SDValue();
2316  }
2317 
2318  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2319  SelectOp->setFlags(BO->getFlags());
2320  return SelectOp;
2321 }
2322 
2323 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2324  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2325  "Expecting add or sub");
2326 
2327  // Match a constant operand and a zext operand for the math instruction:
2328  // add Z, C
2329  // sub C, Z
2330  bool IsAdd = N->getOpcode() == ISD::ADD;
2331  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2332  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2333  auto *CN = dyn_cast<ConstantSDNode>(C);
2334  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2335  return SDValue();
2336 
2337  // Match the zext operand as a setcc of a boolean.
2338  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2339  Z.getOperand(0).getValueType() != MVT::i1)
2340  return SDValue();
2341 
2342  // Match the compare as: setcc (X & 1), 0, eq.
2343  SDValue SetCC = Z.getOperand(0);
2344  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2345  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2346  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2347  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2348  return SDValue();
2349 
2350  // We are adding/subtracting a constant and an inverted low bit. Turn that
2351  // into a subtract/add of the low bit with incremented/decremented constant:
2352  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2353  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2354  EVT VT = C.getValueType();
2355  SDLoc DL(N);
2356  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2357  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2358  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2359  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2360 }
2361 
2362 /// Try to fold an add/sub with a constant operand, whose other operand shifts
2363 /// the sign bit of a 'not' value down to bit 0, into a shift and an add with a
2364 /// different constant.
2364 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2365  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2366  "Expecting add or sub");
2367 
2368  // We need a constant operand for the add/sub, and the other operand is a
2369  // logical shift right: add (srl), C or sub C, (srl).
2370  bool IsAdd = N->getOpcode() == ISD::ADD;
2371  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2372  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2373  if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2374  ShiftOp.getOpcode() != ISD::SRL)
2375  return SDValue();
2376 
2377  // The shift must be of a 'not' value.
2378  SDValue Not = ShiftOp.getOperand(0);
2379  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2380  return SDValue();
2381 
2382  // The shift must be moving the sign bit to the least-significant-bit.
2383  EVT VT = ShiftOp.getValueType();
2384  SDValue ShAmt = ShiftOp.getOperand(1);
2385  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2386  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2387  return SDValue();
2388 
2389  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2390  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2391  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2392  SDLoc DL(N);
2393  if (SDValue NewC = DAG.FoldConstantArithmetic(
2394  IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2395  {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2396  SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2397  Not.getOperand(0), ShAmt);
2398  return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2399  }
2400 
2401  return SDValue();
2402 }
2403 
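/// Return true if \p V behaves like an ADD of its operands: an OR whose
/// operands have no common bits set, or an XOR with the minimum signed
/// constant (which can only flip the sign bit, so it acts as an add).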
2404 static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
2405  unsigned Opcode = V.getOpcode();
2406  if (Opcode == ISD::OR)
2407  return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
2408  if (Opcode == ISD::XOR)
2409  return isMinSignedConstant(V.getOperand(1));
2410  return false;
2411 }
2412 
2413 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2414 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2415 /// are no common bits set in the operands).
2416 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2417  SDValue N0 = N->getOperand(0);
2418  SDValue N1 = N->getOperand(1);
2419  EVT VT = N0.getValueType();
2420  SDLoc DL(N);
2421 
2422  // fold (add x, undef) -> undef
2423  if (N0.isUndef())
2424  return N0;
2425  if (N1.isUndef())
2426  return N1;
2427 
2428  // fold (add c1, c2) -> c1+c2
2429  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2430  return C;
2431 
2432  // canonicalize constant to RHS
2433  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2434  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2435  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2436 
2437  // fold vector ops
2438  if (VT.isVector()) {
2439  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2440  return FoldedVOp;
2441 
2442  // fold (add x, 0) -> x, vector edition
2443  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2444  return N0;
2445  }
2446 
2447  // fold (add x, 0) -> x
2448  if (isNullConstant(N1))
2449  return N0;
2450 
2451  if (N0.getOpcode() == ISD::SUB) {
2452  SDValue N00 = N0.getOperand(0);
2453  SDValue N01 = N0.getOperand(1);
2454 
2455  // fold ((A-c1)+c2) -> (A+(c2-c1))
2456  if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2457  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2458 
2459  // fold ((c1-A)+c2) -> (c1+c2)-A
2460  if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2461  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2462  }
2463 
2464  // add (sext i1 X), 1 -> zext (not i1 X)
2465  // We don't transform this pattern:
2466  // add (zext i1 X), -1 -> sext (not i1 X)
2467  // because most (?) targets generate better code for the zext form.
2468  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2469  isOneOrOneSplat(N1)) {
2470  SDValue X = N0.getOperand(0);
2471  if ((!LegalOperations ||
2472  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2473  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2474  X.getScalarValueSizeInBits() == 1) {
2475  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2476  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2477  }
2478  }
2479 
2480  // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2481  // iff (or x, c0) is equivalent to (add x, c0).
2482  // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2483  // iff (xor x, c0) is equivalent to (add x, c0).
2484  if (isADDLike(N0, DAG)) {
2485  SDValue N01 = N0.getOperand(1);
2486  if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2487  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2488  }
2489 
2490  if (SDValue NewSel = foldBinOpIntoSelect(N))
2491  return NewSel;
2492 
2493  // reassociate add
2494  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2495  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2496  return RADD;
2497 
2498  // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2499  // equivalent to (add x, c).
2500  // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2501  // equivalent to (add x, c).
2502  auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2503  if (isADDLike(N0, DAG) && N0.hasOneUse() &&
2504  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2505  return DAG.getNode(ISD::ADD, DL, VT,
2506  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2507  N0.getOperand(1));
2508  }
2509  return SDValue();
2510  };
2511  if (SDValue Add = ReassociateAddOr(N0, N1))
2512  return Add;
2513  if (SDValue Add = ReassociateAddOr(N1, N0))
2514  return Add;
2515  }
2516  // fold ((0-A) + B) -> B-A
2517  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2518  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2519 
2520  // fold (A + (0-B)) -> A-B
2521  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2522  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2523 
2524  // fold (A+(B-A)) -> B
2525  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2526  return N1.getOperand(0);
2527 
2528  // fold ((B-A)+A) -> B
2529  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2530  return N0.getOperand(0);
2531 
2532  // fold ((A-B)+(C-A)) -> (C-B)
2533  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2534  N0.getOperand(0) == N1.getOperand(1))
2535  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2536  N0.getOperand(1));
2537 
2538  // fold ((A-B)+(B-C)) -> (A-C)
2539  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2540  N0.getOperand(1) == N1.getOperand(0))
2541  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2542  N1.getOperand(1));
2543 
2544  // fold (A+(B-(A+C))) to (B-C)
2545  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2546  N0 == N1.getOperand(1).getOperand(0))
2547  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2548  N1.getOperand(1).getOperand(1));
2549 
2550  // fold (A+(B-(C+A))) to (B-C)
2551  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2552  N0 == N1.getOperand(1).getOperand(1))
2553  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2554  N1.getOperand(1).getOperand(0));
2555 
2556  // fold (A+((B-A)+or-C)) to (B+or-C)
2557  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2558  N1.getOperand(0).getOpcode() == ISD::SUB &&
2559  N0 == N1.getOperand(0).getOperand(1))
2560  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2561  N1.getOperand(1));
2562 
2563  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2564  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2565  N0->hasOneUse() && N1->hasOneUse()) {
2566  SDValue N00 = N0.getOperand(0);
2567  SDValue N01 = N0.getOperand(1);
2568  SDValue N10 = N1.getOperand(0);
2569  SDValue N11 = N1.getOperand(1);
2570 
2571  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2572  return DAG.getNode(ISD::SUB, DL, VT,
2573  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2574  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2575  }
2576 
2577  // fold (add (umax X, C), -C) --> (usubsat X, C)
2578  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2579  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2580  return (!Max && !Op) ||
2581  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2582  };
2583  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2584  /*AllowUndefs*/ true))
2585  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2586  N0.getOperand(1));
2587  }
2588 
2589  if (SimplifyDemandedBits(SDValue(N, 0)))
2590  return SDValue(N, 0);
2591 
2592  if (isOneOrOneSplat(N1)) {
2593  // fold (add (xor a, -1), 1) -> (sub 0, a)
2594  if (isBitwiseNot(N0))
2595  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2596  N0.getOperand(0));
2597 
2598  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2599  if (N0.getOpcode() == ISD::ADD) {
2600  SDValue A, Xor;
2601 
2602  if (isBitwiseNot(N0.getOperand(0))) {
2603  A = N0.getOperand(1);
2604  Xor = N0.getOperand(0);
2605  } else if (isBitwiseNot(N0.getOperand(1))) {
2606  A = N0.getOperand(0);
2607  Xor = N0.getOperand(1);
2608  }
2609 
2610  if (Xor)
2611  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2612  }
2613 
2614  // Look for:
2615  // add (add x, y), 1
2616  // And if the target does not like this form then turn into:
2617  // sub y, (xor x, -1)
2618  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2619  N0.hasOneUse()) {
2620  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2621  DAG.getAllOnesConstant(DL, VT));
2622  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2623  }
2624  }
2625 
2626  // (x - y) + -1 -> add (xor y, -1), x
2627  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2628  isAllOnesOrAllOnesSplat(N1)) {
2629  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2630  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2631  }
2632 
2633  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2634  return Combined;
2635 
2636  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2637  return Combined;
2638 
2639  return SDValue();
2640 }
2641 
2642 SDValue DAGCombiner::visitADD(SDNode *N) {
2643  SDValue N0 = N->getOperand(0);
2644  SDValue N1 = N->getOperand(1);
2645  EVT VT = N0.getValueType();
2646  SDLoc DL(N);
2647 
2648  if (SDValue Combined = visitADDLike(N))
2649  return Combined;
2650 
2651  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2652  return V;
2653 
2654  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2655  return V;
2656 
2657  // fold (a+b) -> (a|b) iff a and b share no bits.
2658  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2659  DAG.haveNoCommonBitsSet(N0, N1))
2660  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2661 
2662  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2663  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2664  const APInt &C0 = N0->getConstantOperandAPInt(0);
2665  const APInt &C1 = N1->getConstantOperandAPInt(0);
2666  return DAG.getVScale(DL, VT, C0 + C1);
2667  }
2668 
2669  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2670  if (N0.getOpcode() == ISD::ADD &&
2671  N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2672  N1.getOpcode() == ISD::VSCALE) {
2673  const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2674  const APInt &VS1 = N1->getConstantOperandAPInt(0);
2675  SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2676  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2677  }
2678 
2679  // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2680  if (N0.getOpcode() == ISD::STEP_VECTOR &&
2681  N1.getOpcode() == ISD::STEP_VECTOR) {
2682  const APInt &C0 = N0->getConstantOperandAPInt(0);
2683  const APInt &C1 = N1->getConstantOperandAPInt(0);
2684  APInt NewStep = C0 + C1;
2685  return DAG.getStepVector(DL, VT, NewStep);
2686  }
2687 
2688  // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2689  if (N0.getOpcode() == ISD::ADD &&
2690  N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
2691  N1.getOpcode() == ISD::STEP_VECTOR) {
2692  const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2693  const APInt &SV1 = N1->getConstantOperandAPInt(0);
2694  APInt NewStep = SV0 + SV1;
2695  SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2696  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2697  }
2698 
2699  return SDValue();
2700 }
2701 
2702 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2703  unsigned Opcode = N->getOpcode();
2704  SDValue N0 = N->getOperand(0);
2705  SDValue N1 = N->getOperand(1);
2706  EVT VT = N0.getValueType();
2707  SDLoc DL(N);
2708 
2709  // fold (add_sat x, undef) -> -1
2710  if (N0.isUndef() || N1.isUndef())
2711  return DAG.getAllOnesConstant(DL, VT);
2712 
2713  // fold (add_sat c1, c2) -> c3
2714  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2715  return C;
2716 
2717  // canonicalize constant to RHS
2718  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2719  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2720  return DAG.getNode(Opcode, DL, VT, N1, N0);
2721 
2722  // fold vector ops
2723  if (VT.isVector()) {
2724  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2725  return FoldedVOp;
2726 
2727  // fold (add_sat x, 0) -> x, vector edition
2728  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2729  return N0;
2730  }
2731 
2732  // fold (add_sat x, 0) -> x
2733  if (isNullConstant(N1))
2734  return N0;
2735 
2736  // If it cannot overflow, transform into an add.
2737  if (Opcode == ISD::UADDSAT)
2738  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2739  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2740 
2741  return SDValue();
2742 }
2743 
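/// Peel off TRUNCATE, ZERO_EXTEND and AND-with-1 wrappers and return the
/// underlying carry-out value (result #1 of ADDCARRY, SUBCARRY, UADDO or
/// USUBO) if it can be used directly as a carry, else an empty SDValue.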
2744 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2745  bool Masked = false;
2746 
2747  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2748  while (true) {
2749  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2750  V = V.getOperand(0);
2751  continue;
2752  }
2753 
2754  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2755  Masked = true;
2756  V = V.getOperand(0);
2757  continue;
2758  }
2759 
2760  break;
2761  }
2762 
2763  // If this is not a carry, return.
2764  if (V.getResNo() != 1)
2765  return SDValue();
2766 
2767  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2768  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2769  return SDValue();
2770 
2771  EVT VT = V->getValueType(0);
2772  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2773  return SDValue();
2774 
2775  // If the result is masked, then no matter what kind of bool it is we can
2776  // return. If it isn't, then we need to make sure the bool type is either 0 or
2777  // 1 and not other values.
2778  if (Masked ||
2779  TLI.getBooleanContents(V.getValueType()) ==
2780  TargetLowering::ZeroOrOneBooleanContent)
2781  return V;
2782 
2783  return SDValue();
2784 }
2785 
2786 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2787 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2788 /// the opcode and bypass the mask operation.
2789 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2790  SelectionDAG &DAG, const SDLoc &DL) {
2791  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2792  return SDValue();
2793 
2794  EVT VT = N0.getValueType();
2795  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2796  return SDValue();
2797 
2798  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2799  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2800  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2801 }
2802 
2803 /// Helper for doing combines based on N0 and N1 being added to each other.
2804 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2805  SDNode *LocReference) {
2806  EVT VT = N0.getValueType();
2807  SDLoc DL(LocReference);
2808 
2809  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2810  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2811  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2812  return DAG.getNode(ISD::SUB, DL, VT, N0,
2813  DAG.getNode(ISD::SHL, DL, VT,
2814  N1.getOperand(0).getOperand(1),
2815  N1.getOperand(1)));
2816 
2817  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2818  return V;
2819 
2820  // Look for:
2821  // add (add x, 1), y
2822  // And if the target does not like this form then turn into:
2823  // sub y, (xor x, -1)
2824  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2825  N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
2826  SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2827  DAG.getAllOnesConstant(DL, VT));
2828  return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2829  }
2830 
2831  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
2832  // Hoist one-use subtraction by non-opaque constant:
2833  // (x - C) + y -> (x + y) - C
2834  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2835  if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2836  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2837  return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2838  }
2839  // Hoist one-use subtraction from non-opaque constant:
2840  // (C - x) + y -> (y - x) + C
2841  if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2842  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2843  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2844  }
2845  }
2846 
2847  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2848  // rather than 'add 0/-1' (the zext should get folded).
2849  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2850  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2851  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2852  TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2853  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2854  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2855  }
2856 
2857  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2858  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2859  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2860  if (TN->getVT() == MVT::i1) {
2861  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2862  DAG.getConstant(1, DL, VT));
2863  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2864  }
2865  }
2866 
2867  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2868  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2869  N1.getResNo() == 0)
2870  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2871  N0, N1.getOperand(0), N1.getOperand(2));
2872 
2873  // (add X, Carry) -> (addcarry X, 0, Carry)
2874  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2875  if (SDValue Carry = getAsCarry(TLI, N1))
2876  return DAG.getNode(ISD::ADDCARRY, DL,
2877  DAG.getVTList(VT, Carry.getValueType()), N0,
2878  DAG.getConstant(0, DL, VT), Carry);
2879 
2880  return SDValue();
2881 }
2882 
2883 SDValue DAGCombiner::visitADDC(SDNode *N) {
2884  SDValue N0 = N->getOperand(0);
2885  SDValue N1 = N->getOperand(1);
2886  EVT VT = N0.getValueType();
2887  SDLoc DL(N);
2888 
2889  // If the flag result is dead, turn this into an ADD.
2890  if (!N->hasAnyUseOfValue(1))
2891  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2892  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2893 
2894  // canonicalize constant to RHS.
2895  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2896  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2897  if (N0C && !N1C)
2898  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2899 
2900  // fold (addc x, 0) -> x + no carry out
2901  if (isNullConstant(N1))
2902  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2903  DL, MVT::Glue));
2904 
2905  // If it cannot overflow, transform into an add.
2906  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2907  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2908  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2909 
2910  return SDValue();
2911 }
2912 
2913 /**
2914  * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2915  * then the flip also occurs if computing the inverse is the same cost.
2916  * This function returns an empty SDValue in case it cannot flip the boolean
2917  * without increasing the cost of the computation. If you want to flip a boolean
2918  * no matter what, use DAG.getLogicalNOT.
2919  */
2920 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2921  const TargetLowering &TLI,
2922  bool Force) {
2923  if (Force && isa<ConstantSDNode>(V))
2924  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2925 
2926  if (V.getOpcode() != ISD::XOR)
2927  return SDValue();
2928 
2929  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2930  if (!Const)
2931  return SDValue();
2932 
2933  EVT VT = V.getValueType();
2934 
2935  bool IsFlip = false;
2936  switch(TLI.getBooleanContents(VT)) {
2937  case TargetLowering::ZeroOrOneBooleanContent:
2938  IsFlip = Const->isOne();
2939  break;
2940  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2941  IsFlip = Const->isAllOnes();
2942  break;
2943  case TargetLowering::UndefinedBooleanContent:
2944  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2945  break;
2946  }
2947 
2948  if (IsFlip)
2949  return V.getOperand(0);
2950  if (Force)
2951  return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2952  return SDValue();
2953 }
2954 
2955 SDValue DAGCombiner::visitADDO(SDNode *N) {
2956  SDValue N0 = N->getOperand(0);
2957  SDValue N1 = N->getOperand(1);
2958  EVT VT = N0.getValueType();
2959  bool IsSigned = (ISD::SADDO == N->getOpcode());
2960 
2961  EVT CarryVT = N->getValueType(1);
2962  SDLoc DL(N);
2963 
2964  // If the flag result is dead, turn this into an ADD.
2965  if (!N->hasAnyUseOfValue(1))
2966  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2967  DAG.getUNDEF(CarryVT));
2968 
2969  // canonicalize constant to RHS.
2970  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2971  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2972  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2973 
2974  // fold (addo x, 0) -> x + no carry out
2975  if (isNullOrNullSplat(N1))
2976  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2977 
2978  if (!IsSigned) {
2979  // If it cannot overflow, transform into an add.
2980  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2981  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2982  DAG.getConstant(0, DL, CarryVT));
2983 
2984  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2985  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2986  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2987  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2988  return CombineTo(
2989  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2990  }
2991 
2992  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2993  return Combined;
2994 
2995  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2996  return Combined;
2997  }
2998 
2999  return SDValue();
3000 }
3001 
3002 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3003  EVT VT = N0.getValueType();
3004  if (VT.isVector())
3005  return SDValue();
3006 
3007  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
3008  // If Y + 1 cannot overflow.
3009  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
3010  SDValue Y = N1.getOperand(0);
3011  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3012  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
3013  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
3014  N1.getOperand(2));
3015  }
3016 
3017  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
3018  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
3019  if (SDValue Carry = getAsCarry(TLI, N1))
3020  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
3021  DAG.getConstant(0, SDLoc(N), VT), Carry);
3022 
3023  return SDValue();
3024 }
3025 
3026 SDValue DAGCombiner::visitADDE(SDNode *N) {
3027  SDValue N0 = N->getOperand(0);
3028  SDValue N1 = N->getOperand(1);
3029  SDValue CarryIn = N->getOperand(2);
3030 
3031  // canonicalize constant to RHS
3032  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3033  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3034  if (N0C && !N1C)
3035  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3036  N1, N0, CarryIn);
3037 
3038  // fold (adde x, y, false) -> (addc x, y)
3039  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3040  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3041 
3042  return SDValue();
3043 }
3044 
3045 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
3046  SDValue N0 = N->getOperand(0);
3047  SDValue N1 = N->getOperand(1);
3048  SDValue CarryIn = N->getOperand(2);
3049  SDLoc DL(N);
3050 
3051  // canonicalize constant to RHS
3052  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3053  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3054  if (N0C && !N1C)
3055  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
3056 
3057  // fold (addcarry x, y, false) -> (uaddo x, y)
3058  if (isNullConstant(CarryIn)) {
3059  if (!LegalOperations ||
3060  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3061  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3062  }
3063 
3064  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3065  if (isNullConstant(N0) && isNullConstant(N1)) {
3066  EVT VT = N0.getValueType();
3067  EVT CarryVT = CarryIn.getValueType();
3068  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3069  AddToWorklist(CarryExt.getNode());
3070  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3071  DAG.getConstant(1, DL, VT)),
3072  DAG.getConstant(0, DL, CarryVT));
3073  }
3074 
3075  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
3076  return Combined;
3077 
3078  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
3079  return Combined;
3080 
3081  // We want to avoid useless duplication.
3082  // TODO: This is done automatically for binary operations. As ADDCARRY is
3083  // not a binary operation, it is not really possible to leverage this
3084  // existing mechanism for it. However, if more operations require the same
3085  // deduplication logic, then it may be worth generalizing it.
3086  SDValue Ops[] = {N1, N0, CarryIn};
3087  SDNode *CSENode =
3088  DAG.getNodeIfExists(ISD::ADDCARRY, N->getVTList(), Ops, N->getFlags());
3089  if (CSENode)
3090  return SDValue(CSENode, 0);
3091 
3092  return SDValue();
3093 }
3094 
3095 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3096  SDValue N0 = N->getOperand(0);
3097  SDValue N1 = N->getOperand(1);
3098  SDValue CarryIn = N->getOperand(2);
3099  SDLoc DL(N);
3100 
3101  // canonicalize constant to RHS
3102  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3103  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3104  if (N0C && !N1C)
3105  return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3106 
3107  // fold (saddo_carry x, y, false) -> (saddo x, y)
3108  if (isNullConstant(CarryIn)) {
3109  if (!LegalOperations ||
3110  TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3111  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3112  }
3113 
3114  return SDValue();
3115 }
3116 
3117 /**
3118  * If we are facing some sort of diamond carry propagation pattern, try to
3119  * break it up to generate something like:
3120  * (addcarry X, 0, (addcarry A, B, Z):Carry)
3121  *
3122  * The end result is usually an increase in the number of operations required, but
3123  * because the carry is now linearized, other transforms can kick in and optimize the DAG.
3124  *
3125  * Patterns typically look something like
3126  *            (uaddo A, B)
3127  *             /       \
3128  *          Carry      Sum
3129  *            |          \
3130  *            | (addcarry *, 0, Z)
3131  *            |       /
3132  *             \   Carry
3133  *              |   /
3134  *   (addcarry X, *, *)
3135  *
3136  * But numerous variations exist. Our goal is to identify A, B, X and Z and
3137  * produce a combine with a single path for carry propagation.
3138  */
3139 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3140  SDValue X, SDValue Carry0, SDValue Carry1,
3141  SDNode *N) {
3142  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3143  return SDValue();
3144  if (Carry1.getOpcode() != ISD::UADDO)
3145  return SDValue();
3146 
3147  SDValue Z;
3148 
3149  /**
3150  * First look for a suitable Z. It will present itself in the form of
3151  * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3152  */
3153  if (Carry0.getOpcode() == ISD::ADDCARRY &&
3154  isNullConstant(Carry0.getOperand(1))) {
3155  Z = Carry0.getOperand(2);
3156  } else if (Carry0.getOpcode() == ISD::UADDO &&
3157  isOneConstant(Carry0.getOperand(1))) {
3158  EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3159  Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3160  } else {
3161  // We couldn't find a suitable Z.
3162  return SDValue();
3163  }
3164 
3165 
3166  auto cancelDiamond = [&](SDValue A, SDValue B) {
3167  SDLoc DL(N);
3168  SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3169  Combiner.AddToWorklist(NewY.getNode());
3170  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3171  DAG.getConstant(0, DL, X.getValueType()),
3172  NewY.getValue(1));
3173  };
3174 
3175  /**
3176  * (uaddo A, B)
3177  * |
3178  * Sum
3179  * |
3180  * (addcarry *, 0, Z)
3181  */
3182  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3183  return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3184  }
3185 
3186  /**
3187  * (addcarry A, 0, Z)
3188  * |
3189  * Sum
3190  * |
3191  * (uaddo *, B)
3192  */
3193  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3194  return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3195  }
3196 
3197  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3198  return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3199  }
3200 
3201  return SDValue();
3202 }
3203 
3204 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
3205 // match patterns like:
3206 //
3207 //          (uaddo A, B)          CarryIn
3208 //            |       \              |
3209 //            |        \             |
3210 //      PartialSum  PartialCarryOutX /
3211 //            |         |           /
3212 //            |     ____|__________/
3213 //            |    /    |
3214 //      (uaddo *, *)    \________
3215 //        |      \               \
3216 //        |       \               |
3217 //        |   PartialCarryOutY    |
3218 //        |         \             |
3219 //        |          \           /
3220 //   AddCarrySum      |   ______/
3221 //                    |  /
3222 //   CarryOut = (or *, *)
3223 //
3224 // And generate ADDCARRY (or SUBCARRY) with two result values:
3225 //
3226 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3227 //
3228 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3229 // a single path for carry/borrow out propagation:
3230 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3231  SDValue N0, SDValue N1, SDNode *N) {
3232  SDValue Carry0 = getAsCarry(TLI, N0);
3233  if (!Carry0)
3234  return SDValue();
3235  SDValue Carry1 = getAsCarry(TLI, N1);
3236  if (!Carry1)
3237  return SDValue();
3238 
3239  unsigned Opcode = Carry0.getOpcode();
3240  if (Opcode != Carry1.getOpcode())
3241  return SDValue();
3242  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3243  return SDValue();
3244 
3245  // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3246  // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3247  if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3248  std::swap(Carry0, Carry1);
3249 
3250  // Check if nodes are connected in expected way.
3251  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3252  Carry1.getOperand(1) != Carry0.getValue(0))
3253  return SDValue();
3254 
3255  // The carry-in value must be on the right-hand side for subtraction.
3256  unsigned CarryInOperandNum =
3257  Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3258  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3259  return SDValue();
3260  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3261 
3262  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3263  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3264  return SDValue();
3265 
3266  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3267  // TODO: make getAsCarry() aware of how partial carries are merged.
3268  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3269  return SDValue();
3270  CarryIn = CarryIn.getOperand(0);
3271  if (CarryIn.getValueType() != MVT::i1)
3272  return SDValue();
3273 
3274  SDLoc DL(N);
3275  SDValue Merged =
3276  DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3277  Carry0.getOperand(1), CarryIn);
3278 
3279  // Note that because the result of the UADDO/USUBO of A and B feeds into the
3280  // UADDO/USUBO that consumes the carry/borrow in, we can prove that if the
3281  // first UADDO/USUBO overflows, the second
3282  // UADDO/USUBO cannot. For example, consider 8-bit numbers where 0xFF is the
3283  // maximum value.
3284  //
3285  // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3286  // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3287  //
3288  // This is important because it means that OR and XOR can be used to merge
3289  // carry flags; and that AND can return a constant zero.
3290  //
3291  // TODO: match other operations that can merge flags (ADD, etc)
3292  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3293  if (N->getOpcode() == ISD::AND)
3294  return DAG.getConstant(0, DL, MVT::i1);
3295  return Merged.getValue(1);
3296 }
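
The argument in the comment above (the two partial carries can never both be set) is easy to verify exhaustively for 8-bit values; the standalone check below (illustrative only) also confirms that OR and XOR of the partial carries both reproduce the merged carry of (addcarry A, B, CarryIn), while AND of them is always zero.

  #include <cassert>

  int main() {
    for (unsigned A = 0; A < 256; ++A)
      for (unsigned B = 0; B < 256; ++B)
        for (unsigned CarryIn = 0; CarryIn <= 1; ++CarryIn) {
          unsigned Partial = A + B;                  // first uaddo
          bool CarryOutX = Partial > 0xFF;
          unsigned Sum = (Partial & 0xFF) + CarryIn; // second uaddo
          bool CarryOutY = Sum > 0xFF;
          bool Merged = (A + B + CarryIn) > 0xFF;    // carry of (addcarry A, B, CarryIn)
          assert(!(CarryOutX && CarryOutY));         // at most one partial carry is set
          assert((CarryOutX || CarryOutY) == Merged);
          assert((CarryOutX != CarryOutY) == Merged); // xor merges them just as well
        }
    return 0;
  }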
3297 
3298 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3299  SDNode *N) {
3300  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3301  if (isBitwiseNot(N0))
3302  if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3303  SDLoc DL(N);
3304  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3305  N0.getOperand(0), NotC);
3306  return CombineTo(
3307  N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3308  }
3309 
3310  // Iff the flag result is dead:
3311  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3312  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3313  // or the dependency between the instructions.
3314  if ((N0.getOpcode() == ISD::ADD ||
3315  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3316  N0.getValue(1) != CarryIn)) &&
3317  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3318  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3319  N0.getOperand(0), N0.getOperand(1), CarryIn);
3320 
3321  /**
3322  * When one of the addcarry arguments is itself a carry, we may be facing
3323  * a diamond carry propagation, in which case we try to transform the DAG
3324  * to ensure linear carry propagation if that is possible.
3325  */
3326  if (auto Y = getAsCarry(TLI, N1)) {
3327  // Because both are carries, Y and Z can be swapped.
3328  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3329  return R;
3330  if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3331  return R;
3332  }
3333 
3334  return SDValue();
3335 }
3336 
3337 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3338 // clamp/truncation if necessary.
3339 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3340  SDValue RHS, SelectionDAG &DAG,
3341  const SDLoc &DL) {
3342  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3343  "Illegal truncation");
3344 
3345  if (DstVT == SrcVT)
3346  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3347 
3348  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3349  // clamping RHS.
3350  APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3351  DstVT.getScalarSizeInBits());
3352  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3353  return SDValue();
3354 
3355  SDValue SatLimit =
3356  DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3357  DstVT.getScalarSizeInBits()),
3358  DL, SrcVT);
3359  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3360  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3361  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3362  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3363 }
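
The clamp above is safe because the zero-extended LHS never exceeds SatLimit, so any RHS above SatLimit saturates the result to zero either way. A standalone check for SrcVT = i16 and DstVT = i8 (illustrative only; the helper is not part of this file):

  #include <algorithm>
  #include <cassert>

  static unsigned usubsat(unsigned A, unsigned B) { return A > B ? A - B : 0; }

  int main() {
    for (unsigned LHS = 0; LHS <= 0xFF; ++LHS)       // upper bits of LHS are zero
      for (unsigned RHS = 0; RHS <= 0xFFFF; ++RHS) {
        unsigned Wide = usubsat(LHS, RHS) & 0xFF;    // truncated i16 usubsat
        unsigned Narrow = usubsat(LHS, std::min(RHS, 0xFFu)); // i8 usubsat, RHS clamped
        assert(Wide == Narrow);
      }
    return 0;
  }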
3364 
3365 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3366 // usubsat(a,b), optionally as a truncated type.
3367 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3368  if (N->getOpcode() != ISD::SUB ||
3369  !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3370  return SDValue();
3371 
3372  EVT SubVT = N->getValueType(0);
3373  SDValue Op0 = N->getOperand(0);
3374  SDValue Op1 = N->getOperand(1);
3375 
3376  // Try to find umax(a,b) - b or a - umin(a,b) patterns;
3377  // they may be converted to usubsat(a,b).
3378  if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3379  SDValue MaxLHS = Op0.getOperand(0);
3380  SDValue MaxRHS = Op0.getOperand(1);
3381  if (MaxLHS == Op1)
3382  return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3383  if (MaxRHS == Op1)
3384  return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3385  }
3386 
3387  if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3388  SDValue MinLHS = Op1.getOperand(0);
3389  SDValue MinRHS = Op1.getOperand(1);
3390  if (MinLHS == Op0)
3391  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3392  if (MinRHS == Op0)
3393  return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3394  }
3395 
3396  // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3397  if (Op1.getOpcode() == ISD::TRUNCATE &&
3398  Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3399  Op1.getOperand(0).hasOneUse()) {
3400  SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3401  SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3402  if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3403  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3404  DAG, SDLoc(N));
3405  if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3406  return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3407  DAG, SDLoc(N));
3408  }
3409 
3410  return SDValue();
3411 }
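
The two patterns this function looks for are restatements of unsigned saturating subtraction: umax(a,b) - b and a - umin(a,b) both equal usubsat(a,b). A standalone 8-bit check (illustrative only):

  #include <algorithm>
  #include <cassert>

  int main() {
    for (unsigned A = 0; A < 256; ++A)
      for (unsigned B = 0; B < 256; ++B) {
        unsigned USubSat = A > B ? A - B : 0;
        assert(std::max(A, B) - B == USubSat); // umax(a,b) - b
        assert(A - std::min(A, B) == USubSat); // a - umin(a,b)
      }
    return 0;
  }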
3412 
3413 // Since it may not be valid to emit a fold to zero for vector initializers,
3414 // check if we can before folding.
3415 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3416  SelectionDAG &DAG, bool LegalOperations) {
3417  if (!VT.isVector())
3418  return DAG.getConstant(0, DL, VT);
3419  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3420  return DAG.getConstant(0, DL, VT);
3421  return SDValue();
3422 }
3423 
3424 SDValue DAGCombiner::visitSUB(SDNode *N) {
3425  SDValue N0 = N->getOperand(0);
3426  SDValue N1 = N->getOperand(1);
3427  EVT VT = N0.getValueType();
3428  SDLoc DL(N);
3429 
3430  auto PeekThroughFreeze = [](SDValue N) {
3431  if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3432  return N->getOperand(0);
3433  return N;
3434  };
3435 
3436  // fold (sub x, x) -> 0
3437  // FIXME: Refactor this and xor and other similar operations together.
3438  if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3439  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3440 
3441  // fold (sub c1, c2) -> c3
3442  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3443  return C;
3444 
3445  // fold vector ops
3446  if (VT.isVector()) {
3447  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3448  return FoldedVOp;
3449 
3450  // fold (sub x, 0) -> x, vector edition
3451  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3452  return N0;
3453  }
3454 
3455  if (SDValue NewSel = foldBinOpIntoSelect(N))
3456  return NewSel;
3457 
3458  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3459 
3460  // fold (sub x, c) -> (add x, -c)
3461  if (N1C) {
3462  return DAG.getNode(ISD::ADD, DL, VT, N0,
3463  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3464  }
3465 
3466  if (isNullOrNullSplat(N0)) {
3467  unsigned BitWidth = VT.getScalarSizeInBits();
3468  // Right-shifting everything out but the sign bit followed by negation is
3469  // the same as flipping arithmetic/logical shift type without the negation:
3470  // -(X >>u 31) -> (X >>s 31)
3471  // -(X >>s 31) -> (X >>u 31)
3472  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3473  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3474  if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3475  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3476  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3477  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3478  }
3479  }
3480 
3481  // 0 - X --> 0 if the sub is NUW.
3482  if (N->getFlags().hasNoUnsignedWrap())
3483  return N0;
3484 
3485  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3486  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3487  // N1 must be 0 because negating the minimum signed value is undefined.
3488  if (N->getFlags().hasNoSignedWrap())
3489  return N0;
3490 
3491  // 0 - X --> X if X is 0 or the minimum signed value.
3492  return N1;
3493  }
3494 
3495  // Convert 0 - abs(x).
3496  if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3497  !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3498  if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3499  return Result;
3500 
3501  // Fold neg(splat(neg(x))) -> splat(x)
3502  if (VT.isVector()) {
3503  SDValue N1S = DAG.getSplatValue(N1, true);
3504  if (N1S && N1S.getOpcode() == ISD::SUB &&
3505  isNullConstant(N1S.getOperand(0)))
3506  return DAG.getSplat(VT, DL, N1S.getOperand(1));
3507  }
3508  }
3509 
3510  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3511  if (isAllOnesOrAllOnesSplat(N0))
3512  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3513 
3514  // fold (A - (0-B)) -> A+B
3515  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3516  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3517 
3518  // fold A-(A-B) -> B
3519  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3520  return N1.getOperand(1);
3521 
3522  // fold (A+B)-A -> B
3523  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3524  return N0.getOperand(1);
3525 
3526  // fold (A+B)-B -> A
3527  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3528  return N0.getOperand(0);
3529 
3530  // fold (A+C1)-C2 -> A+(C1-C2)
3531  if (N0.getOpcode() == ISD::ADD) {
3532  SDValue N01 = N0.getOperand(1);
3533  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3534  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3535  }
3536 
3537  // fold C2-(A+C1) -> (C2-C1)-A
3538  if (N1.getOpcode() == ISD::ADD) {
3539  SDValue N11 = N1.getOperand(1);
3540  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3541  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3542  }
3543 
3544  // fold (A-C1)-C2 -> A-(C1+C2)
3545  if (N0.getOpcode() == ISD::SUB) {
3546  SDValue N01 = N0.getOperand(1);
3547  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3548  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3549  }
3550 
3551  // fold (c1-A)-c2 -> (c1-c2)-A
3552  if (N0.getOpcode() == ISD::SUB) {
3553  SDValue N00 = N0.getOperand(0);
3554  if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3555  return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3556  }
3557 
3558  // fold ((A+(B+or-C))-B) -> A+or-C
3559  if (N0.getOpcode() == ISD::ADD &&
3560  (N0.getOperand(1).getOpcode() == ISD::SUB ||
3561  N0.getOperand(1).getOpcode() == ISD::ADD) &&
3562  N0.getOperand(1).getOperand(0) == N1)
3563  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3564  N0.getOperand(1).getOperand(1));
3565 
3566  // fold ((A+(C+B))-B) -> A+C
3567  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3568  N0.getOperand(1).getOperand(1) == N1)
3569  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3570  N0.getOperand(1).getOperand(0));
3571 
3572  // fold ((A-(B-C))-C) -> A-B
3573  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3574  N0.getOperand(1).getOperand(1) == N1)
3575  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3576  N0.getOperand(1).getOperand(0));
3577 
3578  // fold (A-(B-C)) -> A+(C-B)
3579  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3580  return DAG.getNode(ISD::ADD, DL, VT, N0,
3581  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3582  N1.getOperand(0)));
3583 
3584  // A - (A & B) -> A & (~B)
3585  if (N1.getOpcode() == ISD::AND) {
3586  SDValue A = N1.getOperand(0);
3587  SDValue B = N1.getOperand(1);
3588  if (A != N0)
3589  std::swap(A, B);
3590  if (A == N0 &&
3591  (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3592  SDValue InvB =
3593  DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3594  return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3595  }
3596  }
3597 
3598  // fold (X - (-Y * Z)) -> (X + (Y * Z))
3599  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3600  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3601  isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3602  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3603  N1.getOperand(0).getOperand(1),
3604  N1.getOperand(1));
3605  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3606  }
3607  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3608  isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3609  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3610  N1.getOperand(0),
3611  N1.getOperand(1).getOperand(1));
3612  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3613  }
3614  }
3615 
3616  // If either operand of a sub is undef, the result is undef
3617  if (N0.isUndef())
3618  return N0;
3619  if (N1.isUndef())
3620  return N1;
3621 
3622  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3623  return V;
3624 
3625  if (SDValue V = foldAddSubOfSignBit(N, DAG))
3626  return V;
3627 
3628  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3629  return V;
3630 
3631  if (SDValue V = foldSubToUSubSat(VT, N))
3632  return V;
3633 
3634  // (x - y) - 1 -> add (xor y, -1), x
3635  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
3636  SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3637  DAG.getAllOnesConstant(DL, VT));
3638  return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3639  }
3640 
3641  // Look for:
3642  // sub y, (xor x, -1)
3643  // And if the target does not like this form then turn into:
3644  // add (add x, y), 1
3645  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3646  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3647  return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3648  }
3649 
3650  // Hoist one-use addition by non-opaque constant:
3651  // (x + C) - y -> (x - y) + C
3652  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3653  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3654  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3655  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3656  }
3657  // y - (x + C) -> (y - x) - C
3658  if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3659  isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3660  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3661  return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3662  }
3663  // (x - C) - y -> (x - y) - C
3664  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3665  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3666  isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3667  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3668  return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3669  }
3670  // (C - x) - y -> C - (x + y)
3671  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3672  isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3673  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3674  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3675  }
3676 
3677  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3678  // rather than 'sub 0/1' (the sext should get folded).
3679  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3680  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3681  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3682  TLI.getBooleanContents(VT) ==
3683  TargetLowering::ZeroOrNegativeOneBooleanContent) {
3684  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3685  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3686  }
3687 
3688  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3689  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3690  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3691  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3692  SDValue S0 = N1.getOperand(0);
3693  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3694  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3695  if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3696  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3697  }
3698  }
3699 
3700  // If the relocation model supports it, consider symbol offsets.
3701  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3702  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3703  // fold (sub Sym, c) -> Sym-c
3704  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3705  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3706  GA->getOffset() -
3707  (uint64_t)N1C->getSExtValue());
3708  // fold (sub Sym+c1, Sym+c2) -> c1-c2
3709  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3710  if (GA->getGlobal() == GB->getGlobal())
3711  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3712  DL, VT);
3713  }
3714 
3715  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3716  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3717  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3718  if (TN->getVT() == MVT::i1) {
3719  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3720  DAG.getConstant(1, DL, VT));
3721  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3722  }
3723  }
3724 
3725  // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3726  if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
3727  const APInt &IntVal = N1.getConstantOperandAPInt(0);
3728  return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3729  }
3730 
3731  // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3732  if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3733  APInt NewStep = -N1.getConstantOperandAPInt(0);
3734  return DAG.getNode(ISD::ADD, DL, VT, N0,
3735  DAG.getStepVector(DL, VT, NewStep));
3736  }
3737 
3738  // Prefer an add for more folding potential and possibly better codegen:
3739  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3740  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3741  SDValue ShAmt = N1.getOperand(1);
3742  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3743  if (ShAmtC &&
3744  ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3745  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3746  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3747  }
3748  }
3749 
3750  // As with the previous fold, prefer add for more folding potential.
3751  // Subtracting SMIN/0 is the same as adding SMIN/0:
3752  // N0 - (X << BW-1) --> N0 + (X << BW-1)
3753  if (N1.getOpcode() == ISD::SHL) {
3754  ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
3755  if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
3756  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
3757  }
3758 
3759  // (sub (subcarry X, 0, Carry), Y) -> (subcarry X, Y, Carry)
3760  if (N0.getOpcode() == ISD::SUBCARRY && isNullConstant(N0.getOperand(1)) &&
3761  N0.getResNo() == 0 && N0.hasOneUse())
3762  return DAG.getNode(ISD::SUBCARRY, DL, N0->getVTList(),
3763  N0.getOperand(0), N1, N0.getOperand(2));
3764 
3765  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3766  // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3767  if (SDValue Carry = getAsCarry(TLI, N0)) {
3768  SDValue X = N1;
3769  SDValue Zero = DAG.getConstant(0, DL, VT);
3770  SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3771  return DAG.getNode(ISD::ADDCARRY, DL,
3772  DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3773  Carry);
3774  }
3775  }
3776 
3777  // If there's no chance of borrowing from adjacent bits, then sub is xor:
3778  // sub C0, X --> xor X, C0
3779  if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
3780  if (!C0->isOpaque()) {
3781  const APInt &C0Val = C0->getAPIntValue();
3782  const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
3783  if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
3784  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3785  }
3786  }
3787 
3788  return SDValue();
3789 }
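
For the final sub-to-xor fold above, the known-bits test (C0Val - MaybeOnes) == (C0Val ^ MaybeOnes) guarantees that no bit position can borrow for any X whose possible one bits are limited to MaybeOnes. A small standalone example with C0 = 0xF0 and MaybeOnes = 0x30 (values chosen purely for illustration):

  #include <cassert>

  int main() {
    const unsigned C0 = 0xF0;        // constant LHS of the sub
    const unsigned MaybeOnes = 0x30; // bits that may be set in X
    assert((C0 - MaybeOnes) == (C0 ^ MaybeOnes)); // the combiner's test
    for (unsigned X = 0; X < 256; ++X)
      if ((X & ~MaybeOnes) == 0)    // X only uses the MaybeOnes bits
        assert(C0 - X == (C0 ^ X)); // the sub never borrows, so it is an xor
    return 0;
  }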
3790 
3791 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3792  SDValue N0 = N->getOperand(0);
3793  SDValue N1 = N->getOperand(1);
3794  EVT VT = N0.getValueType();
3795  SDLoc DL(N);
3796 
3797  // fold (sub_sat x, undef) -> 0
3798  if (N0.isUndef() || N1.isUndef())
3799  return DAG.getConstant(0, DL, VT);
3800 
3801  // fold (sub_sat x, x) -> 0
3802  if (N0 == N1)
3803  return DAG.getConstant(0, DL, VT);
3804 
3805  // fold (sub_sat c1, c2) -> c3
3806  if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3807  return C;
3808 
3809  // fold vector ops
3810  if (VT.isVector()) {
3811  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3812  return FoldedVOp;
3813 
3814  // fold (sub_sat x, 0) -> x, vector edition
3815  if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3816  return N0;
3817  }
3818 
3819  // fold (sub_sat x, 0) -> x
3820  if (isNullConstant(N1))
3821  return N0;
3822 
3823  return SDValue();
3824 }
3825 
3826 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3827  SDValue N0 = N->getOperand(0);
3828  SDValue N1 = N->getOperand(1);
3829  EVT VT = N0.getValueType();
3830  SDLoc DL(N);
3831 
3832  // If the flag result is dead, turn this into a SUB.
3833  if (!N->hasAnyUseOfValue(1))
3834  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3835  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3836 
3837  // fold (subc x, x) -> 0 + no borrow
3838  if (N0 == N1)
3839  return CombineTo(N, DAG.getConstant(0, DL, VT),
3840  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3841 
3842  // fold (subc x, 0) -> x + no borrow
3843  if (isNullConstant(N1))
3844  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3845 
3846  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3847  if (isAllOnesConstant(N0))
3848  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3849  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3850 
3851  return SDValue();
3852 }
3853 
3854 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3855  SDValue N0 = N->getOperand(0);
3856  SDValue N1 = N->getOperand(1);
3857  EVT VT = N0.getValueType();
3858  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3859 
3860  EVT CarryVT = N->getValueType(1);
3861  SDLoc DL(N);
3862 
3863  // If the flag result is dead, turn this into a SUB.
3864  if (!N->hasAnyUseOfValue(1))
3865  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3866  DAG.getUNDEF(CarryVT));
3867 
3868  // fold (subo x, x) -> 0 + no borrow
3869  if (N0 == N1)
3870  return CombineTo(N, DAG.getConstant(0, DL, VT),
3871  DAG.getConstant(0, DL, CarryVT));
3872 
3873  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3874 
3875  // fold (subo x, c) -> (addo x, -c)
3876  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3877  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3878  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3879  }
3880 
3881  // fold (subo x, 0) -> x + no borrow
3882  if (isNullOrNullSplat(N1))
3883  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3884 
3885  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3886  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3887  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3888  DAG.getConstant(0, DL, CarryVT));
3889 
3890  return SDValue();
3891 }
3892 
3893 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3894  SDValue N0 = N->getOperand(0);
3895  SDValue N1 = N->getOperand(1);
3896  SDValue CarryIn = N->getOperand(2);
3897 
3898  // fold (sube x, y, false) -> (subc x, y)
3899  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3900  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3901 
3902  return SDValue();
3903 }
3904 
3905 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3906  SDValue N0 = N->getOperand(0);
3907  SDValue N1 = N->getOperand(1);
3908  SDValue CarryIn = N->getOperand(2);
3909 
3910  // fold (subcarry x, y, false) -> (usubo x, y)
3911  if (isNullConstant(CarryIn)) {
3912  if (!LegalOperations ||
3913  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3914  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3915  }
3916 
3917  return SDValue();
3918 }
3919 
3920 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3921  SDValue N0 = N->getOperand(0);
3922  SDValue N1 = N->getOperand(1);
3923  SDValue CarryIn = N->getOperand(2);
3924 
3925  // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3926  if (isNullConstant(CarryIn)) {
3927  if (!LegalOperations ||
3928  TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3929  return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3930  }
3931 
3932  return SDValue();
3933 }
3934 
3935 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3936 // UMULFIXSAT here.
3937 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3938  SDValue N0 = N->getOperand(0);
3939  SDValue N1 = N->getOperand(1);
3940  SDValue Scale = N->getOperand(2);
3941  EVT VT = N0.getValueType();
3942 
3943  // fold (mulfix x, undef, scale) -> 0
3944  if (N0.isUndef() || N1.isUndef())
3945  return DAG.getConstant(0, SDLoc(N), VT);
3946 
3947  // Canonicalize constant to RHS (vector doesn't have to splat)
3948  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3949  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3950  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3951 
3952  // fold (mulfix x, 0, scale) -> 0
3953  if (isNullConstant(N1))
3954  return DAG.getConstant(0, SDLoc(N), VT);
3955 
3956  return SDValue();
3957 }
3958 
3959 SDValue DAGCombiner::visitMUL(SDNode *N) {
3960  SDValue N0 = N->getOperand(0);
3961  SDValue N1 = N->getOperand(1);
3962  EVT VT = N0.getValueType();
3963  SDLoc DL(N);
3964 
3965  // fold (mul x, undef) -> 0
3966  if (N0.isUndef() || N1.isUndef())
3967  return DAG.getConstant(0, DL, VT);
3968 
3969  // fold (mul c1, c2) -> c1*c2
3970  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
3971  return C;
3972 
3973  // canonicalize constant to RHS (vector doesn't have to splat)
3974  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3975  !DAG.isConstantIntBuildVectorOrConstantInt(N0))
3976  return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
3977 
3978  bool N1IsConst = false;
3979  bool N1IsOpaqueConst = false;
3980  APInt ConstValue1;
3981 
3982  // fold vector ops
3983  if (VT.isVector()) {
3984  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3985  return FoldedVOp;
3986 
3987  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3988  assert((!N1IsConst ||
3989  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3990  "Splat APInt should be element width");
3991  } else {
3992  N1IsConst = isa<ConstantSDNode>(N1);
3993  if (N1IsConst) {
3994  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3995  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3996  }
3997  }
3998 
3999  // fold (mul x, 0) -> 0
4000  if (N1IsConst && ConstValue1.isZero())
4001  return N1;
4002 
4003  // fold (mul x, 1) -> x
4004  if (N1IsConst && ConstValue1.isOne())
4005  return N0;
4006 
4007  if (SDValue NewSel = foldBinOpIntoSelect(N))
4008  return NewSel;
4009 
4010  // fold (mul x, -1) -> 0-x
4011  if (N1IsConst && ConstValue1.isAllOnes())
4012  return DAG.getNegative(N0, DL, VT);
4013 
4014  // fold (mul x, (1 << c)) -> x << c
4015  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4016  DAG.isKnownToBeAPowerOfTwo(N1) &&
4017  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4018  SDValue LogBase2 = BuildLogBase2(N1, DL);
4019  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4020  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4021  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4022  }
4023 
4024  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4025  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4026  unsigned Log2Val = (-ConstValue1).logBase2();
4027  // FIXME: If the input is something that is easily negated (e.g. a
4028  // single-use add), we should put the negate there.
4029  return DAG.getNode(ISD::SUB, DL, VT,
4030  DAG.getConstant(0, DL, VT),
4031  DAG.getNode(ISD::SHL, DL, VT, N0,
4032  DAG.getConstant(Log2Val, DL,
4033  getShiftAmountTy(N0.getValueType()))));
4034  }
4035 
4036  // Try to transform:
4037  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4038  // mul x, (2^N + 1) --> add (shl x, N), x
4039  // mul x, (2^N - 1) --> sub (shl x, N), x
4040  // Examples: x * 33 --> (x << 5) + x
4041  // x * 15 --> (x << 4) - x
4042  // x * -33 --> -((x << 5) + x)
4043  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4044  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4045  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4046  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4047  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4048  // x * 0xf800 --> (x << 16) - (x << 11)
4049  // x * -0x8800 --> -((x << 15) + (x << 11))
4050  // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4051  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4052  // TODO: We could handle more general decomposition of any constant by
4053  // having the target set a limit on number of ops and making a
4054  // callback to determine that sequence (similar to sqrt expansion).
4055  unsigned MathOp = ISD::DELETED_NODE;
4056  APInt MulC = ConstValue1.abs();
4057  // The constant `2` should be treated as (2^0 + 1).
4058  unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
4059  MulC.lshrInPlace(TZeros);
4060  if ((MulC - 1).isPowerOf2())
4061  MathOp = ISD::ADD;
4062  else if ((MulC + 1).isPowerOf2())
4063  MathOp = ISD::SUB;
4064 
4065  if (MathOp != ISD::DELETED_NODE) {
4066  unsigned ShAmt =
4067  MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4068  ShAmt += TZeros;
4069  assert(ShAmt < VT.getScalarSizeInBits() &&
4070  "multiply-by-constant generated out of bounds shift");
4071  SDValue Shl =
4072  DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4073  SDValue R =
4074  TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4075  DAG.getNode(ISD::SHL, DL, VT, N0,
4076  DAG.getConstant(TZeros, DL, VT)))
4077  : DAG.getNode(MathOp, DL, VT, Shl, N0);
4078  if (ConstValue1.isNegative())
4079  R = DAG.getNegative(R, DL, VT);
4080  return R;
4081  }
4082  }
4083 
4084  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4085  if (N0.getOpcode() == ISD::SHL) {
4086  SDValue N01 = N0.getOperand(1);
4087  if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4088  return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4089  }
4090 
4091  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4092  // use.
4093  {
4094  SDValue Sh, Y;
4095 
4096  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4097  if (N0.getOpcode() == ISD::SHL &&
4098  isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4099  Sh = N0; Y = N1;
4100  } else if (N1.getOpcode() == ISD::SHL &&
4101  isConstantOrConstantVector(N1.getOperand(1)) &&
4102  N1->hasOneUse()) {
4103  Sh = N1; Y = N0;
4104  }
4105 
4106  if (Sh.getNode()) {
4107  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4108  return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4109  }
4110  }
4111 
4112  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4113  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4114  N0.getOpcode() == ISD::ADD &&
4115  DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4116  isMulAddWithConstProfitable(N, N0, N1))
4117  return DAG.getNode(
4118  ISD::ADD, DL, VT,
4119  DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4120  DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4121 
4122  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4123  ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4124  if (N0.getOpcode() == ISD::VSCALE && NC1) {
4125  const APInt &C0 = N0.getConstantOperandAPInt(0);
4126  const APInt &C1 = NC1->getAPIntValue();
4127  return DAG.getVScale(DL, VT, C0 * C1);
4128  }
4129 
4130  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4131  APInt MulVal;
4132  if (N0.getOpcode() == ISD::STEP_VECTOR &&
4133  ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4134  const APInt &C0 = N0.getConstantOperandAPInt(0);
4135  APInt NewStep = C0 * MulVal;
4136  return DAG.getStepVector(DL, VT, NewStep);
4137  }
4138 
4139  // Fold (mul x, 0/undef) -> 0 and
4140  // (mul x, 1) -> x
4141  // into and(x, mask).
4142  // We can replace vectors with '0' and '1' factors with a clearing mask.
4143  if (VT.isFixedLengthVector()) {
4144  unsigned NumElts = VT.getVectorNumElements();
4145  SmallBitVector ClearMask;
4146  ClearMask.reserve(NumElts);
4147  auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4148  if (!V || V->isZero()) {
4149  ClearMask.push_back(true);
4150  return true;
4151  }
4152  ClearMask.push_back(false);
4153  return V->isOne();
4154  };
4155  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4156  ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4157  assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4158  EVT LegalSVT = N1.getOperand(0).getValueType();
4159  SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4160  SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4161  SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4162  for (unsigned I = 0; I != NumElts; ++I)
4163  if (ClearMask[I])
4164  Mask[I] = Zero;
4165  return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4166  }
4167  }
4168 
4169  // reassociate mul
4170  if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4171  return RMUL;
4172 
4173  // Simplify the operands using demanded-bits information.
4174  if (SimplifyDemandedBits(SDValue(N, 0)))
4175  return SDValue(N, 0);
4176 
4177  return SDValue();
4178 }
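
To see the decomposeMulByConstant path above in action, take a multiply by 10: TZeros = 1, the shifted constant 5 has (5 - 1) as a power of two, so MathOp is ADD and ShAmt = 2 + 1 = 3, giving (x << 3) + (x << 1). A standalone check (illustrative only):

  #include <cassert>
  #include <cstdint>

  int main() {
    // x * 10 --> (x << 3) + (x << 1); both sides agree modulo 2^32.
    for (uint32_t X = 0; X < 100000; ++X)
      assert((X << 3) + (X << 1) == X * 10u);
    return 0;
  }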
4179 
4180 /// Return true if divmod libcall is available.
4181 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4182  const TargetLowering &TLI) {
4183  RTLIB::Libcall LC;
4184  EVT NodeType = Node->getValueType(0);
4185  if (!NodeType.isSimple())
4186  return false;
4187  switch (NodeType.getSimpleVT().SimpleTy) {
4188  default: return false; // No libcall for vector types.
4189  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4190  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4191  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4192  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4193  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4194  }
4195 
4196  return TLI.getLibcallName(LC) != nullptr;
4197 }
4198 
4199 /// Issue divrem if both quotient and remainder are needed.
4200 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4201  if (Node->use_empty())
4202  return SDValue(); // This is a dead node, leave it alone.
4203 
4204  unsigned Opcode = Node->getOpcode();
4205  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4206  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4207 
4208  // DivMod can still be handled for non-legal types by using lib-calls.
4209  EVT VT = Node->getValueType(0);
4210  if (VT.isVector() || !VT.isInteger())
4211  return SDValue();
4212 
4213  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4214  return SDValue();
4215 
4216  // If DIVREM is going to get expanded into a libcall,
4217  // but there is no libcall available, then don't combine.
4218  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4219  !isDivRemLibcallAvailable(Node, isSigned, TLI))
4220  return SDValue();
4221 
4222  // If div is legal, it's better to do the normal expansion
4223  unsigned OtherOpcode = 0;
4224  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4225  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4226  if (TLI.isOperationLegalOrCustom(Opcode, VT))
4227  return SDValue();
4228  } else {
4229  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4230  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4231  return SDValue();
4232  }
4233 
4234  SDValue Op0 = Node->getOperand(0);
4235  SDValue Op1 = Node->getOperand(1);
4236  SDValue combined;
4237  for (SDNode *User : Op0->uses()) {
4238  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4239  User->use_empty())
4240  continue;
4241  // Convert the other matching node(s), too;
4242  // otherwise, the DIVREM may get target-legalized into something
4243  // target-specific that we won't be able to recognize.
4244  unsigned UserOpc = User->getOpcode();
4245  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4246  User->getOperand(0) == Op0 &&
4247  User->getOperand(1) == Op1) {
4248  if (!combined) {
4249  if (UserOpc == OtherOpcode) {
4250  SDVTList VTs = DAG.getVTList(VT, VT);
4251  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4252  } else if (UserOpc == DivRemOpc) {
4253  combined = SDValue(User, 0);
4254  } else {
4255  assert(UserOpc == Opcode);
4256  continue;
4257  }
4258  }
4259  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4260  CombineTo(User, combined);
4261  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4262  CombineTo(User, combined.getValue(1));
4263  }
4264  }
4265  return combined;
4266 }
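
The effect of useDivRem is easiest to see at the source level: when a quotient and a remainder of the same operands both exist, they are rewritten to share one DIVREM-style node (or one divrem libcall) instead of performing the division twice. A minimal source-level sketch of such a pair (illustrative only; the function name is made up):

  #include <cstdio>

  // Q and R use the same operands, so after the combine a single
  // divrem-style operation can produce both values.
  static void printDivMod(int A, int B) {
    int Q = A / B;
    int R = A % B;
    std::printf("%d = %d * %d + %d\n", A, B, Q, R);
  }

  int main() {
    printDivMod(23, 5); // prints: 23 = 5 * 4 + 3
    return 0;
  }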
4267 
4268 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4269  SDValue N0 = N->getOperand(0);
4270  SDValue N1 = N->getOperand(1);
4271  EVT VT = N->getValueType(0);
4272  SDLoc DL(N);
4273 
4274  unsigned Opc = N->getOpcode();
4275  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4276  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4277 
4278  // X / undef -> undef
4279  // X % undef -> undef
4280  // X / 0 -> undef
4281  // X % 0 -> undef
4282  // NOTE: This includes vectors where any divisor element is zero/undef.
4283  if (DAG.isUndef(Opc, {N0, N1}))
4284  return DAG.getUNDEF(VT);
4285 
4286  // undef / X -> 0
4287  // undef % X -> 0
4288  if (N0.isUndef())
4289  return DAG.getConstant(0, DL, VT);
4290 
4291  // 0 / X -> 0
4292  // 0 % X -> 0
4293  ConstantSDNode *N0C = isConstOrConstSplat(N0);
4294  if (N0C && N0C->isZero())
4295  return N0;
4296 
4297  // X / X -> 1
4298  // X % X -> 0
4299  if (N0 == N1)
4300  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4301 
4302  // X / 1 -> X
4303  // X % 1 -> 0
4304  // If this is a boolean op (single-bit element type), we can't have
4305  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4306  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4307  // it's a 1.
4308  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4309  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4310 
4311  return SDValue();
4312 }
4313 
4314 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4315  SDValue N0 = N->getOperand(0);
4316  SDValue N1 = N->getOperand(1);
4317  EVT VT = N->getValueType(0);
4318  EVT CCVT = getSetCCResultType(VT);
4319  SDLoc DL(N);
4320 
4321  // fold (sdiv c1, c2) -> c1/c2
4322  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4323  return C;
4324 
4325  // fold vector ops
4326  if (VT.isVector())
4327  if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4328  return FoldedVOp;
4329 
4330  // fold (sdiv X, -1) -> 0-X
4331  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4332  if (N1C && N1C->isAllOnes())
4333  return DAG.getNegative(N0, DL, VT);
4334 
4335  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4336  if (N1C && N1C->getAPIntValue().isMinSignedValue())
4337  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4338  DAG.getConstant(1, DL, VT),
4339  DAG.getConstant(0, DL, VT));
4340 
4341  if (SDValue V = simplifyDivRem(N, DAG))
4342  return V;
4343 
4344  if (SDValue NewSel = foldBinOpIntoSelect(N))
4345  return NewSel;
4346 
4347  // If we know the sign bits of both operands are zero, strength reduce to a
4348  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4349  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4350  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4351 
4352  if (SDValue V = visitSDIVLike(N0, N1, N)) {
4353  // If the corresponding remainder node exists, update its users with
4354  // (Dividend - (Quotient * Divisor)).
4355  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4356  { N0, N1 })) {
4357  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4358  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4359  AddToWorklist(Mul.getNode());
4360  AddToWorklist(Sub.getNode());
4361  CombineTo(RemNode, Sub);
4362  }
4363  return V;
4364  }
4365 
4366  // sdiv, srem -> sdivrem
4367  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4368  // true. Otherwise, we break the simplification logic in visitREM().
4369  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4370  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4371  if (SDValue DivRem = useDivRem(N))
4372  return DivRem;
4373 
4374  return SDValue();
4375 }
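
The remainder update above relies on the identity dividend - (dividend / divisor) * divisor == dividend % divisor, which holds for truncating integer division whenever the division itself is defined. A standalone check over a small range (illustrative only):

  #include <cassert>

  int main() {
    for (int A = -50; A <= 50; ++A)
      for (int B = -50; B <= 50; ++B) {
        if (B == 0)
          continue; // division by zero is undefined here as well
        assert(A % B == A - (A / B) * B);
      }
    return 0;
  }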
4376 
4377 static bool isDivisorPowerOfTwo(SDValue Divisor) {
4378  // Helper for determining whether a value is a power-2 constant scalar or a
4379  // vector of such elements.
4380  auto IsPowerOfTwo = [](ConstantSDNode *C) {
4381  if (C->isZero() || C->isOpaque())
4382  return false;
4383  if (C->getAPIntValue().isPowerOf2())
4384  return true;
4385  if (C->getAPIntValue().isNegatedPowerOf2())
4386  return true;
4387  return false;
4388  };
4389 
4390  return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4391 }
4392 
4393 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4394  SDLoc DL(N);
4395  EVT VT = N->getValueType(0);
4396  EVT CCVT = getSetCCResultType(VT);
4397  unsigned BitWidth = VT.getScalarSizeInBits();
4398 
4399  // fold (sdiv X, pow2) -> simple ops after legalize
4400  // FIXME: We check for the exact bit here because the generic lowering gives
4401  // better results in that case. The target-specific lowering should learn how
4402  // to handle exact sdivs efficiently.
4403  if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4404  // Target-specific implementation of sdiv x, pow2.
4405  if (SDValue Res = BuildSDIVPow2(N))
4406  return Res;
4407 
4408  // Create constants that are functions of the shift amount value.
4409  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4410  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4411  SDValue C1 = DAG.getNode(