LLVM  9.0.0svn
DAGCombiner.cpp
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/IntervalMap.h"
24 #include "llvm/ADT/None.h"
25 #include "llvm/ADT/Optional.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallBitVector.h"
29 #include "llvm/ADT/SmallPtrSet.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/AliasAnalysis.h"
34 #include "llvm/Analysis/MemoryLocation.h"
35 #include "llvm/CodeGen/DAGCombine.h"
36 #include "llvm/CodeGen/ISDOpcodes.h"
37 #include "llvm/CodeGen/MachineFrameInfo.h"
38 #include "llvm/CodeGen/MachineFunction.h"
39 #include "llvm/CodeGen/MachineMemOperand.h"
40 #include "llvm/CodeGen/RuntimeLibcalls.h"
41 #include "llvm/CodeGen/SelectionDAG.h"
42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
43 #include "llvm/CodeGen/SelectionDAGNodes.h"
44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
45 #include "llvm/CodeGen/TargetLowering.h"
46 #include "llvm/CodeGen/TargetRegisterInfo.h"
47 #include "llvm/CodeGen/TargetSubtargetInfo.h"
48 #include "llvm/CodeGen/ValueTypes.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Compiler.h"
60 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/ErrorHandling.h"
62 #include "llvm/Support/KnownBits.h"
63 #include "llvm/Support/MachineValueType.h"
64 #include "llvm/Support/MathExtras.h"
65 #include "llvm/Support/raw_ostream.h"
66 #include "llvm/Target/TargetMachine.h"
67 #include "llvm/Target/TargetOptions.h"
68 #include <algorithm>
69 #include <cassert>
70 #include <cstdint>
71 #include <functional>
72 #include <iterator>
73 #include <string>
74 #include <tuple>
75 #include <utility>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "dagcombine"
80 
81 STATISTIC(NodesCombined , "Number of dag nodes combined");
82 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
83 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
84 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
85 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
86 STATISTIC(SlicedLoads, "Number of load sliced");
87 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
88 
89 static cl::opt<bool>
90 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
91  cl::desc("Enable DAG combiner's use of IR alias analysis"));
92 
93 static cl::opt<bool>
94 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
95  cl::desc("Enable DAG combiner's use of TBAA"));
96 
97 #ifndef NDEBUG
98 static cl::opt<std::string>
99 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
100  cl::desc("Only use DAG-combiner alias analysis in this"
101  " function"));
102 #endif
103 
104 /// Hidden option to stress test load slicing, i.e., when this option
105 /// is enabled, load slicing bypasses most of its profitability guards.
106 static cl::opt<bool>
107 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
108  cl::desc("Bypass the profitability model of load slicing"),
109  cl::init(false));
110 
111 static cl::opt<bool>
112  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
113  cl::desc("DAG combiner may split indexing from loads"));
114 
115 namespace {
116 
117  class DAGCombiner {
118  SelectionDAG &DAG;
119  const TargetLowering &TLI;
120  CombineLevel Level;
121  CodeGenOpt::Level OptLevel;
122  bool LegalOperations = false;
123  bool LegalTypes = false;
124  bool ForCodeSize;
125 
126  /// Worklist of all of the nodes that need to be simplified.
127  ///
128  /// This must behave as a stack -- new nodes to process are pushed onto the
129  /// back and when processing we pop off of the back.
130  ///
131  /// The worklist will not contain duplicates but may contain null entries
132  /// due to nodes being deleted from the underlying DAG.
133  SmallVector<SDNode *, 64> Worklist;
134 
135  /// Mapping from an SDNode to its position on the worklist.
136  ///
137  /// This is used to find and remove nodes from the worklist (by nulling
138  /// them) when they are deleted from the underlying DAG. It relies on
139  /// stable indices of nodes within the worklist.
140  DenseMap<SDNode *, unsigned> WorklistMap;
141 
142  /// Set of nodes which have been combined (at least once).
143  ///
144  /// This is used to allow us to reliably add any operands of a DAG node
145  /// which have not yet been combined to the worklist.
146  SmallPtrSet<SDNode *, 32> CombinedNodes;
147 
148  // AA - Used for DAG load/store alias analysis.
149  AliasAnalysis *AA;
150 
151  /// When an instruction is simplified, add all users of the instruction to
152  /// the work lists because they might get more simplified now.
153  void AddUsersToWorklist(SDNode *N) {
154  for (SDNode *Node : N->uses())
155  AddToWorklist(Node);
156  }
157 
158  /// Call the node-specific routine that folds each particular type of node.
159  SDValue visit(SDNode *N);
160 
161  public:
162  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
163  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
164  OptLevel(OL), AA(AA) {
165  ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
166 
167  MaximumLegalStoreInBits = 0;
168  for (MVT VT : MVT::all_valuetypes())
169  if (EVT(VT).isSimple() && VT != MVT::Other &&
170  TLI.isTypeLegal(EVT(VT)) &&
171  VT.getSizeInBits() >= MaximumLegalStoreInBits)
172  MaximumLegalStoreInBits = VT.getSizeInBits();
173  }
174 
175  /// Add to the worklist making sure its instance is at the back (next to be
176  /// processed.)
177  void AddToWorklist(SDNode *N) {
179  "Deleted Node added to Worklist");
180 
181  // Skip handle nodes as they can't usefully be combined and confuse the
182  // zero-use deletion strategy.
183  if (N->getOpcode() == ISD::HANDLENODE)
184  return;
185 
186  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
187  Worklist.push_back(N);
188  }
189 
190  /// Remove all instances of N from the worklist.
191  void removeFromWorklist(SDNode *N) {
192  CombinedNodes.erase(N);
193 
194  auto It = WorklistMap.find(N);
195  if (It == WorklistMap.end())
196  return; // Not in the worklist.
197 
198  // Null out the entry rather than erasing it to avoid a linear operation.
199  Worklist[It->second] = nullptr;
200  WorklistMap.erase(It);
201  }
202 
203  void deleteAndRecombine(SDNode *N);
204  bool recursivelyDeleteUnusedNodes(SDNode *N);
205 
206  /// Replaces all uses of the results of one DAG node with new values.
207  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
208  bool AddTo = true);
209 
210  /// Replaces all uses of the results of one DAG node with new values.
211  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
212  return CombineTo(N, &Res, 1, AddTo);
213  }
214 
215  /// Replaces all uses of the results of one DAG node with new values.
216  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
217  bool AddTo = true) {
218  SDValue To[] = { Res0, Res1 };
219  return CombineTo(N, To, 2, AddTo);
220  }
221 
222  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
223 
224  private:
225  unsigned MaximumLegalStoreInBits;
226 
227  /// Check the specified integer node value to see if it can be simplified or
228  /// if things it uses can be simplified by bit propagation.
229  /// If so, return true.
230  bool SimplifyDemandedBits(SDValue Op) {
231  unsigned BitWidth = Op.getScalarValueSizeInBits();
232  APInt Demanded = APInt::getAllOnesValue(BitWidth);
233  return SimplifyDemandedBits(Op, Demanded);
234  }
235 
236  /// Check the specified vector node value to see if it can be simplified or
237  /// if things it uses can be simplified as it only uses some of the
238  /// elements. If so, return true.
239  bool SimplifyDemandedVectorElts(SDValue Op) {
240  unsigned NumElts = Op.getValueType().getVectorNumElements();
241  APInt Demanded = APInt::getAllOnesValue(NumElts);
242  return SimplifyDemandedVectorElts(Op, Demanded);
243  }
244 
245  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
246  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
247  bool AssumeSingleUse = false);
248 
249  bool CombineToPreIndexedLoadStore(SDNode *N);
250  bool CombineToPostIndexedLoadStore(SDNode *N);
251  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
252  bool SliceUpLoad(SDNode *N);
253 
254  // Scalars have size 0 to distinguish from singleton vectors.
255  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
256  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
257  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
258 
259  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
260  /// load.
261  ///
262  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
263  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
264  /// \param EltNo index of the vector element to load.
265  /// \param OriginalLoad load that EVE came from to be replaced.
266  /// \returns EVE on success, SDValue() on failure.
267  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
268  SDValue EltNo,
269  LoadSDNode *OriginalLoad);
270  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
271  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
272  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
273  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
274  SDValue PromoteIntBinOp(SDValue Op);
275  SDValue PromoteIntShiftOp(SDValue Op);
276  SDValue PromoteExtend(SDValue Op);
277  bool PromoteLoad(SDValue Op);
278 
279  /// Call the node-specific routine that knows how to fold each
280  /// particular type of node. If that doesn't do anything, try the
281  /// target-specific DAG combines.
282  SDValue combine(SDNode *N);
283 
284  // Visitation implementation - Implement dag node combining for different
285  // node types. The semantics are as follows:
286  // Return Value:
287  // SDValue.getNode() == 0 - No change was made
288  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
289  // otherwise - N should be replaced by the returned Operand.
290  //
291  SDValue visitTokenFactor(SDNode *N);
292  SDValue visitMERGE_VALUES(SDNode *N);
293  SDValue visitADD(SDNode *N);
294  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
295  SDValue visitSUB(SDNode *N);
296  SDValue visitADDSAT(SDNode *N);
297  SDValue visitSUBSAT(SDNode *N);
298  SDValue visitADDC(SDNode *N);
299  SDValue visitUADDO(SDNode *N);
300  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
301  SDValue visitSUBC(SDNode *N);
302  SDValue visitUSUBO(SDNode *N);
303  SDValue visitADDE(SDNode *N);
304  SDValue visitADDCARRY(SDNode *N);
305  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
306  SDValue visitSUBE(SDNode *N);
307  SDValue visitSUBCARRY(SDNode *N);
308  SDValue visitMUL(SDNode *N);
309  SDValue useDivRem(SDNode *N);
310  SDValue visitSDIV(SDNode *N);
311  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
312  SDValue visitUDIV(SDNode *N);
313  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
314  SDValue visitREM(SDNode *N);
315  SDValue visitMULHU(SDNode *N);
316  SDValue visitMULHS(SDNode *N);
317  SDValue visitSMUL_LOHI(SDNode *N);
318  SDValue visitUMUL_LOHI(SDNode *N);
319  SDValue visitSMULO(SDNode *N);
320  SDValue visitUMULO(SDNode *N);
321  SDValue visitIMINMAX(SDNode *N);
322  SDValue visitAND(SDNode *N);
323  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
324  SDValue visitOR(SDNode *N);
325  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
326  SDValue visitXOR(SDNode *N);
327  SDValue SimplifyVBinOp(SDNode *N);
328  SDValue visitSHL(SDNode *N);
329  SDValue visitSRA(SDNode *N);
330  SDValue visitSRL(SDNode *N);
331  SDValue visitFunnelShift(SDNode *N);
332  SDValue visitRotate(SDNode *N);
333  SDValue visitABS(SDNode *N);
334  SDValue visitBSWAP(SDNode *N);
335  SDValue visitBITREVERSE(SDNode *N);
336  SDValue visitCTLZ(SDNode *N);
337  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
338  SDValue visitCTTZ(SDNode *N);
339  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
340  SDValue visitCTPOP(SDNode *N);
341  SDValue visitSELECT(SDNode *N);
342  SDValue visitVSELECT(SDNode *N);
343  SDValue visitSELECT_CC(SDNode *N);
344  SDValue visitSETCC(SDNode *N);
345  SDValue visitSETCCCARRY(SDNode *N);
346  SDValue visitSIGN_EXTEND(SDNode *N);
347  SDValue visitZERO_EXTEND(SDNode *N);
348  SDValue visitANY_EXTEND(SDNode *N);
349  SDValue visitAssertExt(SDNode *N);
350  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
351  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
352  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
353  SDValue visitTRUNCATE(SDNode *N);
354  SDValue visitBITCAST(SDNode *N);
355  SDValue visitBUILD_PAIR(SDNode *N);
356  SDValue visitFADD(SDNode *N);
357  SDValue visitFSUB(SDNode *N);
358  SDValue visitFMUL(SDNode *N);
359  SDValue visitFMA(SDNode *N);
360  SDValue visitFDIV(SDNode *N);
361  SDValue visitFREM(SDNode *N);
362  SDValue visitFSQRT(SDNode *N);
363  SDValue visitFCOPYSIGN(SDNode *N);
364  SDValue visitFPOW(SDNode *N);
365  SDValue visitSINT_TO_FP(SDNode *N);
366  SDValue visitUINT_TO_FP(SDNode *N);
367  SDValue visitFP_TO_SINT(SDNode *N);
368  SDValue visitFP_TO_UINT(SDNode *N);
369  SDValue visitFP_ROUND(SDNode *N);
370  SDValue visitFP_ROUND_INREG(SDNode *N);
371  SDValue visitFP_EXTEND(SDNode *N);
372  SDValue visitFNEG(SDNode *N);
373  SDValue visitFABS(SDNode *N);
374  SDValue visitFCEIL(SDNode *N);
375  SDValue visitFTRUNC(SDNode *N);
376  SDValue visitFFLOOR(SDNode *N);
377  SDValue visitFMINNUM(SDNode *N);
378  SDValue visitFMAXNUM(SDNode *N);
379  SDValue visitFMINIMUM(SDNode *N);
380  SDValue visitFMAXIMUM(SDNode *N);
381  SDValue visitBRCOND(SDNode *N);
382  SDValue visitBR_CC(SDNode *N);
383  SDValue visitLOAD(SDNode *N);
384 
385  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
386  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
387 
388  SDValue visitSTORE(SDNode *N);
389  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
390  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
391  SDValue visitBUILD_VECTOR(SDNode *N);
392  SDValue visitCONCAT_VECTORS(SDNode *N);
393  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
394  SDValue visitVECTOR_SHUFFLE(SDNode *N);
395  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
396  SDValue visitINSERT_SUBVECTOR(SDNode *N);
397  SDValue visitMLOAD(SDNode *N);
398  SDValue visitMSTORE(SDNode *N);
399  SDValue visitMGATHER(SDNode *N);
400  SDValue visitMSCATTER(SDNode *N);
401  SDValue visitFP_TO_FP16(SDNode *N);
402  SDValue visitFP16_TO_FP(SDNode *N);
403 
404  SDValue visitFADDForFMACombine(SDNode *N);
405  SDValue visitFSUBForFMACombine(SDNode *N);
406  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
407 
408  SDValue XformToShuffleWithZero(SDNode *N);
409  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
410  SDValue N1, SDNodeFlags Flags);
411 
412  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
413 
414  SDValue foldSelectOfConstants(SDNode *N);
415  SDValue foldVSelectOfConstants(SDNode *N);
416  SDValue foldBinOpIntoSelect(SDNode *BO);
417  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
418  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
419  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
420  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
421  SDValue N2, SDValue N3, ISD::CondCode CC,
422  bool NotExtCompare = false);
423  SDValue convertSelectOfFPConstantsToLoadOffset(
424  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
425  ISD::CondCode CC);
426  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
427  SDValue N2, SDValue N3, ISD::CondCode CC);
428  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
429  const SDLoc &DL);
430  SDValue unfoldMaskedMerge(SDNode *N);
431  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
432  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
433  const SDLoc &DL, bool foldBooleans);
434  SDValue rebuildSetCC(SDValue N);
435 
436  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
437  SDValue &CC) const;
438  bool isOneUseSetCC(SDValue N) const;
439 
440  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
441  unsigned HiOp);
442  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
443  SDValue CombineExtLoad(SDNode *N);
444  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
445  SDValue combineRepeatedFPDivisors(SDNode *N);
446  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
447  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
448  SDValue BuildSDIV(SDNode *N);
449  SDValue BuildSDIVPow2(SDNode *N);
450  SDValue BuildUDIV(SDNode *N);
451  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
452  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
453  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
454  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
455  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
456  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
457  SDNodeFlags Flags, bool Reciprocal);
458  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
459  SDNodeFlags Flags, bool Reciprocal);
460  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
461  bool DemandHighBits = true);
462  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
463  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
464  SDValue InnerPos, SDValue InnerNeg,
465  unsigned PosOpcode, unsigned NegOpcode,
466  const SDLoc &DL);
467  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
468  SDValue MatchLoadCombine(SDNode *N);
469  SDValue ReduceLoadWidth(SDNode *N);
470  SDValue ReduceLoadOpStoreWidth(SDNode *N);
471  SDValue splitMergedValStore(StoreSDNode *ST);
472  SDValue TransformFPLoadStorePair(SDNode *N);
473  SDValue convertBuildVecZextToZext(SDNode *N);
474  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
475  SDValue reduceBuildVecToShuffle(SDNode *N);
476  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
477  ArrayRef<int> VectorMask, SDValue VecIn1,
478  SDValue VecIn2, unsigned LeftIdx);
479  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
480 
481  /// Walk up chain skipping non-aliasing memory nodes,
482  /// looking for aliasing nodes and adding them to the Aliases vector.
483  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
484  SmallVectorImpl<SDValue> &Aliases);
485 
486  /// Return true if there is any possibility that the two addresses overlap.
487  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
488 
489  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
490  /// chain (aliasing node.)
491  SDValue FindBetterChain(SDNode *N, SDValue Chain);
492 
493  /// Try to replace a store and any possibly adjacent stores on
494  /// consecutive chains with better chains. Return true only if St is
495  /// replaced.
496  ///
497  /// Notice that other chains may still be replaced even if the function
498  /// returns false.
499  bool findBetterNeighborChains(StoreSDNode *St);
500 
501  // Helper for findBetterNeighborChains. Walks up the store chain, adding
502  // additional chained stores that do not overlap and can be parallelized.
503  bool parallelizeChainedStores(StoreSDNode *St);
504 
505  /// Holds a pointer to an LSBaseSDNode as well as information on where it
506  /// is located in a sequence of memory operations connected by a chain.
507  struct MemOpLink {
508  // Ptr to the mem node.
509  LSBaseSDNode *MemNode;
510 
511  // Offset from the base ptr.
512  int64_t OffsetFromBase;
513 
514  MemOpLink(LSBaseSDNode *N, int64_t Offset)
515  : MemNode(N), OffsetFromBase(Offset) {}
516  };
517 
518  /// This is a helper function for visitMUL to check the profitability
519  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
520  /// MulNode is the original multiply, AddNode is (add x, c1),
521  /// and ConstNode is c2.
522  bool isMulAddWithConstProfitable(SDNode *MulNode,
523  SDValue &AddNode,
524  SDValue &ConstNode);
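 // A worked instance of the fold documented above (constants chosen purely
 // for illustration): with x = 3, c1 = 4, c2 = 5,
 //   (mul (add 3, 4), 5)     = 7 * 5   = 35
 //   (add (mul 3, 5), 4 * 5) = 15 + 20 = 35
 // The rewrite is always arithmetically valid; this helper only decides
 // whether it pays off on the target.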
525 
526  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
527  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
528  /// the type of the loaded value to be extended.
529  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
530  EVT LoadResultTy, EVT &ExtVT);
531 
532  /// Helper function to calculate whether the given Load/Store can have its
533  /// width reduced to ExtVT.
534  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
535  EVT &MemVT, unsigned ShAmt = 0);
536 
537  /// Used by BackwardsPropagateMask to find suitable loads.
538  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
539  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
540  ConstantSDNode *Mask, SDNode *&NodeToMask);
541  /// Attempt to propagate a given AND node back to load leaves so that they
542  /// can be combined into narrow loads.
543  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
544 
545  /// Helper function for MergeConsecutiveStores which merges the
546  /// component store chains.
547  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
548  unsigned NumStores);
549 
550  /// This is a helper function for MergeConsecutiveStores. When the
551  /// source elements of the consecutive stores are all constants or
552  /// all extracted vector elements, try to merge them into one
553  /// larger store introducing bitcasts if necessary. \return True
554  /// if a merged store was created.
555  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
556  EVT MemVT, unsigned NumStores,
557  bool IsConstantSrc, bool UseVector,
558  bool UseTrunc);
559 
560  /// This is a helper function for MergeConsecutiveStores. Stores
561  /// that potentially may be merged with St are placed in
562  /// StoreNodes. RootNode is a chain predecessor to all store
563  /// candidates.
564  void getStoreMergeCandidates(StoreSDNode *St,
565  SmallVectorImpl<MemOpLink> &StoreNodes,
566  SDNode *&Root);
567 
568  /// Helper function for MergeConsecutiveStores. Checks if
569  /// candidate stores have indirect dependency through their
570  /// operands. RootNode is the predecessor to all stores calculated
571  /// by getStoreMergeCandidates and is used to prune the dependency check.
572  /// \return True if safe to merge.
573  bool checkMergeStoreCandidatesForDependencies(
574  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
575  SDNode *RootNode);
576 
577  /// Merge consecutive store operations into a wide store.
578  /// This optimization uses wide integers or vectors when possible.
579  /// \return true if stores were merged into a wider store (the affected
580  /// nodes are stored as a prefix in \p StoreNodes).
581  bool MergeConsecutiveStores(StoreSDNode *St);
582 
583  /// Try to transform a truncation where C is a constant:
584  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
585  ///
586  /// \p N needs to be a truncation and its first operand an AND. Other
587  /// requirements are checked by the function (e.g. that trunc is
588  /// single-use) and if missed an empty SDValue is returned.
589  SDValue distributeTruncateThroughAnd(SDNode *N);
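 // An illustrative instance of the truncation fold above (types chosen
 // arbitrarily): for X : i32 and C = 0x0000FF00,
 //   (trunc (and X, 0x0000FF00) to i16) == (and (trunc X to i16), 0xFF00)
 // Truncation keeps only the low bits, and each low bit of an AND depends
 // only on the corresponding low bits of its operands, so both forms agree.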
590 
591  /// Helper function to determine whether the target supports operation
592  /// given by \p Opcode for type \p VT, that is, whether the operation
593  /// is legal or custom before legalizing operations, and whether it is
594  /// legal (but not custom) after legalization.
595  bool hasOperation(unsigned Opcode, EVT VT) {
596  if (LegalOperations)
597  return TLI.isOperationLegal(Opcode, VT);
598  return TLI.isOperationLegalOrCustom(Opcode, VT);
599  }
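 // For instance (hypothetical target behaviour): if a target marks ISD::FSHL
 // Custom for some VT, hasOperation(ISD::FSHL, VT) returns true while
 // operations are still being legalized (a custom expansion exists), but
 // false once LegalOperations is set, since only truly Legal operations may
 // be created after operation legalization.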
600 
601  public:
602  /// Runs the dag combiner on all nodes in the work list
603  void Run(CombineLevel AtLevel);
604 
605  SelectionDAG &getDAG() const { return DAG; }
606 
607  /// Returns a type large enough to hold any valid shift amount - before type
608  /// legalization these can be huge.
609  EVT getShiftAmountTy(EVT LHSTy) {
610  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
611  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
612  }
613 
614  /// This method returns true if we are running before type legalization or
615  /// if the specified VT is legal.
616  bool isTypeLegal(const EVT &VT) {
617  if (!LegalTypes) return true;
618  return TLI.isTypeLegal(VT);
619  }
620 
621  /// Convenience wrapper around TargetLowering::getSetCCResultType
622  EVT getSetCCResultType(EVT VT) const {
623  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
624  }
625 
626  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
627  SDValue OrigLoad, SDValue ExtLoad,
628  ISD::NodeType ExtType);
629  };
630 
631 /// This class is a DAGUpdateListener that removes any deleted
632 /// nodes from the worklist.
633 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
634  DAGCombiner &DC;
635 
636 public:
637  explicit WorklistRemover(DAGCombiner &dc)
638  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
639 
640  void NodeDeleted(SDNode *N, SDNode *E) override {
641  DC.removeFromWorklist(N);
642  }
643 };
644 
645 } // end anonymous namespace
646 
647 //===----------------------------------------------------------------------===//
648 // TargetLowering::DAGCombinerInfo implementation
649 //===----------------------------------------------------------------------===//
650 
651 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
652  ((DAGCombiner*)DC)->AddToWorklist(N);
653 }
654 
655 SDValue TargetLowering::DAGCombinerInfo::
656 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
657  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
658 }
659 
660 SDValue TargetLowering::DAGCombinerInfo::
661 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
662  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
663 }
664 
665 SDValue TargetLowering::DAGCombinerInfo::
666 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
667  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
668 }
669 
670 void TargetLowering::DAGCombinerInfo::
671 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
672  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
673 }
674 
675 //===----------------------------------------------------------------------===//
676 // Helper Functions
677 //===----------------------------------------------------------------------===//
678 
679 void DAGCombiner::deleteAndRecombine(SDNode *N) {
680  removeFromWorklist(N);
681 
682  // If the operands of this node are only used by the node, they will now be
683  // dead. Make sure to re-visit them and recursively delete dead nodes.
684  for (const SDValue &Op : N->ops())
685  // For an operand generating multiple values, one of the values may
686  // become dead allowing further simplification (e.g. split index
687  // arithmetic from an indexed load).
688  if (Op->hasOneUse() || Op->getNumValues() > 1)
689  AddToWorklist(Op.getNode());
690 
691  DAG.DeleteNode(N);
692 }
693 
694 /// Return 1 if we can compute the negated form of the specified expression for
695 /// the same cost as the expression itself, or 2 if we can compute the negated
696 /// form more cheaply than the expression itself. Returns 0 if the negated
697 /// form cannot be computed for free.
697 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
698  const TargetLowering &TLI,
699  const TargetOptions *Options,
700  unsigned Depth = 0) {
701  // fneg is removable even if it has multiple uses.
702  if (Op.getOpcode() == ISD::FNEG) return 2;
703 
704  // Don't allow anything with multiple uses unless we know it is free.
705  EVT VT = Op.getValueType();
706  const SDNodeFlags Flags = Op->getFlags();
707  if (!Op.hasOneUse())
708  if (!(Op.getOpcode() == ISD::FP_EXTEND &&
709  TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
710  return 0;
711 
712  // Don't recurse exponentially.
713  if (Depth > 6) return 0;
714 
715  switch (Op.getOpcode()) {
716  default: return false;
717  case ISD::ConstantFP: {
718  if (!LegalOperations)
719  return 1;
720 
721  // Don't invert constant FP values after legalization unless the target says
722  // the negated constant is legal.
723  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
724  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
725  }
726  case ISD::FADD:
727  if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
728  return 0;
729 
730  // After operation legalization, it might not be legal to create new FSUBs.
731  if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
732  return 0;
733 
734  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
735  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
736  Options, Depth + 1))
737  return V;
738  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
739  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
740  Depth + 1);
741  case ISD::FSUB:
742  // We can't turn -(A-B) into B-A when we honor signed zeros.
743  if (!Options->NoSignedZerosFPMath &&
744  !Flags.hasNoSignedZeros())
745  return 0;
746 
747  // fold (fneg (fsub A, B)) -> (fsub B, A)
748  return 1;
749 
750  case ISD::FMUL:
751  case ISD::FDIV:
752  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
753  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
754  Options, Depth + 1))
755  return V;
756 
757  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
758  Depth + 1);
759 
760  case ISD::FP_EXTEND:
761  case ISD::FP_ROUND:
762  case ISD::FSIN:
763  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
764  Depth + 1);
765  }
766 }
767 
768 /// If isNegatibleForFree returns true, return the newly negated expression.
769 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
770  bool LegalOperations, unsigned Depth = 0) {
771  const TargetOptions &Options = DAG.getTarget().Options;
772  // fneg is removable even if it has multiple uses.
773  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
774 
775  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
776 
777  const SDNodeFlags Flags = Op.getNode()->getFlags();
778 
779  switch (Op.getOpcode()) {
780  default: llvm_unreachable("Unknown code");
781  case ISD::ConstantFP: {
782  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
783  V.changeSign();
784  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
785  }
786  case ISD::FADD:
787  assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
788 
789  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
790  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
791  DAG.getTargetLoweringInfo(), &Options, Depth+1))
792  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
793  GetNegatedExpression(Op.getOperand(0), DAG,
794  LegalOperations, Depth+1),
795  Op.getOperand(1), Flags);
796  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
797  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
798  GetNegatedExpression(Op.getOperand(1), DAG,
799  LegalOperations, Depth+1),
800  Op.getOperand(0), Flags);
801  case ISD::FSUB:
802  // fold (fneg (fsub 0, B)) -> B
803  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
804  if (N0CFP->isZero())
805  return Op.getOperand(1);
806 
807  // fold (fneg (fsub A, B)) -> (fsub B, A)
808  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
809  Op.getOperand(1), Op.getOperand(0), Flags);
810 
811  case ISD::FMUL:
812  case ISD::FDIV:
813  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
814  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
815  DAG.getTargetLoweringInfo(), &Options, Depth+1))
816  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
817  GetNegatedExpression(Op.getOperand(0), DAG,
818  LegalOperations, Depth+1),
819  Op.getOperand(1), Flags);
820 
821  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
822  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
823  Op.getOperand(0),
824  GetNegatedExpression(Op.getOperand(1), DAG,
825  LegalOperations, Depth+1), Flags);
826 
827  case ISD::FP_EXTEND:
828  case ISD::FSIN:
829  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
830  GetNegatedExpression(Op.getOperand(0), DAG,
831  LegalOperations, Depth+1));
832  case ISD::FP_ROUND:
833  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
834  GetNegatedExpression(Op.getOperand(0), DAG,
835  LegalOperations, Depth+1),
836  Op.getOperand(1));
837  }
838 }
839 
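// A minimal sketch of how these two helpers are typically paired at a call
// site (simplified; N/N0/N1/VT stand for a caller's node, operands and type,
// and the real call sites also thread node flags through):
//   if (isNegatibleForFree(N1, LegalOperations, TLI,
//                          &DAG.getTarget().Options))
//     // fold (fsub A, B) -> (fadd A, (fneg B)) when negating B is free.
//     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0,
//                        GetNegatedExpression(N1, DAG, LegalOperations));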
840 // APInts must be the same size for most operations; this helper
841 // function zero-extends the shorter of the pair so that they match.
842 // We provide an Offset so that we can create bitwidths that won't overflow.
843 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
844  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
845  LHS = LHS.zextOrSelf(Bits);
846  RHS = RHS.zextOrSelf(Bits);
847 }
848 
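// A small usage sketch for the helper above (values are hypothetical): with
// an 8-bit LHS, a 16-bit RHS and Offset = 1, both operands are widened to
// Offset + max(8, 16) = 17 bits, giving headroom for the combined arithmetic.
//   APInt LHS(8, 0xAB), RHS(16, 0x1234);
//   zeroExtendToMatch(LHS, RHS, /*Offset=*/1);
//   assert(LHS.getBitWidth() == 17 && RHS.getBitWidth() == 17);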
849 // Return true if this node is a setcc, or is a select_cc
850 // that selects between the target values used for true and false, making it
851 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
852 // the appropriate nodes based on the type of node we are checking. This
853 // simplifies life a bit for the callers.
854 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
855  SDValue &CC) const {
856  if (N.getOpcode() == ISD::SETCC) {
857  LHS = N.getOperand(0);
858  RHS = N.getOperand(1);
859  CC = N.getOperand(2);
860  return true;
861  }
862 
863  if (N.getOpcode() != ISD::SELECT_CC ||
864  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
865  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
866  return false;
867 
868  if (TLI.getBooleanContents(N.getValueType()) ==
869  TargetLowering::UndefinedBooleanContent)
870  return false;
871 
872  LHS = N.getOperand(0);
873  RHS = N.getOperand(1);
874  CC = N.getOperand(4);
875  return true;
876 }
877 
878 /// Return true if this is a SetCC-equivalent operation with only one use.
879 /// If this is true, it allows the users to invert the operation for free when
880 /// it is profitable to do so.
881 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
882  SDValue N0, N1, N2;
883  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
884  return true;
885  return false;
886 }
887 
888 // Returns the SDNode if it is a constant float BuildVector
889 // or constant float.
890 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
891  if (isa<ConstantFPSDNode>(N))
892  return N.getNode();
893  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
894  return N.getNode();
895  return nullptr;
896 }
897 
898 // Determines if it is a constant integer or a build vector of constant
899 // integers (and undefs).
900 // Do not permit build vector implicit truncation.
901 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
902  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
903  return !(Const->isOpaque() && NoOpaques);
904  if (N.getOpcode() != ISD::BUILD_VECTOR)
905  return false;
906  unsigned BitWidth = N.getScalarValueSizeInBits();
907  for (const SDValue &Op : N->op_values()) {
908  if (Op.isUndef())
909  continue;
910  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
911  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
912  (Const->isOpaque() && NoOpaques))
913  return false;
914  }
915  return true;
916 }
917 
918 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
919 // with undefs.
920 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
921  if (V.getOpcode() != ISD::BUILD_VECTOR)
922  return false;
923  return isConstantOrConstantVector(V, NoOpaques) ||
924  ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
925 }
926 
927 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
928  SDValue N1, SDNodeFlags Flags) {
929  // Don't reassociate reductions.
930  if (Flags.hasVectorReduction())
931  return SDValue();
932 
933  EVT VT = N0.getValueType();
934  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
935  if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
936  if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
937  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
938  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
939  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
940  return SDValue();
941  }
942  if (N0.hasOneUse()) {
943  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
944  // use
945  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
946  if (!OpNode.getNode())
947  return SDValue();
948  AddToWorklist(OpNode.getNode());
949  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
950  }
951  }
952  }
953 
954  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
955  if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
956  if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
957  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
958  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
959  return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
960  return SDValue();
961  }
962  if (N1.hasOneUse()) {
963  // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
964  // use
965  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
966  if (!OpNode.getNode())
967  return SDValue();
968  AddToWorklist(OpNode.getNode());
969  return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
970  }
971  }
972  }
973 
974  return SDValue();
975 }
976 
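// Concrete shape of the first reassociation above, with arbitrary constants:
//   (add (add x, 10), 20) -> (add x, 30)
// Folding the two constants frees the inner node and exposes the combined
// constant to further simplification.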
977 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
978  bool AddTo) {
979  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
980  ++NodesCombined;
981  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
982  To[0].getNode()->dump(&DAG);
983  dbgs() << " and " << NumTo - 1 << " other values\n");
984  for (unsigned i = 0, e = NumTo; i != e; ++i)
985  assert((!To[i].getNode() ||
986  N->getValueType(i) == To[i].getValueType()) &&
987  "Cannot combine value to value of different type!");
988 
989  WorklistRemover DeadNodes(*this);
990  DAG.ReplaceAllUsesWith(N, To);
991  if (AddTo) {
992  // Push the new nodes and any users onto the worklist
993  for (unsigned i = 0, e = NumTo; i != e; ++i) {
994  if (To[i].getNode()) {
995  AddToWorklist(To[i].getNode());
996  AddUsersToWorklist(To[i].getNode());
997  }
998  }
999  }
1000 
1001  // Finally, if the node is now dead, remove it from the graph. The node
1002  // may not be dead if the replacement process recursively simplified to
1003  // something else needing this node.
1004  if (N->use_empty())
1005  deleteAndRecombine(N);
1006  return SDValue(N, 0);
1007 }
1008 
1009 void DAGCombiner::
1010 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1011  // Replace all uses. If any nodes become isomorphic to other nodes and
1012  // are deleted, make sure to remove them from our worklist.
1013  WorklistRemover DeadNodes(*this);
1014  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1015 
1016  // Push the new node and any (possibly new) users onto the worklist.
1017  AddToWorklist(TLO.New.getNode());
1018  AddUsersToWorklist(TLO.New.getNode());
1019 
1020  // Finally, if the node is now dead, remove it from the graph. The node
1021  // may not be dead if the replacement process recursively simplified to
1022  // something else needing this node.
1023  if (TLO.Old.getNode()->use_empty())
1024  deleteAndRecombine(TLO.Old.getNode());
1025 }
1026 
1027 /// Check the specified integer node value to see if it can be simplified or if
1028 /// things it uses can be simplified by bit propagation. If so, return true.
1029 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1030  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1031  KnownBits Known;
1032  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1033  return false;
1034 
1035  // Revisit the node.
1036  AddToWorklist(Op.getNode());
1037 
1038  // Replace the old value with the new one.
1039  ++NodesCombined;
1040  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1041  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1042  dbgs() << '\n');
1043 
1044  CommitTargetLoweringOpt(TLO);
1045  return true;
1046 }
1047 
1048 /// Check the specified vector node value to see if it can be simplified or
1049 /// if things it uses can be simplified as it only uses some of the elements.
1050 /// If so, return true.
1051 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1052  bool AssumeSingleUse) {
1053  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1054  APInt KnownUndef, KnownZero;
1055  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1056  0, AssumeSingleUse))
1057  return false;
1058 
1059  // Revisit the node.
1060  AddToWorklist(Op.getNode());
1061 
1062  // Replace the old value with the new one.
1063  ++NodesCombined;
1064  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1065  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1066  dbgs() << '\n');
1067 
1068  CommitTargetLoweringOpt(TLO);
1069  return true;
1070 }
1071 
1072 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1073  SDLoc DL(Load);
1074  EVT VT = Load->getValueType(0);
1075  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1076 
1077  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1078  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1079  WorklistRemover DeadNodes(*this);
1080  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1081  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1082  deleteAndRecombine(Load);
1083  AddToWorklist(Trunc.getNode());
1084 }
1085 
1086 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1087  Replace = false;
1088  SDLoc DL(Op);
1089  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1090  LoadSDNode *LD = cast<LoadSDNode>(Op);
1091  EVT MemVT = LD->getMemoryVT();
1092  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1093  : LD->getExtensionType();
1094  Replace = true;
1095  return DAG.getExtLoad(ExtType, DL, PVT,
1096  LD->getChain(), LD->getBasePtr(),
1097  MemVT, LD->getMemOperand());
1098  }
1099 
1100  unsigned Opc = Op.getOpcode();
1101  switch (Opc) {
1102  default: break;
1103  case ISD::AssertSext:
1104  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1105  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1106  break;
1107  case ISD::AssertZext:
1108  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1109  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1110  break;
1111  case ISD::Constant: {
1112  unsigned ExtOpc =
1113  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1114  return DAG.getNode(ExtOpc, DL, PVT, Op);
1115  }
1116  }
1117 
1118  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1119  return SDValue();
1120  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1121 }
1122 
1123 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1124  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1125  return SDValue();
1126  EVT OldVT = Op.getValueType();
1127  SDLoc DL(Op);
1128  bool Replace = false;
1129  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1130  if (!NewOp.getNode())
1131  return SDValue();
1132  AddToWorklist(NewOp.getNode());
1133 
1134  if (Replace)
1135  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1136  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1137  DAG.getValueType(OldVT));
1138 }
1139 
1140 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1141  EVT OldVT = Op.getValueType();
1142  SDLoc DL(Op);
1143  bool Replace = false;
1144  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1145  if (!NewOp.getNode())
1146  return SDValue();
1147  AddToWorklist(NewOp.getNode());
1148 
1149  if (Replace)
1150  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1151  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1152 }
1153 
1154 /// Promote the specified integer binary operation if the target indicates it is
1155 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1156 /// i32 since i16 instructions are longer.
1157 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1158  if (!LegalOperations)
1159  return SDValue();
1160 
1161  EVT VT = Op.getValueType();
1162  if (VT.isVector() || !VT.isInteger())
1163  return SDValue();
1164 
1165  // If operation type is 'undesirable', e.g. i16 on x86, consider
1166  // promoting it.
1167  unsigned Opc = Op.getOpcode();
1168  if (TLI.isTypeDesirableForOp(Opc, VT))
1169  return SDValue();
1170 
1171  EVT PVT = VT;
1172  // Consult target whether it is a good idea to promote this operation and
1173  // what's the right type to promote it to.
1174  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1175  assert(PVT != VT && "Don't know what type to promote to!");
1176 
1177  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1178 
1179  bool Replace0 = false;
1180  SDValue N0 = Op.getOperand(0);
1181  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1182 
1183  bool Replace1 = false;
1184  SDValue N1 = Op.getOperand(1);
1185  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1186  SDLoc DL(Op);
1187 
1188  SDValue RV =
1189  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1190 
1191  // We are always replacing N0/N1's use in N and only need
1192  // additional replacements if there are additional uses.
1193  Replace0 &= !N0->hasOneUse();
1194  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1195 
1196  // Combine Op here so it is preserved past replacements.
1197  CombineTo(Op.getNode(), RV);
1198 
1199  // If operands have a use ordering, make sure we deal with
1200  // predecessor first.
1201  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1202  std::swap(N0, N1);
1203  std::swap(NN0, NN1);
1204  }
1205 
1206  if (Replace0) {
1207  AddToWorklist(NN0.getNode());
1208  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1209  }
1210  if (Replace1) {
1211  AddToWorklist(NN1.getNode());
1212  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1213  }
1214  return Op;
1215  }
1216  return SDValue();
1217 }
1218 
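// Shape of the promotion performed above, using the i16 -> i32 x86 case from
// the comment as the example (operand promotion details elided):
//   (i16 add a, b) -> (i16 trunc (i32 add (i32 anyext a), (i32 anyext b)))
// The result is unchanged because the low 16 bits of the widened add equal
// the 16-bit add of the low halves of the operands.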
1219 /// Promote the specified integer shift operation if the target indicates it is
1220 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1221 /// i32 since i16 instructions are longer.
1222 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1223  if (!LegalOperations)
1224  return SDValue();
1225 
1226  EVT VT = Op.getValueType();
1227  if (VT.isVector() || !VT.isInteger())
1228  return SDValue();
1229 
1230  // If operation type is 'undesirable', e.g. i16 on x86, consider
1231  // promoting it.
1232  unsigned Opc = Op.getOpcode();
1233  if (TLI.isTypeDesirableForOp(Opc, VT))
1234  return SDValue();
1235 
1236  EVT PVT = VT;
1237  // Consult target whether it is a good idea to promote this operation and
1238  // what's the right type to promote it to.
1239  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1240  assert(PVT != VT && "Don't know what type to promote to!");
1241 
1242  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1243 
1244  bool Replace = false;
1245  SDValue N0 = Op.getOperand(0);
1246  SDValue N1 = Op.getOperand(1);
1247  if (Opc == ISD::SRA)
1248  N0 = SExtPromoteOperand(N0, PVT);
1249  else if (Opc == ISD::SRL)
1250  N0 = ZExtPromoteOperand(N0, PVT);
1251  else
1252  N0 = PromoteOperand(N0, PVT, Replace);
1253 
1254  if (!N0.getNode())
1255  return SDValue();
1256 
1257  SDLoc DL(Op);
1258  SDValue RV =
1259  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1260 
1261  AddToWorklist(N0.getNode());
1262  if (Replace)
1263  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1264 
1265  // Deal with Op being deleted.
1266  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1267  return RV;
1268  }
1269  return SDValue();
1270 }
1271 
1272 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1273  if (!LegalOperations)
1274  return SDValue();
1275 
1276  EVT VT = Op.getValueType();
1277  if (VT.isVector() || !VT.isInteger())
1278  return SDValue();
1279 
1280  // If operation type is 'undesirable', e.g. i16 on x86, consider
1281  // promoting it.
1282  unsigned Opc = Op.getOpcode();
1283  if (TLI.isTypeDesirableForOp(Opc, VT))
1284  return SDValue();
1285 
1286  EVT PVT = VT;
1287  // Consult target whether it is a good idea to promote this operation and
1288  // what's the right type to promote it to.
1289  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1290  assert(PVT != VT && "Don't know what type to promote to!");
1291  // fold (aext (aext x)) -> (aext x)
1292  // fold (aext (zext x)) -> (zext x)
1293  // fold (aext (sext x)) -> (sext x)
1294  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1295  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1296  }
1297  return SDValue();
1298 }
1299 
1300 bool DAGCombiner::PromoteLoad(SDValue Op) {
1301  if (!LegalOperations)
1302  return false;
1303 
1304  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1305  return false;
1306 
1307  EVT VT = Op.getValueType();
1308  if (VT.isVector() || !VT.isInteger())
1309  return false;
1310 
1311  // If operation type is 'undesirable', e.g. i16 on x86, consider
1312  // promoting it.
1313  unsigned Opc = Op.getOpcode();
1314  if (TLI.isTypeDesirableForOp(Opc, VT))
1315  return false;
1316 
1317  EVT PVT = VT;
1318  // Consult target whether it is a good idea to promote this operation and
1319  // what's the right type to promote it to.
1320  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1321  assert(PVT != VT && "Don't know what type to promote to!");
1322 
1323  SDLoc DL(Op);
1324  SDNode *N = Op.getNode();
1325  LoadSDNode *LD = cast<LoadSDNode>(N);
1326  EVT MemVT = LD->getMemoryVT();
1327  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1328  : LD->getExtensionType();
1329  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1330  LD->getChain(), LD->getBasePtr(),
1331  MemVT, LD->getMemOperand());
1332  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1333 
1334  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1335  Result.getNode()->dump(&DAG); dbgs() << '\n');
1336  WorklistRemover DeadNodes(*this);
1337  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1338  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1339  deleteAndRecombine(N);
1340  AddToWorklist(Result.getNode());
1341  return true;
1342  }
1343  return false;
1344 }
1345 
1346 /// Recursively delete a node which has no uses and any operands for
1347 /// which it is the only use.
1348 ///
1349 /// Note that this both deletes the nodes and removes them from the worklist.
1350 /// It also adds any nodes that have had a user deleted to the worklist, as
1351 /// they may now have only one use and be subject to other combines.
1352 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1353  if (!N->use_empty())
1354  return false;
1355 
1356  SmallSetVector<SDNode *, 16> Nodes;
1357  Nodes.insert(N);
1358  do {
1359  N = Nodes.pop_back_val();
1360  if (!N)
1361  continue;
1362 
1363  if (N->use_empty()) {
1364  for (const SDValue &ChildN : N->op_values())
1365  Nodes.insert(ChildN.getNode());
1366 
1367  removeFromWorklist(N);
1368  DAG.DeleteNode(N);
1369  } else {
1370  AddToWorklist(N);
1371  }
1372  } while (!Nodes.empty());
1373  return true;
1374 }
1375 
1376 //===----------------------------------------------------------------------===//
1377 // Main DAG Combiner implementation
1378 //===----------------------------------------------------------------------===//
1379 
1380 void DAGCombiner::Run(CombineLevel AtLevel) {
1381  // Set the instance variables, so that the various visit routines may use them.
1382  Level = AtLevel;
1383  LegalOperations = Level >= AfterLegalizeVectorOps;
1384  LegalTypes = Level >= AfterLegalizeTypes;
1385 
1386  // Add all the dag nodes to the worklist.
1387  for (SDNode &Node : DAG.allnodes())
1388  AddToWorklist(&Node);
1389 
1390  // Create a dummy node (which is not added to allnodes), that adds a reference
1391  // to the root node, preventing it from being deleted, and tracking any
1392  // changes of the root.
1393  HandleSDNode Dummy(DAG.getRoot());
1394 
1395  // While the worklist isn't empty, find a node and try to combine it.
1396  while (!WorklistMap.empty()) {
1397  SDNode *N;
1398  // The Worklist holds the SDNodes in order, but it may contain null entries.
1399  do {
1400  N = Worklist.pop_back_val();
1401  } while (!N);
1402 
1403  bool GoodWorklistEntry = WorklistMap.erase(N);
1404  (void)GoodWorklistEntry;
1405  assert(GoodWorklistEntry &&
1406  "Found a worklist entry without a corresponding map entry!");
1407 
1408  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1409  // N is deleted from the DAG, since they too may now be dead or may have a
1410  // reduced number of uses, allowing other xforms.
1411  if (recursivelyDeleteUnusedNodes(N))
1412  continue;
1413 
1414  WorklistRemover DeadNodes(*this);
1415 
1416  // If this combine is running after legalizing the DAG, re-legalize any
1417  // nodes pulled off the worklist.
1418  if (Level == AfterLegalizeDAG) {
1419  SmallSetVector<SDNode *, 16> UpdatedNodes;
1420  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1421 
1422  for (SDNode *LN : UpdatedNodes) {
1423  AddToWorklist(LN);
1424  AddUsersToWorklist(LN);
1425  }
1426  if (!NIsValid)
1427  continue;
1428  }
1429 
1430  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1431 
1432  // Add any operands of the new node which have not yet been combined to the
1433  // worklist as well. Because the worklist uniques things already, this
1434  // won't repeatedly process the same operand.
1435  CombinedNodes.insert(N);
1436  for (const SDValue &ChildN : N->op_values())
1437  if (!CombinedNodes.count(ChildN.getNode()))
1438  AddToWorklist(ChildN.getNode());
1439 
1440  SDValue RV = combine(N);
1441 
1442  if (!RV.getNode())
1443  continue;
1444 
1445  ++NodesCombined;
1446 
1447  // If we get back the same node we passed in, rather than a new node or
1448  // zero, we know that the node must have defined multiple values and
1449  // CombineTo was used. Since CombineTo takes care of the worklist
1450  // mechanics for us, we have no work to do in this case.
1451  if (RV.getNode() == N)
1452  continue;
1453 
1454  assert(N->getOpcode() != ISD::DELETED_NODE &&
1455  RV.getOpcode() != ISD::DELETED_NODE &&
1456  "Node was deleted but visit returned new node!");
1457 
1458  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1459 
1460  if (N->getNumValues() == RV.getNode()->getNumValues())
1461  DAG.ReplaceAllUsesWith(N, RV.getNode());
1462  else {
1463  assert(N->getValueType(0) == RV.getValueType() &&
1464  N->getNumValues() == 1 && "Type mismatch");
1465  DAG.ReplaceAllUsesWith(N, &RV);
1466  }
1467 
1468  // Push the new node and any users onto the worklist
1469  AddToWorklist(RV.getNode());
1470  AddUsersToWorklist(RV.getNode());
1471 
1472  // Finally, if the node is now dead, remove it from the graph. The node
1473  // may not be dead if the replacement process recursively simplified to
1474  // something else needing this node. This will also take care of adding any
1475  // operands which have lost a user to the worklist.
1476  recursivelyDeleteUnusedNodes(N);
1477  }
1478 
1479  // If the root changed (e.g. it was a dead load), update the root.
1480  DAG.setRoot(Dummy.getValue());
1481  DAG.RemoveDeadNodes();
1482 }
1483 
1484 SDValue DAGCombiner::visit(SDNode *N) {
1485  switch (N->getOpcode()) {
1486  default: break;
1487  case ISD::TokenFactor: return visitTokenFactor(N);
1488  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1489  case ISD::ADD: return visitADD(N);
1490  case ISD::SUB: return visitSUB(N);
1491  case ISD::SADDSAT:
1492  case ISD::UADDSAT: return visitADDSAT(N);
1493  case ISD::SSUBSAT:
1494  case ISD::USUBSAT: return visitSUBSAT(N);
1495  case ISD::ADDC: return visitADDC(N);
1496  case ISD::UADDO: return visitUADDO(N);
1497  case ISD::SUBC: return visitSUBC(N);
1498  case ISD::USUBO: return visitUSUBO(N);
1499  case ISD::ADDE: return visitADDE(N);
1500  case ISD::ADDCARRY: return visitADDCARRY(N);
1501  case ISD::SUBE: return visitSUBE(N);
1502  case ISD::SUBCARRY: return visitSUBCARRY(N);
1503  case ISD::MUL: return visitMUL(N);
1504  case ISD::SDIV: return visitSDIV(N);
1505  case ISD::UDIV: return visitUDIV(N);
1506  case ISD::SREM:
1507  case ISD::UREM: return visitREM(N);
1508  case ISD::MULHU: return visitMULHU(N);
1509  case ISD::MULHS: return visitMULHS(N);
1510  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1511  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1512  case ISD::SMULO: return visitSMULO(N);
1513  case ISD::UMULO: return visitUMULO(N);
1514  case ISD::SMIN:
1515  case ISD::SMAX:
1516  case ISD::UMIN:
1517  case ISD::UMAX: return visitIMINMAX(N);
1518  case ISD::AND: return visitAND(N);
1519  case ISD::OR: return visitOR(N);
1520  case ISD::XOR: return visitXOR(N);
1521  case ISD::SHL: return visitSHL(N);
1522  case ISD::SRA: return visitSRA(N);
1523  case ISD::SRL: return visitSRL(N);
1524  case ISD::ROTR:
1525  case ISD::ROTL: return visitRotate(N);
1526  case ISD::FSHL:
1527  case ISD::FSHR: return visitFunnelShift(N);
1528  case ISD::ABS: return visitABS(N);
1529  case ISD::BSWAP: return visitBSWAP(N);
1530  case ISD::BITREVERSE: return visitBITREVERSE(N);
1531  case ISD::CTLZ: return visitCTLZ(N);
1532  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1533  case ISD::CTTZ: return visitCTTZ(N);
1534  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1535  case ISD::CTPOP: return visitCTPOP(N);
1536  case ISD::SELECT: return visitSELECT(N);
1537  case ISD::VSELECT: return visitVSELECT(N);
1538  case ISD::SELECT_CC: return visitSELECT_CC(N);
1539  case ISD::SETCC: return visitSETCC(N);
1540  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1541  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1542  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1543  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1544  case ISD::AssertSext:
1545  case ISD::AssertZext: return visitAssertExt(N);
1546  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1547  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1548  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1549  case ISD::TRUNCATE: return visitTRUNCATE(N);
1550  case ISD::BITCAST: return visitBITCAST(N);
1551  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1552  case ISD::FADD: return visitFADD(N);
1553  case ISD::FSUB: return visitFSUB(N);
1554  case ISD::FMUL: return visitFMUL(N);
1555  case ISD::FMA: return visitFMA(N);
1556  case ISD::FDIV: return visitFDIV(N);
1557  case ISD::FREM: return visitFREM(N);
1558  case ISD::FSQRT: return visitFSQRT(N);
1559  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1560  case ISD::FPOW: return visitFPOW(N);
1561  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1562  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1563  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1564  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1565  case ISD::FP_ROUND: return visitFP_ROUND(N);
1566  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1567  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1568  case ISD::FNEG: return visitFNEG(N);
1569  case ISD::FABS: return visitFABS(N);
1570  case ISD::FFLOOR: return visitFFLOOR(N);
1571  case ISD::FMINNUM: return visitFMINNUM(N);
1572  case ISD::FMAXNUM: return visitFMAXNUM(N);
1573  case ISD::FMINIMUM: return visitFMINIMUM(N);
1574  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1575  case ISD::FCEIL: return visitFCEIL(N);
1576  case ISD::FTRUNC: return visitFTRUNC(N);
1577  case ISD::BRCOND: return visitBRCOND(N);
1578  case ISD::BR_CC: return visitBR_CC(N);
1579  case ISD::LOAD: return visitLOAD(N);
1580  case ISD::STORE: return visitSTORE(N);
1581  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1582  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1583  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1584  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1585  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1586  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1587  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1588  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1589  case ISD::MGATHER: return visitMGATHER(N);
1590  case ISD::MLOAD: return visitMLOAD(N);
1591  case ISD::MSCATTER: return visitMSCATTER(N);
1592  case ISD::MSTORE: return visitMSTORE(N);
1593  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1594  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1595  }
1596  return SDValue();
1597 }
1598 
1599 SDValue DAGCombiner::combine(SDNode *N) {
1600  SDValue RV = visit(N);
1601 
1602  // If nothing happened, try a target-specific DAG combine.
1603  if (!RV.getNode()) {
1604  assert(N->getOpcode() != ISD::DELETED_NODE &&
1605  "Node was deleted but visit returned NULL!");
1606 
1607  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1608  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1609 
1610  // Expose the DAG combiner to the target combiner impls.
1611  TargetLowering::DAGCombinerInfo
1612  DagCombineInfo(DAG, Level, false, this);
1613 
1614  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1615  }
1616  }
1617 
1618  // If nothing happened still, try promoting the operation.
1619  if (!RV.getNode()) {
1620  switch (N->getOpcode()) {
1621  default: break;
1622  case ISD::ADD:
1623  case ISD::SUB:
1624  case ISD::MUL:
1625  case ISD::AND:
1626  case ISD::OR:
1627  case ISD::XOR:
1628  RV = PromoteIntBinOp(SDValue(N, 0));
1629  break;
1630  case ISD::SHL:
1631  case ISD::SRA:
1632  case ISD::SRL:
1633  RV = PromoteIntShiftOp(SDValue(N, 0));
1634  break;
1635  case ISD::SIGN_EXTEND:
1636  case ISD::ZERO_EXTEND:
1637  case ISD::ANY_EXTEND:
1638  RV = PromoteExtend(SDValue(N, 0));
1639  break;
1640  case ISD::LOAD:
1641  if (PromoteLoad(SDValue(N, 0)))
1642  RV = SDValue(N, 0);
1643  break;
1644  }
1645  }
1646 
1647  // If N is a commutative binary node, try eliminate it if the commuted
1648  // version is already present in the DAG.
1649  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1650  N->getNumValues() == 1) {
1651  SDValue N0 = N->getOperand(0);
1652  SDValue N1 = N->getOperand(1);
1653 
1654  // Constant operands are canonicalized to RHS.
1655  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1656  SDValue Ops[] = {N1, N0};
1657  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1658  N->getFlags());
1659  if (CSENode)
1660  return SDValue(CSENode, 0);
1661  }
1662  }
1663 
1664  return RV;
1665 }
1666 
1667 /// Given a node, return its input chain if it has one, otherwise return a null
1668 /// sd operand.
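/// For example (illustrative), a load node's chain operand, which has type
/// MVT::Other, would be the value returned here.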
1669 static SDValue getInputChainForNode(SDNode *N) {
1670  if (unsigned NumOps = N->getNumOperands()) {
1671  if (N->getOperand(0).getValueType() == MVT::Other)
1672  return N->getOperand(0);
1673  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1674  return N->getOperand(NumOps-1);
1675  for (unsigned i = 1; i < NumOps-1; ++i)
1676  if (N->getOperand(i).getValueType() == MVT::Other)
1677  return N->getOperand(i);
1678  }
1679  return SDValue();
1680 }
1681 
1682 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1683  // If N has two operands, where one has an input chain equal to the other,
1684  // the 'other' chain is redundant.
1685  if (N->getNumOperands() == 2) {
1686  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1687  return N->getOperand(0);
1688  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1689  return N->getOperand(1);
1690  }
1691 
1692  // Don't simplify token factors if optnone.
1693  if (OptLevel == CodeGenOpt::None)
1694  return SDValue();
1695 
1696  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1697  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1698  SmallPtrSet<SDNode*, 16> SeenOps;
1699  bool Changed = false; // If we should replace this token factor.
1700 
1701  // Start out with this token factor.
1702  TFs.push_back(N);
1703 
1704  // Iterate through token factors. The TFs list grows when new token factors
1705  // are encountered.
1706  for (unsigned i = 0; i < TFs.size(); ++i) {
1707  SDNode *TF = TFs[i];
1708 
1709  // Check each of the operands.
1710  for (const SDValue &Op : TF->op_values()) {
1711  switch (Op.getOpcode()) {
1712  case ISD::EntryToken:
1713  // Entry tokens don't need to be added to the list. They are
1714  // redundant.
1715  Changed = true;
1716  break;
1717 
1718  case ISD::TokenFactor:
1719  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1720  // Queue up for processing.
1721  TFs.push_back(Op.getNode());
1722  // Clean up in case the token factor is removed.
1723  AddToWorklist(Op.getNode());
1724  Changed = true;
1725  break;
1726  }
1727  LLVM_FALLTHROUGH;
1728 
1729  default:
1730  // Only add if it isn't already in the list.
1731  if (SeenOps.insert(Op.getNode()).second)
1732  Ops.push_back(Op);
1733  else
1734  Changed = true;
1735  break;
1736  }
1737  }
1738  }
1739 
1740  // Remove nodes that are chained to another node in the list. Do so
1741  // by walking up chains breadth-first, stopping when we've seen
1742  // another operand. In general we must climb to the EntryNode, but we can exit
1743  // early if we find all remaining work is associated with just one operand as
1744  // no further pruning is possible.
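 // For example (illustrative), given TokenFactor(Load, Store) where the
 // Store's chain reaches the Load, the Load entry is redundant and can be
 // pruned because the Store already orders after it.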
1745 
1746  // List of nodes to search through and original Ops from which they originate.
1747  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1748  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1749  SmallPtrSet<SDNode *, 16> SeenChains;
1750  bool DidPruneOps = false;
1751 
1752  unsigned NumLeftToConsider = 0;
1753  for (const SDValue &Op : Ops) {
1754  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1755  OpWorkCount.push_back(1);
1756  }
1757 
1758  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1759  // If this is an Op, we can remove the op from the list. Re-mark any
1760  // search associated with it as from the current OpNumber.
1761  if (SeenOps.count(Op) != 0) {
1762  Changed = true;
1763  DidPruneOps = true;
1764  unsigned OrigOpNumber = 0;
1765  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1766  OrigOpNumber++;
1767  assert((OrigOpNumber != Ops.size()) &&
1768  "expected to find TokenFactor Operand");
1769  // Re-mark worklist from OrigOpNumber to OpNumber
1770  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1771  if (Worklist[i].second == OrigOpNumber) {
1772  Worklist[i].second = OpNumber;
1773  }
1774  }
1775  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1776  OpWorkCount[OrigOpNumber] = 0;
1777  NumLeftToConsider--;
1778  }
1779  // Add if it's a new chain
1780  if (SeenChains.insert(Op).second) {
1781  OpWorkCount[OpNumber]++;
1782  Worklist.push_back(std::make_pair(Op, OpNumber));
1783  }
1784  };
1785 
1786  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1787  // We need to consider at least 2 Ops to prune.
1788  if (NumLeftToConsider <= 1)
1789  break;
1790  auto CurNode = Worklist[i].first;
1791  auto CurOpNumber = Worklist[i].second;
1792  assert((OpWorkCount[CurOpNumber] > 0) &&
1793  "Node should not appear in worklist");
1794  switch (CurNode->getOpcode()) {
1795  case ISD::EntryToken:
1796  // Hitting EntryToken is the only way for the search to terminate without
1797  // hitting
1798  // another operand's search. Prevent us from marking this operand
1799  // considered.
1800  NumLeftToConsider++;
1801  break;
1802  case ISD::TokenFactor:
1803  for (const SDValue &Op : CurNode->op_values())
1804  AddToWorklist(i, Op.getNode(), CurOpNumber);
1805  break;
1806  case ISD::CopyFromReg:
1807  case ISD::CopyToReg:
1808  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1809  break;
1810  default:
1811  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1812  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1813  break;
1814  }
1815  OpWorkCount[CurOpNumber]--;
1816  if (OpWorkCount[CurOpNumber] == 0)
1817  NumLeftToConsider--;
1818  }
1819 
1820  // If we've changed things around then replace token factor.
1821  if (Changed) {
1822  SDValue Result;
1823  if (Ops.empty()) {
1824  // The entry token is the only possible outcome.
1825  Result = DAG.getEntryNode();
1826  } else {
1827  if (DidPruneOps) {
1828  SmallVector<SDValue, 8> PrunedOps;
1829  //
1830  for (const SDValue &Op : Ops) {
1831  if (SeenChains.count(Op.getNode()) == 0)
1832  PrunedOps.push_back(Op);
1833  }
1834  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1835  } else {
1836  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1837  }
1838  }
1839  return Result;
1840  }
1841  return SDValue();
1842 }
1843 
1844 /// MERGE_VALUES can always be eliminated.
1845 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1846  WorklistRemover DeadNodes(*this);
1847  // Replacing results may cause a different MERGE_VALUES to suddenly
1848  // be CSE'd with N, and carry its uses with it. Iterate until no
1849  // uses remain, to ensure that the node can be safely deleted.
1850  // First add the users of this node to the work list so that they
1851  // can be tried again once they have new operands.
1852  AddUsersToWorklist(N);
1853  do {
1854  // Do as a single replacement to avoid rewalking use lists.
1855  SmallVector<SDValue, 8> Ops;
1856  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1857  Ops.push_back(N->getOperand(i));
1858  DAG.ReplaceAllUsesWith(N, Ops.data());
1859  } while (!N->use_empty());
1860  deleteAndRecombine(N);
1861  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1862 }
1863 
1864 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1865 /// ConstantSDNode pointer else nullptr.
1866 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1867  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1868  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1869 }
1870 
1871 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1872  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1873 
1874  // Don't do this unless the old select is going away. We want to eliminate the
1875  // binary operator, not replace a binop with a select.
1876  // TODO: Handle ISD::SELECT_CC.
1877  unsigned SelOpNo = 0;
1878  SDValue Sel = BO->getOperand(0);
1879  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1880  SelOpNo = 1;
1881  Sel = BO->getOperand(1);
1882  }
1883 
1884  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1885  return SDValue();
1886 
1887  SDValue CT = Sel.getOperand(1);
1888  if (!isConstantOrConstantVector(CT, true) &&
1889  !isConstantFPBuildVectorOrConstantFP(CT))
1890  return SDValue();
1891 
1892  SDValue CF = Sel.getOperand(2);
1893  if (!isConstantOrConstantVector(CF, true) &&
1894  !isConstantFPBuildVectorOrConstantFP(CF))
1895  return SDValue();
1896 
1897  // Bail out if any constants are opaque because we can't constant fold those.
1898  // The exception is "and" and "or" with either 0 or -1 in which case we can
1899  // propagate non-constant operands into the select, i.e.:
1900  // and (select Cond, 0, -1), X --> select Cond, 0, X
1901  // or X, (select Cond, -1, 0) --> select Cond, -1, X
1902  auto BinOpcode = BO->getOpcode();
1903  bool CanFoldNonConst =
1904  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1905  (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1906  (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1907 
1908  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1909  if (!CanFoldNonConst &&
1910  !isConstantOrConstantVector(CBO, true) &&
1911  !isConstantFPBuildVectorOrConstantFP(CBO))
1912  return SDValue();
1913 
1914  EVT VT = Sel.getValueType();
1915 
1916  // In the case of a shift, the value and the shift amount may have different
1917  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS type.
1918  // Bail out if we have swapped operands and the value types do not match.
1919  // NB: x86 is fine if the operands are not swapped and the shift amount VT is
1920  // no bigger than the shifted value's. TODO: it is possible to check for a
1921  // shift operation, correct the VTs, and still perform the optimization on x86 if needed.
1922  if (SelOpNo && VT != CBO.getValueType())
1923  return SDValue();
1924 
1925  // We have a select-of-constants followed by a binary operator with a
1926  // constant. Eliminate the binop by pulling the constant math into the select.
1927  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
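 // Illustrative instance: add (select Cond, 1, 7), 10 --> select Cond, 11, 17.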
1928  SDLoc DL(Sel);
1929  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1930  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1931  if (!CanFoldNonConst && !NewCT.isUndef() &&
1932  !isConstantOrConstantVector(NewCT, true) &&
1933  !isConstantFPBuildVectorOrConstantFP(NewCT))
1934  return SDValue();
1935 
1936  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1937  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1938  if (!CanFoldNonConst && !NewCF.isUndef() &&
1939  !isConstantOrConstantVector(NewCF, true) &&
1940  !isConstantFPBuildVectorOrConstantFP(NewCF))
1941  return SDValue();
1942 
1943  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1944 }
1945 
1946 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1947  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1948  "Expecting add or sub");
1949 
1950  // Match a constant operand and a zext operand for the math instruction:
1951  // add Z, C
1952  // sub C, Z
1953  bool IsAdd = N->getOpcode() == ISD::ADD;
1954  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1955  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1956  auto *CN = dyn_cast<ConstantSDNode>(C);
1957  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1958  return SDValue();
1959 
1960  // Match the zext operand as a setcc of a boolean.
1961  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1962  Z.getOperand(0).getValueType() != MVT::i1)
1963  return SDValue();
1964 
1965  // Match the compare as: setcc (X & 1), 0, eq.
1966  SDValue SetCC = Z.getOperand(0);
1967  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1968  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1969  SetCC.getOperand(0).getOpcode() != ISD::AND ||
1970  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1971  return SDValue();
1972 
1973  // We are adding/subtracting a constant and an inverted low bit. Turn that
1974  // into a subtract/add of the low bit with incremented/decremented constant:
1975  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1976  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
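 // Illustrative check: zext (seteq (X & 1), 0) == 1 - (X & 1), so adding C
 // gives (C + 1) - (X & 1), and subtracting it from C gives (C - 1) + (X & 1).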
1977  EVT VT = C.getValueType();
1978  SDLoc DL(N);
1979  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1980  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1981  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1982  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1983 }
1984 
1985 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
1986 /// a shift and add with a different constant.
1987 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
1988  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1989  "Expecting add or sub");
1990 
1991  // We need a constant operand for the add/sub, and the other operand is a
1992  // logical shift right: add (srl), C or sub C, (srl).
1993  bool IsAdd = N->getOpcode() == ISD::ADD;
1994  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
1995  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
1996  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
1997  if (!C || ShiftOp.getOpcode() != ISD::SRL)
1998  return SDValue();
1999 
2000  // The shift must be of a 'not' value.
2001  SDValue Not = ShiftOp.getOperand(0);
2002  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2003  return SDValue();
2004 
2005  // The shift must be moving the sign bit to the least-significant-bit.
2006  EVT VT = ShiftOp.getValueType();
2007  SDValue ShAmt = ShiftOp.getOperand(1);
2008  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2009  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2010  return SDValue();
2011 
2012  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2013  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2014  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
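 // Illustrative check (32-bit X): (not X) >>u 31 == 1 - (X >>u 31) and
 // X >>s 31 == -(X >>u 31), so both sides of the first rule equal
 // C + 1 - (X >>u 31) and both sides of the second equal (C - 1) + (X >>u 31).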
2015  SDLoc DL(N);
2016  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2017  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2018  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2019  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2020 }
2021 
2022 SDValue DAGCombiner::visitADD(SDNode *N) {
2023  SDValue N0 = N->getOperand(0);
2024  SDValue N1 = N->getOperand(1);
2025  EVT VT = N0.getValueType();
2026  SDLoc DL(N);
2027 
2028  // fold vector ops
2029  if (VT.isVector()) {
2030  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2031  return FoldedVOp;
2032 
2033  // fold (add x, 0) -> x, vector edition
2034  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2035  return N0;
2036  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2037  return N1;
2038  }
2039 
2040  // fold (add x, undef) -> undef
2041  if (N0.isUndef())
2042  return N0;
2043 
2044  if (N1.isUndef())
2045  return N1;
2046 
2047  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2048  // canonicalize constant to RHS
2049  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2050  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2051  // fold (add c1, c2) -> c1+c2
2052  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2053  N1.getNode());
2054  }
2055 
2056  // fold (add x, 0) -> x
2057  if (isNullConstant(N1))
2058  return N0;
2059 
2060  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2061  // fold ((c1-A)+c2) -> (c1+c2)-A
2062  if (N0.getOpcode() == ISD::SUB &&
2063  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2064  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2065  return DAG.getNode(ISD::SUB, DL, VT,
2066  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2067  N0.getOperand(1));
2068  }
2069 
2070  // add (sext i1 X), 1 -> zext (not i1 X)
2071  // We don't transform this pattern:
2072  // add (zext i1 X), -1 -> sext (not i1 X)
2073  // because most (?) targets generate better code for the zext form.
2074  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2075  isOneOrOneSplat(N1)) {
2076  SDValue X = N0.getOperand(0);
2077  if ((!LegalOperations ||
2078  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2079  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2080  X.getScalarValueSizeInBits() == 1) {
2081  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2082  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2083  }
2084  }
2085 
2086  // Undo the add -> or combine to merge constant offsets from a frame index.
2087  if (N0.getOpcode() == ISD::OR &&
2088  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2089  isa<ConstantSDNode>(N0.getOperand(1)) &&
2090  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2091  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2092  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2093  }
2094  }
2095 
2096  if (SDValue NewSel = foldBinOpIntoSelect(N))
2097  return NewSel;
2098 
2099  // reassociate add
2100  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2101  return RADD;
2102 
2103  // fold ((0-A) + B) -> B-A
2104  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2105  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2106 
2107  // fold (A + (0-B)) -> A-B
2108  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2109  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2110 
2111  // fold (A+(B-A)) -> B
2112  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2113  return N1.getOperand(0);
2114 
2115  // fold ((B-A)+A) -> B
2116  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2117  return N0.getOperand(0);
2118 
2119  // fold (A+(B-(A+C))) to (B-C)
2120  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2121  N0 == N1.getOperand(1).getOperand(0))
2122  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2123  N1.getOperand(1).getOperand(1));
2124 
2125  // fold (A+(B-(C+A))) to (B-C)
2126  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2127  N0 == N1.getOperand(1).getOperand(1))
2128  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2129  N1.getOperand(1).getOperand(0));
2130 
2131  // fold (A+((B-A)+or-C)) to (B+or-C)
2132  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2133  N1.getOperand(0).getOpcode() == ISD::SUB &&
2134  N0 == N1.getOperand(0).getOperand(1))
2135  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2136  N1.getOperand(1));
2137 
2138  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2139  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2140  SDValue N00 = N0.getOperand(0);
2141  SDValue N01 = N0.getOperand(1);
2142  SDValue N10 = N1.getOperand(0);
2143  SDValue N11 = N1.getOperand(1);
2144 
2145  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2146  return DAG.getNode(ISD::SUB, DL, VT,
2147  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2148  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2149  }
2150 
2151  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2152  return V;
2153 
2154  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2155  return V;
2156 
2157  if (SimplifyDemandedBits(SDValue(N, 0)))
2158  return SDValue(N, 0);
2159 
2160  // fold (a+b) -> (a|b) iff a and b share no bits.
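 // For example (illustrative), (x & 0xF0) + (y & 0x0F) can never carry, so it
 // equals (x & 0xF0) | (y & 0x0F).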
2161  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2162  DAG.haveNoCommonBitsSet(N0, N1))
2163  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2164 
2165  // fold (add (xor a, -1), 1) -> (sub 0, a)
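 // Illustrative check: xor a, -1 == ~a == -a - 1, so adding 1 yields -a.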
2166  if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
2167  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2168  N0.getOperand(0));
2169 
2170  if (SDValue Combined = visitADDLike(N0, N1, N))
2171  return Combined;
2172 
2173  if (SDValue Combined = visitADDLike(N1, N0, N))
2174  return Combined;
2175 
2176  return SDValue();
2177 }
2178 
2179 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2180  unsigned Opcode = N->getOpcode();
2181  SDValue N0 = N->getOperand(0);
2182  SDValue N1 = N->getOperand(1);
2183  EVT VT = N0.getValueType();
2184  SDLoc DL(N);
2185 
2186  // fold vector ops
2187  if (VT.isVector()) {
2188  // TODO SimplifyVBinOp
2189 
2190  // fold (add_sat x, 0) -> x, vector edition
2191  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2192  return N0;
2193  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2194  return N1;
2195  }
2196 
2197  // fold (add_sat x, undef) -> -1
2198  if (N0.isUndef() || N1.isUndef())
2199  return DAG.getAllOnesConstant(DL, VT);
2200 
2200 
2201  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2202  // canonicalize constant to RHS
2203  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2204  return DAG.getNode(Opcode, DL, VT, N1, N0);
2205  // fold (add_sat c1, c2) -> c3
2206  return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2207  N1.getNode());
2208  }
2209 
2210  // fold (add_sat x, 0) -> x
2211  if (isNullConstant(N1))
2212  return N0;
2213 
2214  // If it cannot overflow, transform into an add.
2215  if (Opcode == ISD::UADDSAT)
2216  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2217  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2218 
2219  return SDValue();
2220 }
2221 
2222 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2223  bool Masked = false;
2224 
2225  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2226  while (true) {
2227  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2228  V = V.getOperand(0);
2229  continue;
2230  }
2231 
2232  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2233  Masked = true;
2234  V = V.getOperand(0);
2235  continue;
2236  }
2237 
2238  break;
2239  }
2240 
2241  // If this is not a carry, return.
2242  if (V.getResNo() != 1)
2243  return SDValue();
2244 
2245  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2246  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2247  return SDValue();
2248 
2249  // If the result is masked, then no matter what kind of bool it is we can
2250  // return. If it isn't, then we need to make sure the bool type is either 0 or
2251  // 1 and not other values.
2252  if (Masked ||
2253  TLI.getBooleanContents(V.getValueType()) ==
2254  TargetLowering::ZeroOrOneBooleanContent)
2255  return V;
2256 
2257  return SDValue();
2258 }
2259 
2260 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2261  EVT VT = N0.getValueType();
2262  SDLoc DL(LocReference);
2263 
2264  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2265  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2266  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2267  return DAG.getNode(ISD::SUB, DL, VT, N0,
2268  DAG.getNode(ISD::SHL, DL, VT,
2269  N1.getOperand(0).getOperand(1),
2270  N1.getOperand(1)));
2271 
2272  if (N1.getOpcode() == ISD::AND) {
2273  SDValue AndOp0 = N1.getOperand(0);
2274  unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2275  unsigned DestBits = VT.getScalarSizeInBits();
2276 
2277  // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2278  // and similar xforms where the inner op is either ~0 or 0.
2279  if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
2280  return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2281  }
2282 
2283  // add (sext i1), X -> sub X, (zext i1)
2284  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2285  N0.getOperand(0).getValueType() == MVT::i1 &&
2286  !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2287  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2288  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2289  }
2290 
2291  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2292  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2293  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2294  if (TN->getVT() == MVT::i1) {
2295  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2296  DAG.getConstant(1, DL, VT));
2297  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2298  }
2299  }
2300 
2301  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2302  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2303  N1.getResNo() == 0)
2304  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2305  N0, N1.getOperand(0), N1.getOperand(2));
2306 
2307  // (add X, Carry) -> (addcarry X, 0, Carry)
2308  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2309  if (SDValue Carry = getAsCarry(TLI, N1))
2310  return DAG.getNode(ISD::ADDCARRY, DL,
2311  DAG.getVTList(VT, Carry.getValueType()), N0,
2312  DAG.getConstant(0, DL, VT), Carry);
2313 
2314  return SDValue();
2315 }
2316 
2317 SDValue DAGCombiner::visitADDC(SDNode *N) {
2318  SDValue N0 = N->getOperand(0);
2319  SDValue N1 = N->getOperand(1);
2320  EVT VT = N0.getValueType();
2321  SDLoc DL(N);
2322 
2323  // If the flag result is dead, turn this into an ADD.
2324  if (!N->hasAnyUseOfValue(1))
2325  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2326  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2327 
2328  // canonicalize constant to RHS.
2329  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2330  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2331  if (N0C && !N1C)
2332  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2333 
2334  // fold (addc x, 0) -> x + no carry out
2335  if (isNullConstant(N1))
2336  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2337  DL, MVT::Glue));
2338 
2339  // If it cannot overflow, transform into an add.
2340  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2341  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2342  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2343 
2344  return SDValue();
2345 }
2346 
2347 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2348  SelectionDAG &DAG, const TargetLowering &TLI) {
2349  SDValue Cst;
2350  switch (TLI.getBooleanContents(VT)) {
2351  case TargetLowering::ZeroOrOneBooleanContent:
2352  case TargetLowering::UndefinedBooleanContent:
2353  Cst = DAG.getConstant(1, DL, VT);
2354  break;
2355  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2356  Cst = DAG.getConstant(-1, DL, VT);
2357  break;
2358  }
2359 
2360  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2361 }
2362 
2363 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2364  if (V.getOpcode() != ISD::XOR) return false;
2365  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1));
2366  if (!Const) return false;
2367 
2368  switch(TLI.getBooleanContents(VT)) {
2369  case TargetLowering::ZeroOrOneBooleanContent:
2370  return Const->isOne();
2371  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2372  return Const->isAllOnesValue();
2373  case TargetLowering::UndefinedBooleanContent:
2374  return (Const->getAPIntValue() & 0x01) == 1;
2375  }
2376  llvm_unreachable("Unsupported boolean content");
2377 }
2378 
2379 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2380  SDValue N0 = N->getOperand(0);
2381  SDValue N1 = N->getOperand(1);
2382  EVT VT = N0.getValueType();
2383  if (VT.isVector())
2384  return SDValue();
2385 
2386  EVT CarryVT = N->getValueType(1);
2387  SDLoc DL(N);
2388 
2389  // If the flag result is dead, turn this into an ADD.
2390  if (!N->hasAnyUseOfValue(1))
2391  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2392  DAG.getUNDEF(CarryVT));
2393 
2394  // canonicalize constant to RHS.
2395  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2396  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2397  if (N0C && !N1C)
2398  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2399 
2400  // fold (uaddo x, 0) -> x + no carry out
2401  if (isNullConstant(N1))
2402  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2403 
2404  // If it cannot overflow, transform into an add.
2405  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2406  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2407  DAG.getConstant(0, DL, CarryVT));
2408 
2409  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2410  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2411  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2412  DAG.getConstant(0, DL, VT),
2413  N0.getOperand(0));
2414  return CombineTo(N, Sub,
2415  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2416  }
2417 
2418  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2419  return Combined;
2420 
2421  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2422  return Combined;
2423 
2424  return SDValue();
2425 }
2426 
2427 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2428  auto VT = N0.getValueType();
2429 
2430  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2431  // If Y + 1 cannot overflow.
2432  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2433  SDValue Y = N1.getOperand(0);
2434  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2435  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2436  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2437  N1.getOperand(2));
2438  }
2439 
2440  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2441  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2442  if (SDValue Carry = getAsCarry(TLI, N1))
2443  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2444  DAG.getConstant(0, SDLoc(N), VT), Carry);
2445 
2446  return SDValue();
2447 }
2448 
2449 SDValue DAGCombiner::visitADDE(SDNode *N) {
2450  SDValue N0 = N->getOperand(0);
2451  SDValue N1 = N->getOperand(1);
2452  SDValue CarryIn = N->getOperand(2);
2453 
2454  // canonicalize constant to RHS
2455  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2456  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2457  if (N0C && !N1C)
2458  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2459  N1, N0, CarryIn);
2460 
2461  // fold (adde x, y, false) -> (addc x, y)
2462  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2463  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2464 
2465  return SDValue();
2466 }
2467 
2468 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2469  SDValue N0 = N->getOperand(0);
2470  SDValue N1 = N->getOperand(1);
2471  SDValue CarryIn = N->getOperand(2);
2472  SDLoc DL(N);
2473 
2474  // canonicalize constant to RHS
2475  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2476  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2477  if (N0C && !N1C)
2478  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2479 
2480  // fold (addcarry x, y, false) -> (uaddo x, y)
2481  if (isNullConstant(CarryIn)) {
2482  if (!LegalOperations ||
2483  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2484  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2485  }
2486 
2487  EVT CarryVT = CarryIn.getValueType();
2488 
2489  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2490  if (isNullConstant(N0) && isNullConstant(N1)) {
2491  EVT VT = N0.getValueType();
2492  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2493  AddToWorklist(CarryExt.getNode());
2494  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2495  DAG.getConstant(1, DL, VT)),
2496  DAG.getConstant(0, DL, CarryVT));
2497  }
2498 
2499  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2500  if (isBitwiseNot(N0) && isNullConstant(N1) &&
2501  isBooleanFlip(CarryIn, CarryVT, TLI)) {
2502  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2503  DAG.getConstant(0, DL, N0.getValueType()),
2504  N0.getOperand(0), CarryIn.getOperand(0));
2505  return CombineTo(N, Sub,
2506  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2507  }
2508 
2509  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2510  return Combined;
2511 
2512  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2513  return Combined;
2514 
2515  return SDValue();
2516 }
2517 
2518 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2519  SDNode *N) {
2520  // Iff the flag result is dead:
2521  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2522  if ((N0.getOpcode() == ISD::ADD ||
2523  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2524  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2525  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2526  N0.getOperand(0), N0.getOperand(1), CarryIn);
2527 
2528  /**
2529  * When one of the addcarry arguments is itself a carry, we may be facing
2530  * a diamond carry propagation, in which case we try to transform the DAG
2531  * to ensure linear carry propagation if that is possible.
2532  *
2533  * We are trying to get:
2534  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2535  */
2536  if (auto Y = getAsCarry(TLI, N1)) {
2537  /**
2538  * (uaddo A, B)
2539  * / \
2540  * Carry Sum
2541  * | \
2542  * | (addcarry *, 0, Z)
2543  * | /
2544  * \ Carry
2545  * | /
2546  * (addcarry X, *, *)
2547  */
2548  if (Y.getOpcode() == ISD::UADDO &&
2549  CarryIn.getResNo() == 1 &&
2550  CarryIn.getOpcode() == ISD::ADDCARRY &&
2551  isNullConstant(CarryIn.getOperand(1)) &&
2552  CarryIn.getOperand(0) == Y.getValue(0)) {
2553  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2554  Y.getOperand(0), Y.getOperand(1),
2555  CarryIn.getOperand(2));
2556  AddToWorklist(NewY.getNode());
2557  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2558  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2559  NewY.getValue(1));
2560  }
2561  }
2562 
2563  return SDValue();
2564 }
2565 
2566 // Since it may not be valid to emit a fold to zero for vector initializers,
2567 // check if we can before folding.
2568 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2569  SelectionDAG &DAG, bool LegalOperations) {
2570  if (!VT.isVector())
2571  return DAG.getConstant(0, DL, VT);
2572  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2573  return DAG.getConstant(0, DL, VT);
2574  return SDValue();
2575 }
2576 
2577 SDValue DAGCombiner::visitSUB(SDNode *N) {
2578  SDValue N0 = N->getOperand(0);
2579  SDValue N1 = N->getOperand(1);
2580  EVT VT = N0.getValueType();
2581  SDLoc DL(N);
2582 
2583  // fold vector ops
2584  if (VT.isVector()) {
2585  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2586  return FoldedVOp;
2587 
2588  // fold (sub x, 0) -> x, vector edition
2589  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2590  return N0;
2591  }
2592 
2593  // fold (sub x, x) -> 0
2594  // FIXME: Refactor this and xor and other similar operations together.
2595  if (N0 == N1)
2596  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2597  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2598  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2599  // fold (sub c1, c2) -> c1-c2
2600  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2601  N1.getNode());
2602  }
2603 
2604  if (SDValue NewSel = foldBinOpIntoSelect(N))
2605  return NewSel;
2606 
2607  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2608 
2609  // fold (sub x, c) -> (add x, -c)
2610  if (N1C) {
2611  return DAG.getNode(ISD::ADD, DL, VT, N0,
2612  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2613  }
2614 
2615  if (isNullOrNullSplat(N0)) {
2616  unsigned BitWidth = VT.getScalarSizeInBits();
2617  // Right-shifting everything out but the sign bit followed by negation is
2618  // the same as flipping arithmetic/logical shift type without the negation:
2619  // -(X >>u 31) -> (X >>s 31)
2620  // -(X >>s 31) -> (X >>u 31)
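 // Illustrative check: X >>u 31 is 0 or 1 while X >>s 31 is 0 or -1 for the
 // same X, so negating one form yields the other.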
2621  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2622  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2623  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2624  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2625  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2626  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2627  }
2628  }
2629 
2630  // 0 - X --> 0 if the sub is NUW.
2631  if (N->getFlags().hasNoUnsignedWrap())
2632  return N0;
2633 
2634  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2635  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2636  // N1 must be 0 because negating the minimum signed value is undefined.
2637  if (N->getFlags().hasNoSignedWrap())
2638  return N0;
2639 
2640  // 0 - X --> X if X is 0 or the minimum signed value.
2641  return N1;
2642  }
2643  }
2644 
2645  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2646  if (isAllOnesOrAllOnesSplat(N0))
2647  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2648 
2649  // fold (A - (0-B)) -> A+B
2650  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2651  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2652 
2653  // fold A-(A-B) -> B
2654  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2655  return N1.getOperand(1);
2656 
2657  // fold (A+B)-A -> B
2658  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2659  return N0.getOperand(1);
2660 
2661  // fold (A+B)-B -> A
2662  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2663  return N0.getOperand(0);
2664 
2665  // fold C2-(A+C1) -> (C2-C1)-A
2666  if (N1.getOpcode() == ISD::ADD) {
2667  SDValue N11 = N1.getOperand(1);
2668  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2669  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2670  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2671  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2672  }
2673  }
2674 
2675  // fold ((A+(B+or-C))-B) -> A+or-C
2676  if (N0.getOpcode() == ISD::ADD &&
2677  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2678  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2679  N0.getOperand(1).getOperand(0) == N1)
2680  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2681  N0.getOperand(1).getOperand(1));
2682 
2683  // fold ((A+(C+B))-B) -> A+C
2684  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2685  N0.getOperand(1).getOperand(1) == N1)
2686  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2687  N0.getOperand(1).getOperand(0));
2688 
2689  // fold ((A-(B-C))-C) -> A-B
2690  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2691  N0.getOperand(1).getOperand(1) == N1)
2692  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2693  N0.getOperand(1).getOperand(0));
2694 
2695  // fold (A-(B-C)) -> A+(C-B)
2696  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2697  return DAG.getNode(ISD::ADD, DL, VT, N0,
2698  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2699  N1.getOperand(0)));
2700 
2701  // fold (X - (-Y * Z)) -> (X + (Y * Z))
2702  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2703  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2704  isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
2705  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2706  N1.getOperand(0).getOperand(1),
2707  N1.getOperand(1));
2708  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2709  }
2710  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2711  isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
2712  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2713  N1.getOperand(0),
2714  N1.getOperand(1).getOperand(1));
2715  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2716  }
2717  }
2718 
2719  // If either operand of a sub is undef, the result is undef
2720  if (N0.isUndef())
2721  return N0;
2722  if (N1.isUndef())
2723  return N1;
2724 
2725  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2726  return V;
2727 
2728  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2729  return V;
2730 
2731  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2732  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2733  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2734  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2735  SDValue S0 = N1.getOperand(0);
2736  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2737  unsigned OpSizeInBits = VT.getScalarSizeInBits();
2738  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2739  if (C->getAPIntValue() == (OpSizeInBits - 1))
2740  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2741  }
2742  }
2743  }
2744 
2745  // If the relocation model supports it, consider symbol offsets.
2746  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2747  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2748  // fold (sub Sym, c) -> Sym-c
2749  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2750  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2751  GA->getOffset() -
2752  (uint64_t)N1C->getSExtValue());
2753  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2754  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2755  if (GA->getGlobal() == GB->getGlobal())
2756  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2757  DL, VT);
2758  }
2759 
2760  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2761  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2762  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2763  if (TN->getVT() == MVT::i1) {
2764  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2765  DAG.getConstant(1, DL, VT));
2766  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2767  }
2768  }
2769 
2770  // Prefer an add for more folding potential and possibly better codegen:
2771  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
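 // Illustrative check: lshr by width-1 produces 0 or 1 and ashr produces 0 or
 // -1, so subtracting the former is the same as adding the latter.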
2772  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2773  SDValue ShAmt = N1.getOperand(1);
2774  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2775  if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2776  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2777  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2778  }
2779  }
2780 
2781  return SDValue();
2782 }
2783 
2784 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2785  SDValue N0 = N->getOperand(0);
2786  SDValue N1 = N->getOperand(1);
2787  EVT VT = N0.getValueType();
2788  SDLoc DL(N);
2789 
2790  // fold vector ops
2791  if (VT.isVector()) {
2792  // TODO SimplifyVBinOp
2793 
2794  // fold (sub_sat x, 0) -> x, vector edition
2795  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2796  return N0;
2797  }
2798 
2799  // fold (sub_sat x, undef) -> 0
2800  if (N0.isUndef() || N1.isUndef())
2801  return DAG.getConstant(0, DL, VT);
2802 
2803  // fold (sub_sat x, x) -> 0
2804  if (N0 == N1)
2805  return DAG.getConstant(0, DL, VT);
2806 
2807  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2808  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2809  // fold (sub_sat c1, c2) -> c3
2810  return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
2811  N1.getNode());
2812  }
2813 
2814  // fold (sub_sat x, 0) -> x
2815  if (isNullConstant(N1))
2816  return N0;
2817 
2818  return SDValue();
2819 }
2820 
2821 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2822  SDValue N0 = N->getOperand(0);
2823  SDValue N1 = N->getOperand(1);
2824  EVT VT = N0.getValueType();
2825  SDLoc DL(N);
2826 
2827  // If the flag result is dead, turn this into an SUB.
2828  if (!N->hasAnyUseOfValue(1))
2829  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2830  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2831 
2832  // fold (subc x, x) -> 0 + no borrow
2833  if (N0 == N1)
2834  return CombineTo(N, DAG.getConstant(0, DL, VT),
2835  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2836 
2837  // fold (subc x, 0) -> x + no borrow
2838  if (isNullConstant(N1))
2839  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2840 
2841  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2842  if (isAllOnesConstant(N0))
2843  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2844  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2845 
2846  return SDValue();
2847 }
2848 
2849 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2850  SDValue N0 = N->getOperand(0);
2851  SDValue N1 = N->getOperand(1);
2852  EVT VT = N0.getValueType();
2853  if (VT.isVector())
2854  return SDValue();
2855 
2856  EVT CarryVT = N->getValueType(1);
2857  SDLoc DL(N);
2858 
2859  // If the flag result is dead, turn this into an SUB.
2860  if (!N->hasAnyUseOfValue(1))
2861  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2862  DAG.getUNDEF(CarryVT));
2863 
2864  // fold (usubo x, x) -> 0 + no borrow
2865  if (N0 == N1)
2866  return CombineTo(N, DAG.getConstant(0, DL, VT),
2867  DAG.getConstant(0, DL, CarryVT));
2868 
2869  // fold (usubo x, 0) -> x + no borrow
2870  if (isNullConstant(N1))
2871  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2872 
2873  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2874  if (isAllOnesConstant(N0))
2875  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2876  DAG.getConstant(0, DL, CarryVT));
2877 
2878  return SDValue();
2879 }
2880 
2881 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2882  SDValue N0 = N->getOperand(0);
2883  SDValue N1 = N->getOperand(1);
2884  SDValue CarryIn = N->getOperand(2);
2885 
2886  // fold (sube x, y, false) -> (subc x, y)
2887  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2888  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2889 
2890  return SDValue();
2891 }
2892 
2893 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2894  SDValue N0 = N->getOperand(0);
2895  SDValue N1 = N->getOperand(1);
2896  SDValue CarryIn = N->getOperand(2);
2897 
2898  // fold (subcarry x, y, false) -> (usubo x, y)
2899  if (isNullConstant(CarryIn)) {
2900  if (!LegalOperations ||
2901  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2902  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2903  }
2904 
2905  return SDValue();
2906 }
2907 
2908 SDValue DAGCombiner::visitMUL(SDNode *N) {
2909  SDValue N0 = N->getOperand(0);
2910  SDValue N1 = N->getOperand(1);
2911  EVT VT = N0.getValueType();
2912 
2913  // fold (mul x, undef) -> 0
2914  if (N0.isUndef() || N1.isUndef())
2915  return DAG.getConstant(0, SDLoc(N), VT);
2916 
2917  bool N0IsConst = false;
2918  bool N1IsConst = false;
2919  bool N1IsOpaqueConst = false;
2920  bool N0IsOpaqueConst = false;
2921  APInt ConstValue0, ConstValue1;
2922  // fold vector ops
2923  if (VT.isVector()) {
2924  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2925  return FoldedVOp;
2926 
2927  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2928  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2929  assert((!N0IsConst ||
2930  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2931  "Splat APInt should be element width");
2932  assert((!N1IsConst ||
2933  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2934  "Splat APInt should be element width");
2935  } else {
2936  N0IsConst = isa<ConstantSDNode>(N0);
2937  if (N0IsConst) {
2938  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2939  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2940  }
2941  N1IsConst = isa<ConstantSDNode>(N1);
2942  if (N1IsConst) {
2943  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2944  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2945  }
2946  }
2947 
2948  // fold (mul c1, c2) -> c1*c2
2949  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2950  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2951  N0.getNode(), N1.getNode());
2952 
2953  // canonicalize constant to RHS (vector doesn't have to splat)
2954  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2955  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2956  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2957  // fold (mul x, 0) -> 0
2958  if (N1IsConst && ConstValue1.isNullValue())
2959  return N1;
2960  // fold (mul x, 1) -> x
2961  if (N1IsConst && ConstValue1.isOneValue())
2962  return N0;
2963 
2964  if (SDValue NewSel = foldBinOpIntoSelect(N))
2965  return NewSel;
2966 
2967  // fold (mul x, -1) -> 0-x
2968  if (N1IsConst && ConstValue1.isAllOnesValue()) {
2969  SDLoc DL(N);
2970  return DAG.getNode(ISD::SUB, DL, VT,
2971  DAG.getConstant(0, DL, VT), N0);
2972  }
2973  // fold (mul x, (1 << c)) -> x << c
2974  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2975  DAG.isKnownToBeAPowerOfTwo(N1) &&
2976  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2977  SDLoc DL(N);
2978  SDValue LogBase2 = BuildLogBase2(N1, DL);
2979  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2980  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2981  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2982  }
2983  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2984  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2985  unsigned Log2Val = (-ConstValue1).logBase2();
2986  SDLoc DL(N);
2987  // FIXME: If the input is something that is easily negated (e.g. a
2988  // single-use add), we should put the negate there.
2989  return DAG.getNode(ISD::SUB, DL, VT,
2990  DAG.getConstant(0, DL, VT),
2991  DAG.getNode(ISD::SHL, DL, VT, N0,
2992  DAG.getConstant(Log2Val, DL,
2993  getShiftAmountTy(N0.getValueType()))));
2994  }
2995 
2996  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
2997  // mul x, (2^N + 1) --> add (shl x, N), x
2998  // mul x, (2^N - 1) --> sub (shl x, N), x
2999  // Examples: x * 33 --> (x << 5) + x
3000  // x * 15 --> (x << 4) - x
3001  // x * -33 --> -((x << 5) + x)
3002  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3003  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3004  // TODO: We could handle more general decomposition of any constant by
3005  // having the target set a limit on number of ops and making a
3006  // callback to determine that sequence (similar to sqrt expansion).
3007  unsigned MathOp = ISD::DELETED_NODE;
3008  APInt MulC = ConstValue1.abs();
3009  if ((MulC - 1).isPowerOf2())
3010  MathOp = ISD::ADD;
3011  else if ((MulC + 1).isPowerOf2())
3012  MathOp = ISD::SUB;
3013 
3014  if (MathOp != ISD::DELETED_NODE) {
3015  unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3016  : (MulC + 1).logBase2();
3017  assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3018  "Not expecting multiply-by-constant that could have simplified");
3019  SDLoc DL(N);
3020  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3021  DAG.getConstant(ShAmt, DL, VT));
3022  SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3023  if (ConstValue1.isNegative())
3024  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3025  return R;
3026  }
3027  }
3028 
3029  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3030  if (N0.getOpcode() == ISD::SHL &&
3031  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3032  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3033  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3034  if (isConstantOrConstantVector(C3))
3035  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3036  }
3037 
3038  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3039  // use.
3040  {
3041  SDValue Sh(nullptr, 0), Y(nullptr, 0);
3042 
3043  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3044  if (N0.getOpcode() == ISD::SHL &&
3045  isConstantOrConstantVector(N0.getOperand(1)) &&
3046  N0.getNode()->hasOneUse()) {
3047  Sh = N0; Y = N1;
3048  } else if (N1.getOpcode() == ISD::SHL &&
3049  isConstantOrConstantVector(N1.getOperand(1)) &&
3050  N1.getNode()->hasOneUse()) {
3051  Sh = N1; Y = N0;
3052  }
3053 
3054  if (Sh.getNode()) {
3055  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3056  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3057  }
3058  }
3059 
3060  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3061  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3062  N0.getOpcode() == ISD::ADD &&
3063  DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3064  isMulAddWithConstProfitable(N, N0, N1))
3065  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3066  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3067  N0.getOperand(0), N1),
3068  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3069  N0.getOperand(1), N1));
3070 
3071  // reassociate mul
3072  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3073  return RMUL;
3074 
3075  return SDValue();
3076 }
3077 
3078 /// Return true if divmod libcall is available.
3079 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3080  const TargetLowering &TLI) {
3081  RTLIB::Libcall LC;
3082  EVT NodeType = Node->getValueType(0);
3083  if (!NodeType.isSimple())
3084  return false;
3085  switch (NodeType.getSimpleVT().SimpleTy) {
3086  default: return false; // No libcall for vector types.
3087  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3088  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3089  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3090  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3091  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3092  }
3093 
3094  return TLI.getLibcallName(LC) != nullptr;
3095 }
3096 
3097 /// Issue divrem if both quotient and remainder are needed.
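/// For example (illustrative), if both (sdiv X, Y) and (srem X, Y) are live,
/// they can be replaced by the two results of a single ISD::SDIVREM node.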
3098 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3099  if (Node->use_empty())
3100  return SDValue(); // This is a dead node, leave it alone.
3101 
3102  unsigned Opcode = Node->getOpcode();
3103  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3104  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3105 
3106  // DivMod lib calls can still work on non-legal types if using lib-calls.
3107  EVT VT = Node->getValueType(0);
3108  if (VT.isVector() || !VT.isInteger())
3109  return SDValue();
3110 
3111  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3112  return SDValue();
3113 
3114  // If DIVREM is going to get expanded into a libcall,
3115  // but there is no libcall available, then don't combine.
3116  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3117  !isDivRemLibcallAvailable(Node, isSigned, TLI))
3118  return SDValue();
3119 
3120  // If div is legal, it's better to do the normal expansion
3121  unsigned OtherOpcode = 0;
3122  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3123  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3124  if (TLI.isOperationLegalOrCustom(Opcode, VT))
3125  return SDValue();
3126  } else {
3127  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3128  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3129  return SDValue();
3130  }
3131 
3132  SDValue Op0 = Node->getOperand(0);
3133  SDValue Op1 = Node->getOperand(1);
3134  SDValue combined;
3135  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3136  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3137  SDNode *User = *UI;
3138  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3139  User->use_empty())
3140  continue;
3141  // Convert the other matching node(s), too;
3142  // otherwise, the DIVREM may get target-legalized into something
3143  // target-specific that we won't be able to recognize.
3144  unsigned UserOpc = User->getOpcode();
3145  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3146  User->getOperand(0) == Op0 &&
3147  User->getOperand(1) == Op1) {
3148  if (!combined) {
3149  if (UserOpc == OtherOpcode) {
3150  SDVTList VTs = DAG.getVTList(VT, VT);
3151  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3152  } else if (UserOpc == DivRemOpc) {
3153  combined = SDValue(User, 0);
3154  } else {
3155  assert(UserOpc == Opcode);
3156  continue;
3157  }
3158  }
3159  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3160  CombineTo(User, combined);
3161  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3162  CombineTo(User, combined.getValue(1));
3163  }
3164  }
3165  return combined;
3166 }
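// Illustrative note (annotation, not in the original source): useDivRem lets a
// pair such as
//   q = a / b;   // SDIV
//   r = a % b;   // SREM
// be covered by a single SDIVREM node (or one divrem libcall) whose two
// results replace both of the original operations.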
3167 
3168 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3169  SDValue N0 = N->getOperand(0);
3170  SDValue N1 = N->getOperand(1);
3171  EVT VT = N->getValueType(0);
3172  SDLoc DL(N);
3173 
3174  unsigned Opc = N->getOpcode();
3175  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3176  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3177 
3178  // X / undef -> undef
3179  // X % undef -> undef
3180  // X / 0 -> undef
3181  // X % 0 -> undef
3182  // NOTE: This includes vectors where any divisor element is zero/undef.
3183  if (DAG.isUndef(Opc, {N0, N1}))
3184  return DAG.getUNDEF(VT);
3185 
3186  // undef / X -> 0
3187  // undef % X -> 0
3188  if (N0.isUndef())
3189  return DAG.getConstant(0, DL, VT);
3190 
3191  // 0 / X -> 0
3192  // 0 % X -> 0
3193  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3194  if (N0C && N0C->isNullValue())
3195  return N0;
3196 
3197  // X / X -> 1
3198  // X % X -> 0
3199  if (N0 == N1)
3200  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3201 
3202  // X / 1 -> X
3203  // X % 1 -> 0
3204  // If this is a boolean op (single-bit element type), we can't have
3205  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3206  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3207  // it's a 1.
3208  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3209  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3210 
3211  return SDValue();
3212 }
3213 
3214 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3215  SDValue N0 = N->getOperand(0);
3216  SDValue N1 = N->getOperand(1);
3217  EVT VT = N->getValueType(0);
3218  EVT CCVT = getSetCCResultType(VT);
3219 
3220  // fold vector ops
3221  if (VT.isVector())
3222  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3223  return FoldedVOp;
3224 
3225  SDLoc DL(N);
3226 
3227  // fold (sdiv c1, c2) -> c1/c2
3228  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3229  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3230  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3231  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3232  // fold (sdiv X, -1) -> 0-X
3233  if (N1C && N1C->isAllOnesValue())
3234  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3235  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3236  if (N1C && N1C->getAPIntValue().isMinSignedValue())
3237  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3238  DAG.getConstant(1, DL, VT),
3239  DAG.getConstant(0, DL, VT));
3240 
3241  if (SDValue V = simplifyDivRem(N, DAG))
3242  return V;
3243 
3244  if (SDValue NewSel = foldBinOpIntoSelect(N))
3245  return NewSel;
3246 
3247  // If we know the sign bits of both operands are zero, strength reduce to a
3248  // udiv instead. Handles (X&15) /s 4 -> (X&15) >> 2
3249  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3250  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3251 
3252  if (SDValue V = visitSDIVLike(N0, N1, N)) {
3253  // If the corresponding remainder node exists, update its users with
3254  // (Dividend - (Quotient * Divisor)).
3255  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3256  { N0, N1 })) {
3257  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3258  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3259  AddToWorklist(Mul.getNode());
3260  AddToWorklist(Sub.getNode());
3261  CombineTo(RemNode, Sub);
3262  }
3263  return V;
3264  }
3265 
3266  // sdiv, srem -> sdivrem
3267  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3268  // true. Otherwise, we break the simplification logic in visitREM().
3269  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3270  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3271  if (SDValue DivRem = useDivRem(N))
3272  return DivRem;
3273 
3274  return SDValue();
3275 }
3276 
3277 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3278  SDLoc DL(N);
3279  EVT VT = N->getValueType(0);
3280  EVT CCVT = getSetCCResultType(VT);
3281  unsigned BitWidth = VT.getScalarSizeInBits();
3282 
3283  // Helper for determining whether a value is a power-of-2 constant scalar (or
3284  // the negation of one), or a vector of such elements.
3285  auto IsPowerOfTwo = [](ConstantSDNode *C) {
3286  if (C->isNullValue() || C->isOpaque())
3287  return false;
3288  if (C->getAPIntValue().isPowerOf2())
3289  return true;
3290  if ((-C->getAPIntValue()).isPowerOf2())
3291  return true;
3292  return false;
3293  };
3294 
3295  // fold (sdiv X, pow2) -> simple ops after legalize
3296  // FIXME: We check for the exact bit here because the generic lowering gives
3297  // better results in that case. The target-specific lowering should learn how
3298  // to handle exact sdivs efficiently.
3299  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3300  // Target-specific implementation of sdiv x, pow2.
3301  if (SDValue Res = BuildSDIVPow2(N))
3302  return Res;
3303 
3304  // Create constants that are functions of the shift amount value.
3305  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3306  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3307  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3308  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3309  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3310  if (!isConstantOrConstantVector(Inexact))
3311  return SDValue();
3312 
3313  // Splat the sign bit into the register
3314  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3315  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3316  AddToWorklist(Sign.getNode());
3317 
3318  // Add (N0 < 0) ? abs2 - 1 : 0;
3319  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3320  AddToWorklist(Srl.getNode());
3321  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3322  AddToWorklist(Add.getNode());
3323  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3324  AddToWorklist(Sra.getNode());
3325 
3326  // Special case: (sdiv X, 1) -> X
3327  // Special Case: (sdiv X, -1) -> 0-X
3328  SDValue One = DAG.getConstant(1, DL, VT);
3329  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3330  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3331  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3332  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3333  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3334 
3335  // If dividing by a positive value, we're done. Otherwise, the result must
3336  // be negated.
3337  SDValue Zero = DAG.getConstant(0, DL, VT);
3338  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3339 
3340  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3341  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3342  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3343  return Res;
3344  }
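// Illustrative note (annotation, not in the original source): for i32 and
// N1 == 8 the sequence built above is C1 = 3, Inexact = 29, and
//   Sign = X >>s 31;  Srl = Sign >>u 29;  Add = X + Srl;  Res = Add >>s 3;
// e.g. X = -13 gives Sign = -1, Srl = 7, Add = -6, Res = -1, matching the
// round-toward-zero result of -13 / 8.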
3345 
3346  // If integer divide is expensive and we satisfy the requirements, emit an
3347  // alternate sequence. Targets may check function attributes for size/speed
3348  // trade-offs.
3349  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3350  if (isConstantOrConstantVector(N1) &&
3351  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3352  if (SDValue Op = BuildSDIV(N))
3353  return Op;
3354 
3355  return SDValue();
3356 }
3357 
3358 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3359  SDValue N0 = N->getOperand(0);
3360  SDValue N1 = N->getOperand(1);
3361  EVT VT = N->getValueType(0);
3362  EVT CCVT = getSetCCResultType(VT);
3363 
3364  // fold vector ops
3365  if (VT.isVector())
3366  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3367  return FoldedVOp;
3368 
3369  SDLoc DL(N);
3370 
3371  // fold (udiv c1, c2) -> c1/c2
3372  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3373  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3374  if (N0C && N1C)
3375  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3376  N0C, N1C))
3377  return Folded;
3378  // fold (udiv X, -1) -> select(X == -1, 1, 0)
3379  if (N1C && N1C->getAPIntValue().isAllOnesValue())
3380  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3381  DAG.getConstant(1, DL, VT),
3382  DAG.getConstant(0, DL, VT));
3383 
3384  if (SDValue V = simplifyDivRem(N, DAG))
3385  return V;
3386 
3387  if (SDValue NewSel = foldBinOpIntoSelect(N))
3388  return NewSel;
3389 
3390  if (SDValue V = visitUDIVLike(N0, N1, N)) {
3391  // If the corresponding remainder node exists, update its users with
3392  // (Dividend - (Quotient * Divisor)).
3393  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3394  { N0, N1 })) {
3395  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3396  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3397  AddToWorklist(Mul.getNode());
3398  AddToWorklist(Sub.getNode());
3399  CombineTo(RemNode, Sub);
3400  }
3401  return V;
3402  }
3403 
3404  // udiv, urem -> udivrem
3405  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3406  // true. Otherwise, we break the simplification logic in visitREM().
3407  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3408  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3409  if (SDValue DivRem = useDivRem(N))
3410  return DivRem;
3411 
3412  return SDValue();
3413 }
3414 
3415 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3416  SDLoc DL(N);
3417  EVT VT = N->getValueType(0);
3418 
3419  // fold (udiv x, (1 << c)) -> x >>u c
3420  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3421  DAG.isKnownToBeAPowerOfTwo(N1)) {
3422  SDValue LogBase2 = BuildLogBase2(N1, DL);
3423  AddToWorklist(LogBase2.getNode());
3424 
3425  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3426  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3427  AddToWorklist(Trunc.getNode());
3428  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3429  }
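// Illustrative note (annotation, not in the original source): e.g. a divisor
// of 16 becomes a right shift by log2(16) == 4:
//   static_assert(1000u / 16u == 1000u >> 4, "udiv-by-pow2 example");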
3430 
3431  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3432  if (N1.getOpcode() == ISD::SHL) {
3433  SDValue N10 = N1.getOperand(0);
3434  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3435  DAG.isKnownToBeAPowerOfTwo(N10)) {
3436  SDValue LogBase2 = BuildLogBase2(N10, DL);
3437  AddToWorklist(LogBase2.getNode());
3438 
3439  EVT ADDVT = N1.getOperand(1).getValueType();
3440  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3441  AddToWorklist(Trunc.getNode());
3442  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3443  AddToWorklist(Add.getNode());
3444  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3445  }
3446  }
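// Worked example (annotation, not in the original source): with c = 4
// (log2(c) == 2) the fold turns (udiv x, (shl 4, y)) into (srl x, (add 2, y)),
// e.g. 256u / (4u << 3) == 256u >> (2 + 3) == 8.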
3447 
3448  // fold (udiv x, c) -> alternate
3449  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3450  if (isConstantOrConstantVector(N1) &&
3451  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3452  if (SDValue Op = BuildUDIV(N))
3453  return Op;
3454 
3455  return SDValue();
3456 }
3457 
3458 // handles ISD::SREM and ISD::UREM
3459 SDValue DAGCombiner::visitREM(SDNode *N) {
3460  unsigned Opcode = N->getOpcode();
3461  SDValue N0 = N->getOperand(0);
3462  SDValue N1 = N->getOperand(1);
3463  EVT VT = N->getValueType(0);
3464  EVT CCVT = getSetCCResultType(VT);
3465 
3466  bool isSigned = (Opcode == ISD::SREM);
3467  SDLoc DL(N);
3468 
3469  // fold (rem c1, c2) -> c1%c2
3470  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3471  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3472  if (N0C && N1C)
3473  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3474  return Folded;
3475  // fold (urem X, -1) -> select(X == -1, 0, x)
3476  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3477  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3478  DAG.getConstant(0, DL, VT), N0);
3479 
3480  if (SDValue V = simplifyDivRem(N, DAG))
3481  return V;
3482 
3483  if (SDValue NewSel = foldBinOpIntoSelect(N))
3484  return NewSel;
3485 
3486  if (isSigned) {
3487  // If we know the sign bits of both operands are zero, strength reduce to a
3488  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3489  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3490  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3491  } else {
3492  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3493  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3494  // fold (urem x, pow2) -> (and x, pow2-1)
3495  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3496  AddToWorklist(Add.getNode());
3497  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3498  }
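// Illustrative note (annotation, not in the original source): e.g. a divisor
// of 8 turns the remainder into a mask of the low three bits:
//   static_assert(1000u % 8u == (1000u & 7u), "urem-by-pow2 example");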
3499  if (N1.getOpcode() == ISD::SHL &&
3500  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3501  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3502  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3503  AddToWorklist(Add.getNode());
3504  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3505  }
3506  }
3507 
3508  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3509 
3510  // If X/C can be simplified by the division-by-constant logic, lower
3511  // X%C to the equivalent of X-X/C*C.
3512  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3513  // speculative DIV must not cause a DIVREM conversion. We guard against this
3514  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3515  // combine will not return a DIVREM. Regardless, checking cheapness here
3516  // makes sense since the simplification results in fatter code.
3517  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3518  SDValue OptimizedDiv =
3519  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3520  if (OptimizedDiv.getNode()) {
3521  // If the equivalent Div node also exists, update its users.
3522  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3523  if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3524  { N0, N1 }))
3525  CombineTo(DivNode, OptimizedDiv);
3526  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3527  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3528  AddToWorklist(OptimizedDiv.getNode());
3529  AddToWorklist(Mul.getNode());
3530  return Sub;
3531  }
3532  }
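// Worked example (annotation, not in the original source): the rewrite above
// uses the identity X % C == X - (X / C) * C, e.g.
//   static_assert(23u % 7u == 23u - (23u / 7u) * 7u, "rem-from-div example");
// so a remainder by a constant reuses whatever cheap expansion exists for the
// corresponding division.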
3533 
3534  // sdiv, srem -> sdivrem; udiv, urem -> udivrem
3535  if (SDValue DivRem = useDivRem(N))
3536  return DivRem.getValue(1);
3537 
3538  return SDValue();
3539 }
3540 
3541 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3542  SDValue N0 = N->getOperand(0);
3543  SDValue N1 = N->getOperand(1);
3544  EVT VT = N->getValueType(0);
3545  SDLoc DL(N);
3546 
3547  if (VT.isVector()) {
3548  // fold (mulhs x, 0) -> 0
3549  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3550  return N1;
3551  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3552  return N0;
3553  }
3554 
3555  // fold (mulhs x, 0) -> 0
3556  if (isNullConstant(N1))
3557  return N1;
3558  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3559  if (isOneConstant(N1))
3560  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3561  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3562  getShiftAmountTy(N0.getValueType())));
3563 
3564  // fold (mulhs x, undef) -> 0
3565  if (N0.isUndef() || N1.isUndef())
3566  return DAG.getConstant(0, DL, VT);
3567 
3568  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3569  // plus a shift.
3570  if (VT.isSimple() && !VT.isVector()) {
3571  MVT Simple = VT.getSimpleVT();
3572  unsigned SimpleSize = Simple.getSizeInBits();
3573  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3574  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3575  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3576  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3577  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3578  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3579  DAG.getConstant(SimpleSize, DL,
3580  getShiftAmountTy(N1.getValueType())));
3581  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3582  }
3583  }
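// Illustrative note (annotation, not in the original source): for i16 inputs
// this widening computes mulhs(-2, 3) as the i32 product 0xFFFFFFFA, shifts it
// right by 16 to get 0xFFFF, and truncates back to i16 (-1), i.e. the high
// half of the signed 32-bit product.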
3584 
3585  return SDValue();
3586 }
3587 
3588 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3589  SDValue N0 = N->getOperand(0);
3590  SDValue N1 = N->getOperand(1);
3591  EVT VT = N->getValueType(0);
3592  SDLoc DL(N);
3593 
3594  if (VT.isVector()) {
3595  // fold (mulhu x, 0) -> 0
3596  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3597  return N1;
3598  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3599  return N0;
3600  }
3601 
3602  // fold (mulhu x, 0) -> 0
3603  if (isNullConstant(N1))
3604  return N1;
3605  // fold (mulhu x, 1) -> 0
3606  if (isOneConstant(N1))
3607  return DAG.getConstant(0, DL, N0.getValueType());
3608  // fold (mulhu x, undef) -> 0
3609  if (N0.isUndef() || N1.isUndef())
3610  return DAG.getConstant(0, DL, VT);
3611 
3612  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3613  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3614  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3615  SDLoc DL(N);
3616  unsigned NumEltBits = VT.getScalarSizeInBits();
3617  SDValue LogBase2 = BuildLogBase2(N1, DL);
3618  SDValue SRLAmt = DAG.getNode(
3619  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3620  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3621  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3622  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3623  }
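// Worked example (annotation, not in the original source): the high half of
// x * (1 << c) is x >> (bitwidth - c); for i32 and c == 4 that is
//   mulhu(x, 16) == (uint64_t(x) * 16) >> 32 == x >> 28,
// e.g. x == 0x80000000 gives 8 either way.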
3624 
3625  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3626  // plus a shift.
3627  if (VT.isSimple() && !VT.isVector()) {
3628  MVT Simple = VT.getSimpleVT();
3629  unsigned SimpleSize = Simple.getSizeInBits();
3630  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3631  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3632  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3633  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3634  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3635  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3636  DAG.getConstant(SimpleSize, DL,
3637  getShiftAmountTy(N1.getValueType())));
3638  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3639  }
3640  }
3641 
3642  return SDValue();
3643 }
3644 
3645 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3646 /// give the opcodes for the two computations that are being performed. Return
3647 /// the combined value if a simplification was made, or an empty SDValue otherwise.
3648 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3649  unsigned HiOp) {
3650  // If the high half is not needed, just compute the low half.
3651  bool HiExists = N->hasAnyUseOfValue(1);
3652  if (!HiExists && (!LegalOperations ||
3653  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3654  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3655  return CombineTo(N, Res, Res);
3656  }
3657 
3658  // If the low half is not needed, just compute the high half.
3659  bool LoExists = N->hasAnyUseOfValue(0);
3660  if (!LoExists && (!LegalOperations ||
3661  TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3662  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3663  return CombineTo(N, Res, Res);
3664  }
3665 
3666  // If both halves are used, return as it is.
3667  if (LoExists && HiExists)
3668  return SDValue();
3669 
3670  // If the two computed results can be simplified separately, separate them.
3671  if (LoExists) {
3672  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3673  AddToWorklist(Lo.getNode());
3674  SDValue LoOpt = combine(Lo.getNode());
3675  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3676  (!LegalOperations ||
3677  TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3678  return CombineTo(N, LoOpt, LoOpt);
3679  }
3680 
3681  if (HiExists) {
3682  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3683  AddToWorklist(Hi.getNode());
3684  SDValue HiOpt = combine(Hi.getNode());
3685  if (HiOpt.getNode() && HiOpt != Hi &&
3686  (!LegalOperations ||
3687  TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3688  return CombineTo(N, HiOpt, HiOpt);
3689  }
3690 
3691  return SDValue();
3692 }
3693 
3694 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3695  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3696  return Res;
3697 
3698  EVT VT = N->getValueType(0);
3699  SDLoc DL(N);
3700 
3701  // If the type twice as wide is legal, transform this into a wider multiply
3702  // plus a shift.
3703  if (VT.isSimple() && !VT.isVector()) {
3704  MVT Simple = VT.getSimpleVT();
3705  unsigned SimpleSize = Simple.getSizeInBits();
3706  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3707  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3708  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3709  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3710  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3711  // Compute the high part as N1.
3712  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3713  DAG.getConstant(SimpleSize, DL,
3714  getShiftAmountTy(Lo.getValueType())));
3715  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3716  // Compute the low part as N0.
3717  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3718  return CombineTo(N, Lo, Hi);
3719  }
3720  }
3721 
3722  return SDValue();
3723 }
3724 
3725 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3726  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3727  return Res;
3728 
3729  EVT VT = N->getValueType(0);
3730  SDLoc DL(N);
3731 
3732  // If the type twice as wide is legal, transform this into a wider multiply
3733  // plus a shift.
3734  if (VT.isSimple() && !VT.isVector()) {
3735  MVT Simple = VT.getSimpleVT();
3736  unsigned SimpleSize = Simple.getSizeInBits();
3737  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3738  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3739  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3740  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3741  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3742  // Compute the high part as N1.
3743  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3744  DAG.getConstant(SimpleSize, DL,
3745  getShiftAmountTy(Lo.getValueType())));
3746  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3747  // Compute the low part as N0.
3748  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3749  return CombineTo(N, Lo, Hi);
3750  }
3751  }
3752 
3753  return SDValue();
3754 }
3755 
3756 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3757  // (smulo x, 2) -> (saddo x, x)
3758  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3759  if (C2->getAPIntValue() == 2)
3760  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3761  N->getOperand(0), N->getOperand(0));
3762 
3763  return SDValue();
3764 }
3765 
3766 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3767  // (umulo x, 2) -> (uaddo x, x)
3768  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3769  if (C2->getAPIntValue() == 2)
3770  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3771  N->getOperand(0), N->getOperand(0));
3772 
3773  return SDValue();
3774 }
3775 
3776 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3777  SDValue N0 = N->getOperand(0);
3778  SDValue N1 = N->getOperand(1);
3779  EVT VT = N0.getValueType();
3780 
3781  // fold vector ops
3782  if (VT.isVector())
3783  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3784  return FoldedVOp;
3785 
3786  // fold operation with constant operands.
3787  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3788  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3789  if (N0C && N1C)
3790  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3791 
3792  // canonicalize constant to RHS
3793  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3794  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3795  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3796 
3797  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3798  // Only do this if the current op isn't legal and the flipped is.
3799  unsigned Opcode = N->getOpcode();
3800  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3801  if (!TLI.isOperationLegal(Opcode, VT) &&
3802  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3803  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3804  unsigned AltOpcode;
3805  switch (Opcode) {
3806  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3807  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3808  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3809  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3810  default: llvm_unreachable("Unknown MINMAX opcode");
3811  }
3812  if (TLI.isOperationLegal(AltOpcode, VT))
3813  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3814  }
3815 
3816  return SDValue();
3817 }
3818 
3819 /// If this is a bitwise logic instruction and both operands have the same
3820 /// opcode, try to sink the other opcode after the logic instruction.
3821 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
3822  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3823  EVT VT = N0.getValueType();
3824  unsigned LogicOpcode = N->getOpcode();
3825  unsigned HandOpcode = N0.getOpcode();
3826  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
3827  LogicOpcode == ISD::XOR) && "Expected logic opcode");
3828  assert(HandOpcode == N1.getOpcode() && "Bad input!");
3829 
3830  // Bail early if none of these transforms apply.
3831  if (N0.getNumOperands() == 0)
3832  return SDValue();
3833 
3834  // FIXME: We should check number of uses of the operands to not increase
3835  // the instruction count for all transforms.
3836 
3837  // Handle size-changing casts.
3838  SDValue X = N0.getOperand(0);
3839  SDValue Y = N1.getOperand(0);
3840  EVT XVT = X.getValueType();
3841  SDLoc DL(N);
3842  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
3843  HandOpcode == ISD::SIGN_EXTEND) {
3844  // If both operands have other uses, this transform would create extra
3845  // instructions without eliminating anything.
3846  if (!N0.hasOneUse() && !N1.hasOneUse())
3847  return SDValue();
3848  // We need matching integer source types.
3849  if (XVT != Y.getValueType())
3850  return SDValue();
3851  // Don't create an illegal op during or after legalization. Don't ever
3852  // create an unsupported vector op.
3853  if ((VT.isVector() || LegalOperations) &&
3854  !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
3855  return SDValue();
3856  // Avoid infinite looping with PromoteIntBinOp.
3857  // TODO: Should we apply desirable/legal constraints to all opcodes?
3858  if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
3859  !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
3860  return SDValue();
3861  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
3862  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3863  return DAG.getNode(HandOpcode, DL, VT, Logic);
3864  }
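// Illustrative note (annotation, not in the original source): for example
//   (and (zext i8 a to i32), (zext i8 b to i32))
// becomes (zext (and i8 a, b) to i32), performing the logic op once at the
// narrow source type and keeping a single extension.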
3865 
3866  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
3867  if (HandOpcode == ISD::TRUNCATE) {
3868  // If both operands have other uses, this transform would create extra
3869  // instructions without eliminating anything.
3870  if (!N0.hasOneUse() && !N1.hasOneUse())
3871  return SDValue();
3872  // We need matching source types.
3873  if (XVT != Y.getValueType())
3874  return SDValue();
3875  // Don't create an illegal op during or after legalization.
3876  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
3877  return SDValue();
3878  // Be extra careful sinking truncate. If it's free, there's no benefit in
3879  // widening a binop. Also, don't create a logic op on an illegal type.
3880  if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
3881  return SDValue();
3882  if (!TLI.isTypeLegal(XVT))
3883  return SDValue();
3884  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3885  return DAG.getNode(HandOpcode, DL, VT, Logic);
3886  }
3887 
3888  // For binops SHL/SRL/SRA/AND:
3889  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
3890  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
3891  HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
3892  N0.getOperand(1) == N1.getOperand(1)) {
3893  // If either operand has other uses, this transform is not an improvement.
3894  if (!N0.hasOneUse() || !N1.hasOneUse())
3895  return SDValue();
3896  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3897  return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
3898  }
3899 
3900  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
3901  if (HandOpcode == ISD::BSWAP) {
3902  // If either operand has other uses, this transform is not an improvement.
3903  if (!N0.hasOneUse() || !N1.hasOneUse())
3904  return SDValue();
3905  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3906  return DAG.getNode(HandOpcode, DL, VT, Logic);
3907  }
3908 
3909  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3910  // Only perform this optimization up until type legalization, before
3911  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
3912  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3913  // we don't want to undo this promotion.
3914  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3915  // on scalars.
3916  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
3917  Level <= AfterLegalizeTypes) {
3918  // Input types must be integer and the same.
3919  if (XVT.isInteger() && XVT == Y.getValueType()) {
3920  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3921  return DAG.getNode(HandOpcode, DL, VT, Logic);
3922  }
3923  }
3924 
3925  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3926  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3927  // If both shuffles use the same mask, and both shuffle within a single
3928  // vector, then it is worthwhile to move the swizzle after the operation.
3929  // The type-legalizer generates this pattern when loading illegal
3930  // vector types from memory. In many cases this allows additional shuffle
3931  // optimizations.
3932  // There are other cases where moving the shuffle after the xor/and/or
3933  // is profitable even if shuffles don't perform a swizzle.
3934  // If both shuffles use the same mask, and both shuffles have the same first
3935  // or second operand, then it might still be profitable to move the shuffle
3936  // after the xor/and/or operation.
3937  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3938  auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
3939  auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
3940  assert(X.getValueType() == Y.getValueType() &&
3941  "Inputs to shuffles are not the same type");
3942 
3943  // Check that both shuffles use the same mask. The masks are known to be of
3944  // the same length because the result vector type is the same.
3945  // Check also that shuffles have only one use to avoid introducing extra
3946  // instructions.
3947  if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
3948  !SVN0->getMask().equals(SVN1->getMask()))
3949  return SDValue();
3950 
3951  // Don't try to fold this node if it requires introducing a
3952  // build vector of all zeros that might be illegal at this stage.
3953  SDValue ShOp = N0.getOperand(1);
3954  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
3955  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3956 
3957  // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
3958  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3959  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
3960  N0.getOperand(0), N1.getOperand(0));
3961  return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
3962  }
3963 
3964  // Don't try to fold this node if it requires introducing a
3965  // build vector of all zeros that might be illegal at this stage.
3966  ShOp = N0.getOperand(0);
3967  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
3968  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3969 
3970  // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
3971  if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
3972  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
3973  N1.getOperand(1));
3974  return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
3975  }
3976  }
3977 
3978  return SDValue();
3979 }
3980 
3981 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3982 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3983  const SDLoc &DL) {
3984  SDValue LL, LR, RL, RR, N0CC, N1CC;
3985  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3986  !isSetCCEquivalent(N1, RL, RR, N1CC))
3987  return SDValue();
3988 
3989  assert(N0.getValueType() == N1.getValueType() &&
3990  "Unexpected operand types for bitwise logic op");
3991  assert(LL.getValueType() == LR.getValueType() &&
3992  RL.getValueType() == RR.getValueType() &&
3993  "Unexpected operand types for setcc");
3994 
3995  // If we're here post-legalization or the logic op type is not i1, the logic
3996  // op type must match a setcc result type. Also, all folds require new
3997  // operations on the left and right operands, so those types must match.
3998  EVT VT = N0.getValueType();
3999  EVT OpVT = LL.getValueType();
4000  if (LegalOperations || VT.getScalarType() != MVT::i1)
4001  if (VT != getSetCCResultType(OpVT))
4002  return SDValue();
4003  if (OpVT != RL.getValueType())
4004  return SDValue();
4005 
4006  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4007  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4008  bool IsInteger = OpVT.isInteger();
4009  if (LR == RR && CC0 == CC1 && IsInteger) {
4010  bool IsZero = isNullOrNullSplat(LR);
4011  bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4012 
4013  // All bits clear?
4014  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4015  // All sign bits clear?
4016  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4017  // Any bits set?
4018  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4019  // Any sign bits set?
4020  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4021 
4022  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4023  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4024  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4025  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4026  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4027  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4028  AddToWorklist(Or.getNode());
4029  return DAG.getSetCC(DL, VT, Or, LR, CC1);
4030  }
4031 
4032  // All bits set?
4033  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4034  // All sign bits set?
4035  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4036  // Any bits clear?
4037  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4038  // Any sign bits clear?
4039  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4040 
4041  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4042  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4043  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4044  // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
4045  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4046  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4047  AddToWorklist(And.getNode());
4048  return DAG.getSetCC(DL, VT, And, LR, CC1);
4049  }
4050  }
4051 
4052  // TODO: What is the 'or' equivalent of this fold?
4053  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4054  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4055  IsInteger && CC0 == ISD::SETNE &&
4056  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4057  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4058  SDValue One = DAG.getConstant(1, DL, OpVT);
4059  SDValue Two = DAG.getConstant(2, DL, OpVT);
4060  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4061  AddToWorklist(Add.getNode());
4062  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4063  }
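// Illustrative note (annotation, not in the original source): adding 1 maps
// X == 0 to 1 and X == -1 to 0, the only two inputs that land below 2 as
// unsigned values; every other X wraps to an unsigned value >= 2, so the
// single setuge compare reproduces both setne tests.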
4064 
4065  // Try more general transforms if the predicates match and the only user of
4066  // the compares is the 'and' or 'or'.
4067  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4068  N0.hasOneUse() && N1.hasOneUse()) {
4069  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4070  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4071  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4072  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4073  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4074  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4075  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4076  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4077  }
4078  }
4079 
4080  // Canonicalize equivalent operands to LL == RL.
4081  if (LL == RR && LR == RL) {
4082  CC1 = ISD::getSetCCSwappedOperands(CC1);
4083  std::swap(RL, RR);
4084  }
4085 
4086  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4087  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4088  if (LL == RL && LR == RR) {
4089  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4090  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4091  if (NewCC != ISD::SETCC_INVALID &&
4092  (!LegalOperations ||
4093  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4094  TLI.isOperationLegal(ISD::SETCC, OpVT))))
4095  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4096  }
4097 
4098  return SDValue();
4099 }
4100 
4101 /// This contains all DAGCombine rules which reduce two values combined by
4102 /// an And operation to a single value. This makes them reusable in the context
4103 /// of visitSELECT(). Rules involving constants are not included as
4104 /// visitSELECT() already handles those cases.
4105 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4106  EVT VT = N1.getValueType();
4107  SDLoc DL(N);
4108 
4109  // fold (and x, undef) -> 0
4110  if (N0.isUndef() || N1.isUndef())
4111  return DAG.getConstant(0, DL, VT);
4112 
4113  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4114  return V;
4115 
4116  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4117  VT.getSizeInBits() <= 64) {
4118  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4119  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4120  // Look for (and (add x, c1), (lshr y, c2)). If C1 isn't a legal
4121  // immediate for an add but becomes legal once its top c2 bits are set,
4122  // transform the ADD so the immediate doesn't need to be materialized
4123  // in a register.
4124  APInt ADDC = ADDI->getAPIntValue();
4125  APInt SRLC = SRLI->getAPIntValue();
4126  if (ADDC.getMinSignedBits() <= 64 &&
4127  SRLC.ult(VT.getSizeInBits()) &&
4128  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4129  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4130  SRLC.getZExtValue());
4131  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4132  ADDC |= Mask;
4133  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4134  SDLoc DL0(N0);
4135  SDValue NewAdd =
4136  DAG.getNode(ISD::ADD, DL0, VT,
4137  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4138  CombineTo(N0.getNode(), NewAdd);
4139  // Return N so it doesn't get rechecked!
4140  return SDValue(N, 0);
4141  }
4142  }
4143  }
4144  }
4145  }
4146  }
4147 
4148  // Reduce bit extract of low half of an integer to the narrower type.
4149  // (and (srl i64:x, K), KMask) ->
4150  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4151  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4152  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4153  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4154  unsigned Size = VT.getSizeInBits();
4155  const APInt &AndMask = CAnd->getAPIntValue();
4156  unsigned ShiftBits = CShift->getZExtValue();
4157 
4158  // Bail out, this node will probably disappear anyway.
4159  if (ShiftBits == 0)
4160  return SDValue();
4161 
4162  unsigned MaskBits = AndMask.countTrailingOnes();
4163  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4164 
4165  if (AndMask.isMask() &&
4166  // Required bits must not span the two halves of the integer and
4167  // must fit in the half size type.
4168  (ShiftBits + MaskBits <= Size / 2) &&
4169  TLI.isNarrowingProfitable(VT, HalfVT) &&
4170  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4171  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4172  TLI.isTruncateFree(VT, HalfVT) &&
4173  TLI.isZExtFree(HalfVT, VT)) {
4174  // The isNarrowingProfitable is to avoid regressions on PPC and
4175  // AArch64 which match a few 64-bit bit insert / bit extract patterns
4176  // on downstream users of this. Those patterns could probably be
4177  // extended to handle extensions mixed in.
4178 
4179  SDLoc SL(N0);
4180  assert(MaskBits <= Size);
4181 
4182  // Extracting the highest bit of the low half.
4183  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4184  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4185  N0.getOperand(0));
4186 
4187  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4188  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4189  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4190  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4191  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4192  }
4193  }
4194  }
4195  }
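// Worked example (annotation, not in the original source): on a 64-bit value,
// (and (srl i64:x, 8), 0xff) only needs bits 8..15, so it can be rewritten as
// (zero_extend (and (srl (trunc x to i32), 8), 0xff)), letting the shift and
// mask run at the narrower width.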
4196 
4197  return SDValue();
4198 }
4199 
4200 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4201  EVT LoadResultTy, EVT &ExtVT) {
4202  if (!AndC->getAPIntValue().isMask())
4203  return false;
4204 
4205  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4206 
4207  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4208  EVT LoadedVT = LoadN->getMemoryVT();
4209 
4210  if (ExtVT == LoadedVT &&
4211  (!LegalOperations ||
4212  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4213  // ZEXTLOAD will match without needing to change the size of the value being
4214  // loaded.
4215  return true;
4216  }
4217 
4218  // Do not change the width of a volatile load.
4219  if (LoadN->isVolatile())
4220  return false;
4221 
4222  // Do not generate loads of non-round integer types since these can
4223  // be expensive (and would be wrong if the type is not byte sized).
4224  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4225  return false;
4226 
4227  if (LegalOperations &&
4228  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4229  return false;
4230 
4231  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4232  return false;
4233 
4234  return true;
4235 }
4236 
4237 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4238  ISD::LoadExtType ExtType, EVT &MemVT,
4239  unsigned ShAmt) {
4240  if (!LDST)
4241  return false;
4242  // Only allow byte offsets.
4243  if (ShAmt % 8)
4244  return false;
4245 
4246  // Do not generate loads of non-round integer types since these can
4247  // be expensive (and would be wrong if the type is not byte sized).
4248  if (!MemVT.isRound())
4249  return false;
4250 
4251  // Don't change the width of a volatile load.
4252  if (LDST->isVolatile())
4253  return false;
4254 
4255  // Verify that we are actually reducing a load width here.
4256  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4257  return false;
4258 
4259  // Ensure that this isn't going to produce an unsupported unaligned access.
4260  if (ShAmt &&
4261  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4262  LDST->getAddressSpace(), ShAmt / 8))
4263  return false;
4264 
4265  // It's not possible to generate a constant of extended or untyped type.
4266  EVT PtrType = LDST->getBasePtr().getValueType();
4267  if (PtrType == MVT::Untyped || PtrType.isExtended())
4268  return false;
4269 
4270  if (isa<LoadSDNode>(LDST)) {
4271  LoadSDNode *Load = cast<LoadSDNode>(LDST);
4272  // Don't transform one with multiple uses, this would require adding a new
4273  // load.
4274  if (!SDValue(Load, 0).hasOneUse())
4275  return false;
4276 
4277  if (LegalOperations &&
4278  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4279  return false;
4280 
4281  // For the transform to be legal, the load must produce only two values
4282  // (the value loaded and the chain). Don't transform a pre-increment
4283  // load, for example, which produces an extra value. Otherwise the
4284  // transformation is not equivalent, and the downstream logic to replace
4285  // uses gets things wrong.
4286  if (Load->getNumValues() > 2)
4287  return false;
4288 
4289  // If the load that we're shrinking is an extload and we're not just
4290  // discarding the extension we can't simply shrink the load. Bail.
4291  // TODO: It would be possible to merge the extensions in some cases.
4292  if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4293  Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4294  return false;
4295 
4296  if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4297  return false;
4298  } else {
4299  assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4300  StoreSDNode *Store = cast<StoreSDNode>(LDST);
4301  // Can't write outside the original store
4302  if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4303  return false;
4304 
4305  if (LegalOperations &&
4306  !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4307  return false;
4308  }
4309  return true;
4310 }
4311 
4312 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4313  SmallVectorImpl<LoadSDNode*> &Loads,
4314  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4315  ConstantSDNode *Mask,
4316  SDNode *&NodeToMask) {
4317  // Recursively search for the operands, looking for loads which can be
4318  // narrowed.
4319  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4320  SDValue Op = N->getOperand(i);
4321 
4322  if (Op.getValueType().isVector())
4323  return false;
4324 
4325  // Some constants may need fixing up later if they are too large.
4326  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4327  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4328  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4329  NodesWithConsts.insert(N);
4330  continue;
4331  }
4332 
4333  if (!Op.hasOneUse())
4334  return false;
4335 
4336  switch(Op.getOpcode()) {
4337  case ISD::LOAD: {
4338  auto *Load = cast<LoadSDNode>(Op);
4339  EVT ExtVT;
4340  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4341  isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4342 
4343  // ZEXTLOAD is already small enough.
4344  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4345  ExtVT.bitsGE(Load->getMemoryVT()))
4346  continue;
4347 
4348  // Use LE to convert equal sized loads to zext.
4349  if (ExtVT.bitsLE(Load->getMemoryVT()))
4350  Loads.push_back(Load);
4351 
4352  continue;
4353  }
4354  return false;
4355  }
4356  case ISD::ZERO_EXTEND:
4357  case ISD::AssertZext: {
4358  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4359  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4360  EVT VT = Op.getOpcode() == ISD::AssertZext ?
4361  cast<VTSDNode>(Op.getOperand(1))->getVT() :
4362  Op.getOperand(0).getValueType();
4363 
4364  // We can accept extending nodes if the mask is wider than or equal in
4365  // width to the original type.
4366  if (ExtVT.bitsGE(VT))
4367  continue;
4368  break;
4369  }
4370  case ISD::OR:
4371  case ISD::XOR:
4372  case ISD::AND:
4373  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4374  NodeToMask))
4375  return false;
4376  continue;
4377  }
4378 
4379  // Allow one node which will be masked along with any loads found.
4380  if (NodeToMask)
4381  return false;
4382 
4383  // Also ensure that the node to be masked only produces one data result.
4384  NodeToMask = Op.getNode();
4385  if (NodeToMask->getNumValues() > 1) {
4386  bool HasValue = false;
4387  for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4388  MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4389  if (VT != MVT::Glue && VT != MVT::Other) {
4390  if (HasValue) {
4391  NodeToMask = nullptr;
4392  return false;
4393  }
4394  HasValue = true;
4395  }
4396  }
4397  assert(HasValue && "Node to be masked has no data result?");
4398  }
4399  }
4400  return true;
4401 }
4402 
4403 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4404  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4405  if (!Mask)
4406  return false;
4407 
4408  if (!Mask->getAPIntValue().isMask())
4409  return false;
4410 
4411  // No need to do anything if the and directly uses a load.
4412  if (isa<LoadSDNode>(N->getOperand(0)))
4413  return false;
4414 
4415  SmallVector<LoadSDNode*, 8> Loads;
4416  SmallPtrSet<SDNode*, 2> NodesWithConsts;
4417  SDNode *FixupNode = nullptr;
4418  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4419  if (Loads.size() == 0)
4420  return false;
4421 
4422  LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4423  SDValue MaskOp = N->getOperand(1);
4424 
4425  // If it exists, fixup the single node we allow in the tree that needs
4426  // masking.
4427  if (FixupNode) {
4428  LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4429  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4430  FixupNode->getValueType(0),
4431  SDValue(FixupNode, 0), MaskOp);
4432  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4433  if (And.getOpcode() == ISD::AND)
4434  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4435  }
4436 
4437  // Narrow any constants that need it.
4438  for (auto *LogicN : NodesWithConsts) {
4439  SDValue Op0 = LogicN->getOperand(0);
4440  SDValue Op1 = LogicN->getOperand(1);
4441 
4442  if (isa<ConstantSDNode>(Op0))
4443  std::swap(Op0, Op1);
4444 
4445  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4446  Op1, MaskOp);
4447 
4448  DAG.UpdateNodeOperands(LogicN, Op0, And);
4449  }
4450 
4451  // Create narrow loads.
4452  for (auto *Load : Loads) {
4453  LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4454  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4455  SDValue(Load, 0), MaskOp);
4456  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4457  if (And.getOpcode() == ISD::AND)
4458  And = SDValue(
4459  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4460  SDValue NewLoad = ReduceLoadWidth(And.getNode());
4461  assert(NewLoad &&
4462  "Shouldn't be masking the load if it can't be narrowed");
4463  CombineTo(Load, NewLoad, NewLoad.getValue(1));
4464  }
4465  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4466  return true;
4467  }
4468  return false;
4469 }
4470 
4471 // Unfold
4472 // x & (-1 'logical shift' y)
4473 // To
4474 // (x 'opposite logical shift' y) 'logical shift' y
4475 // if it is better for performance.
4476 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4477  assert(N->getOpcode() == ISD::AND);
4478 
4479  SDValue N0 = N->getOperand(0);
4480  SDValue N1 = N->getOperand(1);
4481 
4482  // Do we actually prefer shifts over mask?
4483  if (!TLI.preferShiftsToClearExtremeBits(N0))
4484  return SDValue();
4485 
4486  // Try to match (-1 '[outer] logical shift' y)
4487  unsigned OuterShift;
4488  unsigned InnerShift; // The opposite direction to the OuterShift.
4489  SDValue Y; // Shift amount.
4490  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4491  if (!M.hasOneUse())
4492  return false;
4493  OuterShift = M->getOpcode();
4494  if (OuterShift == ISD::SHL)
4495  InnerShift = ISD::SRL;
4496  else if (OuterShift == ISD::SRL)
4497  InnerShift = ISD::SHL;
4498  else
4499  return false;
4500  if (!isAllOnesConstant(M->getOperand(0)))
4501  return false;
4502  Y = M->getOperand(1);
4503  return true;
4504  };
4505 
4506  SDValue X;
4507  if (matchMask(N1))
4508  X = N0;
4509  else if (matchMask(N0))
4510  X = N1;
4511  else
4512  return SDValue();
4513 
4514  SDLoc DL(N);
4515  EVT VT = N->getValueType(0);
4516 
4517  // tmp = x 'opposite logical shift' y
4518  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4519  // ret = tmp 'logical shift' y
4520  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4521 
4522  return T1;
4523 }
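// Illustrative note (annotation, not in the original source): x & (-1 << y)
// clears the low y bits, which is exactly (x >>u y) << y; likewise
// x & (-1 >>u y) clears the high y bits and becomes (x << y) >>u y.
// E.g. (0xABCDu & (~0u << 4)) == ((0xABCDu >> 4) << 4) == 0xABC0.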
4524 
4525 SDValue DAGCombiner::visitAND(SDNode *N) {
4526  SDValue N0 = N->getOperand(0);
4527  SDValue N1 = N->getOperand(1);
4528  EVT VT = N1.getValueType();
4529 
4530  // x & x --> x
4531  if (N0 == N1)
4532  return N0;
4533 
4534  // fold vector ops
4535  if (VT.isVector()) {
4536  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4537  return FoldedVOp;
4538 
4539  // fold (and x, 0) -> 0, vector edition
4540  if (ISD::isBuildVectorAllZeros(N0.getNode()))
4541  // do not return N0, because undef node may exist in N0
4542  return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4543  SDLoc(N), N0.getValueType());
4544  if (ISD::isBuildVectorAllZeros(N1.getNode()))
4545  // do not return N1, because undef node may exist in N1
4546  return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4547  SDLoc(N), N1.getValueType());
4548 
4549  // fold (and x, -1) -> x, vector edition
4550  if (ISD::isBuildVectorAllOnes(N0.getNode()))
4551  return N1;
4552  if (ISD::isBuildVectorAllOnes(N1.getNode()))
4553  return N0;
4554  }
4555 
4556  // fold (and c1, c2) -> c1&c2
4557  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4558  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4559  if (N0C && N1C && !N1C->isOpaque())
4560  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4561  // canonicalize constant to RHS
4562  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4563  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4564  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4565  // fold (and x, -1) -> x
4566  if (isAllOnesConstant(N1))
4567  return N0;
4568  // if (and x, c) is known to be zero, return 0
4569  unsigned BitWidth = VT.getScalarSizeInBits();
4570  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4571  APInt::getAllOnesValue(BitWidth)))
4572  return DAG.getConstant(0, SDLoc(N), VT);
4573 
4574  if (SDValue NewSel = foldBinOpIntoSelect(N))
4575  return NewSel;
4576 
4577  // reassociate and
4578  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4579  return RAND;
4580 
4581  // Try to convert a constant mask AND into a shuffle clear mask.
4582  if (VT.isVector())
4583  if (SDValue Shuffle = XformToShuffleWithZero(N))
4584  return Shuffle;
4585 
4586  // fold (and (or x, C), D) -> D if (C & D) == D
4587  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4588  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4589  };
4590  if (N0.getOpcode() == ISD::OR &&
4591  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4592  return N1;
4593  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4594  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4595  SDValue N0Op0 = N0.getOperand(0);
4596  APInt Mask = ~N1C->getAPIntValue();
4597  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4598  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4599  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4600  N0.getValueType(), N0Op0);
4601 
4602  // Replace uses of the AND with uses of the Zero extend node.
4603  CombineTo(N, Zext);
4604 
4605  // We actually want to replace all uses of the any_extend with the
4606  // zero_extend, to avoid duplicating things. This will later cause this
4607  // AND to be folded.
4608  CombineTo(N0.getNode(), Zext);
4609  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4610  }
4611  }
4612  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4613  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4614  // already be zero by virtue of the width of the base type of the load.
4615  //
4616  // the 'X' node here can either be nothing or an extract_vector_elt to catch
4617  // more cases.
4618  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4620  N0.getOperand(0).getOpcode() == ISD::LOAD &&
4621  N0.getOperand(0).getResNo() == 0) ||
4622  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4623  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4624  N0 : N0.getOperand(0) );
4625 
4626  // Get the constant (if applicable) the zero'th operand is being ANDed with.
4627  // This can be a pure constant or a vector splat, in which case we treat the
4628  // vector as a scalar and use the splat value.
4629  APInt Constant(1, 0);
4630  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4631  Constant = C->getAPIntValue();
4632  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4633  APInt SplatValue, SplatUndef;
4634  unsigned SplatBitSize;
4635  bool HasAnyUndefs;
4636  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4637  SplatBitSize, HasAnyUndefs);
4638  if (IsSplat) {
4639  // Undef bits can contribute to a possible optimisation if set, so
4640  // set them.
4641  SplatValue |= SplatUndef;
4642 
4643  // The splat value may be something like "0x00FFFFFF", which means 0 for
4644  // the first vector value and FF for the rest, repeating. We need a mask
4645  // that will apply equally to all members of the vector, so AND all the
4646  // lanes of the constant together.
4647  EVT VT = Vector->getValueType(0);
4648  unsigned BitWidth = VT.getScalarSizeInBits();
4649 
4650  // If the splat value has been compressed to a bitlength lower
4651  // than the size of the vector lane, we need to re-expand it to
4652  // the lane size.
4653  if (BitWidth > SplatBitSize)
4654  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4655  SplatBitSize < BitWidth;
4656  SplatBitSize = SplatBitSize * 2)
4657  SplatValue |= SplatValue.shl(SplatBitSize);
4658 
4659  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4660  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4661  if (SplatBitSize % BitWidth == 0) {
4662  Constant = APInt::getAllOnesValue(BitWidth);
4663  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4664  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4665  }
4666  }
4667  }
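  // Worked example of the splat handling above (hypothetical values): for a
  // v4i16 AND whose mask splats the 32-bit value 0x00FFFFFF, the splat is
  // wider than the 16-bit lanes, so the two halves 0x00FF and 0xFFFF are
  // ANDed together and Constant becomes 0x00FF, the mask seen by every lane.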
4668 
4669  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4670  // actually legal and isn't going to get expanded, else this is a false
4671  // optimisation.
4672  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4673  Load->getValueType(0),
4674  Load->getMemoryVT());
4675 
4676  // Resize the constant to the same size as the original memory access before
4677  // extension. If it is still the AllOnesValue then this AND is completely
4678  // unneeded.
4679  Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4680 
4681  bool B;
4682  switch (Load->getExtensionType()) {
4683  default: B = false; break;
4684  case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4685  case ISD::ZEXTLOAD:
4686  case ISD::NON_EXTLOAD: B = true; break;
4687  }
4688 
4689  if (B && Constant.isAllOnesValue()) {
4690  // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4691  // preserve semantics once we get rid of the AND.
4692  SDValue NewLoad(Load, 0);
4693 
4694  // Fold the AND away. NewLoad may get replaced immediately.
4695  CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4696 
4697  if (Load->getExtensionType() == ISD::EXTLOAD) {
4698  NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4699  Load->getValueType(0), SDLoc(Load),
4700  Load->getChain(), Load->getBasePtr(),
4701  Load->getOffset(), Load->getMemoryVT(),
4702  Load->getMemOperand());
4703  // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4704  if (Load->getNumValues() == 3) {
4705  // PRE/POST_INC loads have 3 values.
4706  SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4707  NewLoad.getValue(2) };
4708  CombineTo(Load, To, 3, true);
4709  } else {
4710  CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4711  }
4712  }
4713 
4714  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4715  }
4716  }
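  // Sketch of the net effect above (hypothetical types): for
  //   (and (extload i8 from p to i32), 0x000000FF)
  // the mask truncated to the 8-bit memory type is all ones, so once the
  // extending load is legal to rewrite as a zextload the AND is redundant and
  // is folded away.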
4717 
4718  // fold (and (load x), 255) -> (zextload x, i8)
4719  // fold (and (extload x, i16), 255) -> (zextload x, i8)
4720  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4721  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4722  (N0.getOpcode() == ISD::ANY_EXTEND &&
4723  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4724  if (SDValue Res = ReduceLoadWidth(N)) {
4725  LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4726  ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4727  AddToWorklist(N);
4728  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
4729  return SDValue(N, 0);
4730  }
4731  }
4732 
4733  if (Level >= AfterLegalizeTypes) {
4734  // Attempt to propagate the AND back up to the leaves which, if they're
4735  // loads, can be combined to narrow loads and the AND node can be removed.
4736  // Perform after legalization so that extend nodes will already be
4737  // combined into the loads.
4738  if (BackwardsPropagateMask(N, DAG)) {
4739  return SDValue(N, 0);
4740  }
4741  }
4742 
4743  if (SDValue Combined = visitANDLike(N0, N1, N))
4744  return Combined;
4745 
4746  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4747  if (N0.getOpcode() == N1.getOpcode())
4748  if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
4749  return V;
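  // For instance (illustrative): (and (zext a), (zext b)) can be hoisted to
  // (zext (and a, b)), performing the AND in the narrower source type.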
4750 
4751  // Masking the negated extension of a boolean is just the zero-extended
4752  // boolean:
4753  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4754  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4755  //
4756  // Note: the SimplifyDemandedBits fold below can make an information-losing
4757  // transform, and then we have no way to find this better fold.
4758  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4759  if (isNullOrNullSplat(N0.getOperand(0))) {
4760  SDValue SubRHS = N0.getOperand(1);
4761  if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4762  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4763  return SubRHS;
4764  if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4765  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4766  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4767  }
4768  }
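  // For instance (illustrative): with b of type i1,
  //   and (sub 0, sext(b to i32)), 1
  // the subtraction yields 0 or 1, so the masked result is exactly
  //   zext(b to i32).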
4769 
4770  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4771  // fold (and (sra)) -> (and (srl)) when possible.
4772  if (SimplifyDemandedBits(SDValue(N, 0)))
4773  return SDValue(N, 0);
4774 
4775  // fold (zext_inreg (extload x)) -> (zextload x)
4776  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4777  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4778  EVT MemVT = LN0->getMemoryVT();
4779  // If we zero all the possible extended bits, then we can turn this into
4780  // a zextload if we are running before legalize or the operation is legal.
4781  unsigned BitWidth = N1.getScalarValueSizeInBits();
4782  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4783  BitWidth - MemVT.getScalarSizeInBits())) &&
4784  ((!LegalOperations && !LN0->isVolatile()) ||
4785  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4786  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4787  LN0->getChain(), LN0->getBasePtr(),
4788  MemVT, LN0->getMemOperand());
4789  AddToWorklist(N);
4790  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4791  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4792  }
4793  }
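  // Illustrative shape of the zext_inreg fold above: if N0 is
  //   (extload i16 from p to i32)
  // and the mask is 0x0000FFFF, every bit above the loaded width is cleared,
  // so the combination is equivalent to (zextload i16 from p to i32).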
4794  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4795  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4796  N0.hasOneUse()) {
4797  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4798  EVT MemVT = LN0->getMemoryVT();
4799  // If we zero all the possible extended bits, then we can turn this into
4800  // a zextload if we are running before legalize or the operation is legal.
4801  unsigned BitWidth = N1.getScalarValueSizeInBits();
4802  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4803  BitWidth - MemVT.getScalarSizeInBits())) &&
4804  ((!LegalOperations && !LN0->isVolatile()) ||
4805  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4806  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4807  LN0->getChain(), LN0->getBasePtr(),
4808  MemVT, LN0->getMemOperand());
4809  AddToWorklist(N);
4810  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4811  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4812  }
4813  }
4814  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4815  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4816  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4817  N0.getOperand(1), false))
4818  return BSwap;
4819  }
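  // Illustrative i32 shape of the fold above (assuming the srl operand is
  // masked with 0xff or the upper half of N is known zero):
  //   (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), 16)
  // i.e. the low halfword holds the two low bytes of N swapped.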
4820 
4821  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4822  return Shifts;
4823 
4824  return SDValue();
4825 }
4826 
4827 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4828 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4829  bool DemandHighBits) {
4830  if (!LegalOperations)
4831  return SDValue();
4832 
4833  EVT VT = N->getValueType(0);
4834  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4835  return SDValue();
4836  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4837  return SDValue();
4838 
4839  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4840  bool LookPassAnd0 = false;
4841  bool LookPassAnd1 = false;
4842  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4843  std::swap(N0, N1);
4844  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4845  std::swap(N0, N1);
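  // The two swaps above canonicalize the operands so that an AND wrapping the
  // SHL, if present, ends up in N0 and an AND wrapping the SRL in N1.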
4846  if (N0.getOpcode() == ISD::AND) {
4847  if (!N0.getNode()->hasOneUse())
4848  return SDValue();
4849  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4850  // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4851  // This is needed for X86.
4852  if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4853  N01C->getZExtValue() != 0xFFFF))
4854  return SDValue();
4855  N0 = N0.getOperand(0);
4856  LookPassAnd0 = true;
4857  }
4858 
4859  if (N1.getOpcode() == ISD::AND) {
4860  if (!N1.getNode()->hasOneUse())
4861  return SDValue();
4862  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4863  if (!N11C || N11C->getZExtValue() != 0xFF)