1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "dagcombine"
79 
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of loads sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94  cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
97 static cl::opt<std::string>
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99  cl::desc("Only use DAG-combiner alias analysis in this"
100  " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107  cl::desc("Bypass the profitability model of load slicing"),
108  cl::init(false));
109 
110 static cl::opt<bool>
111  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112  cl::desc("DAG combiner may split indexing from loads"));
113 
114 namespace {
115 
116  class DAGCombiner {
117  SelectionDAG &DAG;
118  const TargetLowering &TLI;
119  CombineLevel Level;
120  CodeGenOpt::Level OptLevel;
121  bool LegalOperations = false;
122  bool LegalTypes = false;
123  bool ForCodeSize;
124 
125  /// Worklist of all of the nodes that need to be simplified.
126  ///
127  /// This must behave as a stack -- new nodes to process are pushed onto the
128  /// back and when processing we pop off of the back.
129  ///
130  /// The worklist will not contain duplicates but may contain null entries
131  /// due to nodes being deleted from the underlying DAG.
132  SmallVector<SDNode *, 64> Worklist;
133 
134  /// Mapping from an SDNode to its position on the worklist.
135  ///
136  /// This is used to find and remove nodes from the worklist (by nulling
137  /// them) when they are deleted from the underlying DAG. It relies on
138  /// stable indices of nodes within the worklist.
139  DenseMap<SDNode *, unsigned> WorklistMap;
140  /// This records all nodes attempted to be added to the worklist since we
141  /// considered a new worklist entry. Because we do not add duplicate nodes
142  /// to the worklist, this may differ from the tail of the worklist.
143  SmallSetVector<SDNode *, 32> PruningList;
144 
145  /// Set of nodes which have been combined (at least once).
146  ///
147  /// This is used to allow us to reliably add any operands of a DAG node
148  /// which have not yet been combined to the worklist.
149  SmallPtrSet<SDNode *, 32> CombinedNodes;
150 
151  // AA - Used for DAG load/store alias analysis.
152  AliasAnalysis *AA;
153 
154  /// When an instruction is simplified, add all users of the instruction to
155  /// the work lists because they might get more simplified now.
156  void AddUsersToWorklist(SDNode *N) {
157  for (SDNode *Node : N->uses())
158  AddToWorklist(Node);
159  }
160 
161  // Prune potentially dangling nodes. This is called after
162  // any visit to a node, but should also be called during a visit after any
163  // failed combine which may have created a DAG node.
164  void clearAddedDanglingWorklistEntries() {
165  // Check any nodes added to the worklist to see if they are prunable.
166  while (!PruningList.empty()) {
167  auto *N = PruningList.pop_back_val();
168  if (N->use_empty())
169  recursivelyDeleteUnusedNodes(N);
170  }
171  }
172 
173  SDNode *getNextWorklistEntry() {
174  // Before we do any work, remove nodes that are not in use.
175  clearAddedDanglingWorklistEntries();
176  SDNode *N = nullptr;
177  // The Worklist holds the SDNodes in order, but it may contain null
178  // entries.
179  while (!N && !Worklist.empty()) {
180  N = Worklist.pop_back_val();
181  }
182 
183  if (N) {
184  bool GoodWorklistEntry = WorklistMap.erase(N);
185  (void)GoodWorklistEntry;
186  assert(GoodWorklistEntry &&
187  "Found a worklist entry without a corresponding map entry!");
188  }
189  return N;
190  }
191 
192  /// Call the node-specific routine that folds each particular type of node.
193  SDValue visit(SDNode *N);
194 
195  public:
196  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
197  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
198  OptLevel(OL), AA(AA) {
199  ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
200 
201  MaximumLegalStoreInBits = 0;
202  for (MVT VT : MVT::all_valuetypes())
203  if (EVT(VT).isSimple() && VT != MVT::Other &&
204  TLI.isTypeLegal(EVT(VT)) &&
205  VT.getSizeInBits() >= MaximumLegalStoreInBits)
206  MaximumLegalStoreInBits = VT.getSizeInBits();
207  }
208 
209  void ConsiderForPruning(SDNode *N) {
210  // Mark this for potential pruning.
211  PruningList.insert(N);
212  }
213 
214  /// Add to the worklist, making sure its instance is at the back (next to be
215  /// processed).
216  void AddToWorklist(SDNode *N) {
217  assert(N->getOpcode() != ISD::DELETED_NODE &&
218  "Deleted Node added to Worklist");
219 
220  // Skip handle nodes as they can't usefully be combined and confuse the
221  // zero-use deletion strategy.
222  if (N->getOpcode() == ISD::HANDLENODE)
223  return;
224 
225  ConsiderForPruning(N);
226 
227  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
228  Worklist.push_back(N);
229  }
230 
231  /// Remove all instances of N from the worklist.
232  void removeFromWorklist(SDNode *N) {
233  CombinedNodes.erase(N);
234  PruningList.remove(N);
235 
236  auto It = WorklistMap.find(N);
237  if (It == WorklistMap.end())
238  return; // Not in the worklist.
239 
240  // Null out the entry rather than erasing it to avoid a linear operation.
241  Worklist[It->second] = nullptr;
242  WorklistMap.erase(It);
243  }
244 
245  void deleteAndRecombine(SDNode *N);
246  bool recursivelyDeleteUnusedNodes(SDNode *N);
247 
248  /// Replaces all uses of the results of one DAG node with new values.
249  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
250  bool AddTo = true);
251 
252  /// Replaces all uses of the results of one DAG node with new values.
253  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
254  return CombineTo(N, &Res, 1, AddTo);
255  }
256 
257  /// Replaces all uses of the results of one DAG node with new values.
258  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
259  bool AddTo = true) {
260  SDValue To[] = { Res0, Res1 };
261  return CombineTo(N, To, 2, AddTo);
262  }
263 
264  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
265 
266  private:
267  unsigned MaximumLegalStoreInBits;
268 
269  /// Check the specified integer node value to see if it can be simplified or
270  /// if things it uses can be simplified by bit propagation.
271  /// If so, return true.
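 /// (This convenience overload demands all bits of every element; the
 /// overloads below let callers restrict the demanded bits or elements.)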
272  bool SimplifyDemandedBits(SDValue Op) {
273  unsigned BitWidth = Op.getScalarValueSizeInBits();
274  APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
275  return SimplifyDemandedBits(Op, DemandedBits);
276  }
277 
278  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
279  EVT VT = Op.getValueType();
280  unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
281  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
282  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
283  }
284 
285  /// Check the specified vector node value to see if it can be simplified or
286  /// if things it uses can be simplified as it only uses some of the
287  /// elements. If so, return true.
288  bool SimplifyDemandedVectorElts(SDValue Op) {
289  unsigned NumElts = Op.getValueType().getVectorNumElements();
290  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
291  return SimplifyDemandedVectorElts(Op, DemandedElts);
292  }
293 
294  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
295  const APInt &DemandedElts);
296  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
297  bool AssumeSingleUse = false);
298 
299  bool CombineToPreIndexedLoadStore(SDNode *N);
300  bool CombineToPostIndexedLoadStore(SDNode *N);
301  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
302  bool SliceUpLoad(SDNode *N);
303 
304  // Scalars have size 0 to distinguish from singleton vectors.
305  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
306  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
307  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
308 
309  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
310  /// load.
311  ///
312  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
313  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
314  /// \param EltNo index of the vector element to load.
315  /// \param OriginalLoad load that EVE came from to be replaced.
316  /// \returns EVE on success SDValue() on failure.
317  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
318  SDValue EltNo,
319  LoadSDNode *OriginalLoad);
320  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
321  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
322  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
323  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
324  SDValue PromoteIntBinOp(SDValue Op);
325  SDValue PromoteIntShiftOp(SDValue Op);
326  SDValue PromoteExtend(SDValue Op);
327  bool PromoteLoad(SDValue Op);
328 
329  /// Call the node-specific routine that knows how to fold each
330  /// particular type of node. If that doesn't do anything, try the
331  /// target-specific DAG combines.
332  SDValue combine(SDNode *N);
333 
334  // Visitation implementation - Implement dag node combining for different
335  // node types. The semantics are as follows:
336  // Return Value:
337  // SDValue.getNode() == 0 - No change was made
338  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
339  // otherwise - N should be replaced by the returned Operand.
340  //
341  SDValue visitTokenFactor(SDNode *N);
342  SDValue visitMERGE_VALUES(SDNode *N);
343  SDValue visitADD(SDNode *N);
344  SDValue visitADDLike(SDNode *N);
345  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
346  SDValue visitSUB(SDNode *N);
347  SDValue visitADDSAT(SDNode *N);
348  SDValue visitSUBSAT(SDNode *N);
349  SDValue visitADDC(SDNode *N);
350  SDValue visitADDO(SDNode *N);
351  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
352  SDValue visitSUBC(SDNode *N);
353  SDValue visitSUBO(SDNode *N);
354  SDValue visitADDE(SDNode *N);
355  SDValue visitADDCARRY(SDNode *N);
356  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
357  SDValue visitSUBE(SDNode *N);
358  SDValue visitSUBCARRY(SDNode *N);
359  SDValue visitMUL(SDNode *N);
360  SDValue useDivRem(SDNode *N);
361  SDValue visitSDIV(SDNode *N);
362  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
363  SDValue visitUDIV(SDNode *N);
364  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
365  SDValue visitREM(SDNode *N);
366  SDValue visitMULHU(SDNode *N);
367  SDValue visitMULHS(SDNode *N);
368  SDValue visitSMUL_LOHI(SDNode *N);
369  SDValue visitUMUL_LOHI(SDNode *N);
370  SDValue visitMULO(SDNode *N);
371  SDValue visitIMINMAX(SDNode *N);
372  SDValue visitAND(SDNode *N);
373  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
374  SDValue visitOR(SDNode *N);
375  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
376  SDValue visitXOR(SDNode *N);
377  SDValue SimplifyVBinOp(SDNode *N);
378  SDValue visitSHL(SDNode *N);
379  SDValue visitSRA(SDNode *N);
380  SDValue visitSRL(SDNode *N);
381  SDValue visitFunnelShift(SDNode *N);
382  SDValue visitRotate(SDNode *N);
383  SDValue visitABS(SDNode *N);
384  SDValue visitBSWAP(SDNode *N);
385  SDValue visitBITREVERSE(SDNode *N);
386  SDValue visitCTLZ(SDNode *N);
387  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
388  SDValue visitCTTZ(SDNode *N);
389  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
390  SDValue visitCTPOP(SDNode *N);
391  SDValue visitSELECT(SDNode *N);
392  SDValue visitVSELECT(SDNode *N);
393  SDValue visitSELECT_CC(SDNode *N);
394  SDValue visitSETCC(SDNode *N);
395  SDValue visitSETCCCARRY(SDNode *N);
396  SDValue visitSIGN_EXTEND(SDNode *N);
397  SDValue visitZERO_EXTEND(SDNode *N);
398  SDValue visitANY_EXTEND(SDNode *N);
399  SDValue visitAssertExt(SDNode *N);
400  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
401  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
402  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
403  SDValue visitTRUNCATE(SDNode *N);
404  SDValue visitBITCAST(SDNode *N);
405  SDValue visitBUILD_PAIR(SDNode *N);
406  SDValue visitFADD(SDNode *N);
407  SDValue visitFSUB(SDNode *N);
408  SDValue visitFMUL(SDNode *N);
409  SDValue visitFMA(SDNode *N);
410  SDValue visitFDIV(SDNode *N);
411  SDValue visitFREM(SDNode *N);
412  SDValue visitFSQRT(SDNode *N);
413  SDValue visitFCOPYSIGN(SDNode *N);
414  SDValue visitFPOW(SDNode *N);
415  SDValue visitSINT_TO_FP(SDNode *N);
416  SDValue visitUINT_TO_FP(SDNode *N);
417  SDValue visitFP_TO_SINT(SDNode *N);
418  SDValue visitFP_TO_UINT(SDNode *N);
419  SDValue visitFP_ROUND(SDNode *N);
420  SDValue visitFP_ROUND_INREG(SDNode *N);
421  SDValue visitFP_EXTEND(SDNode *N);
422  SDValue visitFNEG(SDNode *N);
423  SDValue visitFABS(SDNode *N);
424  SDValue visitFCEIL(SDNode *N);
425  SDValue visitFTRUNC(SDNode *N);
426  SDValue visitFFLOOR(SDNode *N);
427  SDValue visitFMINNUM(SDNode *N);
428  SDValue visitFMAXNUM(SDNode *N);
429  SDValue visitFMINIMUM(SDNode *N);
430  SDValue visitFMAXIMUM(SDNode *N);
431  SDValue visitBRCOND(SDNode *N);
432  SDValue visitBR_CC(SDNode *N);
433  SDValue visitLOAD(SDNode *N);
434 
435  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
436  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
437 
438  SDValue visitSTORE(SDNode *N);
439  SDValue visitLIFETIME_END(SDNode *N);
440  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
441  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
442  SDValue visitBUILD_VECTOR(SDNode *N);
443  SDValue visitCONCAT_VECTORS(SDNode *N);
444  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
445  SDValue visitVECTOR_SHUFFLE(SDNode *N);
446  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
447  SDValue visitINSERT_SUBVECTOR(SDNode *N);
448  SDValue visitMLOAD(SDNode *N);
449  SDValue visitMSTORE(SDNode *N);
450  SDValue visitMGATHER(SDNode *N);
451  SDValue visitMSCATTER(SDNode *N);
452  SDValue visitFP_TO_FP16(SDNode *N);
453  SDValue visitFP16_TO_FP(SDNode *N);
454  SDValue visitVECREDUCE(SDNode *N);
455 
456  SDValue visitFADDForFMACombine(SDNode *N);
457  SDValue visitFSUBForFMACombine(SDNode *N);
458  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
459 
460  SDValue XformToShuffleWithZero(SDNode *N);
461  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
462  SDValue N1);
463  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
464  SDValue N1, SDNodeFlags Flags);
465 
466  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
467 
468  SDValue foldSelectOfConstants(SDNode *N);
469  SDValue foldVSelectOfConstants(SDNode *N);
470  SDValue foldBinOpIntoSelect(SDNode *BO);
471  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
472  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
473  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
474  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
475  SDValue N2, SDValue N3, ISD::CondCode CC,
476  bool NotExtCompare = false);
477  SDValue convertSelectOfFPConstantsToLoadOffset(
478  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
479  ISD::CondCode CC);
480  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
481  SDValue N2, SDValue N3, ISD::CondCode CC);
482  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
483  const SDLoc &DL);
484  SDValue unfoldMaskedMerge(SDNode *N);
485  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
486  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
487  const SDLoc &DL, bool foldBooleans);
488  SDValue rebuildSetCC(SDValue N);
489 
490  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
491  SDValue &CC) const;
492  bool isOneUseSetCC(SDValue N) const;
493 
494  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
495  unsigned HiOp);
496  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
497  SDValue CombineExtLoad(SDNode *N);
498  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
499  SDValue combineRepeatedFPDivisors(SDNode *N);
500  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
501  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
502  SDValue BuildSDIV(SDNode *N);
503  SDValue BuildSDIVPow2(SDNode *N);
504  SDValue BuildUDIV(SDNode *N);
505  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
506  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
507  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
508  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
509  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
510  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
511  SDNodeFlags Flags, bool Reciprocal);
512  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
513  SDNodeFlags Flags, bool Reciprocal);
514  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
515  bool DemandHighBits = true);
516  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
517  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
518  SDValue InnerPos, SDValue InnerNeg,
519  unsigned PosOpcode, unsigned NegOpcode,
520  const SDLoc &DL);
521  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
522  SDValue MatchLoadCombine(SDNode *N);
523  SDValue ReduceLoadWidth(SDNode *N);
524  SDValue ReduceLoadOpStoreWidth(SDNode *N);
526  SDValue TransformFPLoadStorePair(SDNode *N);
527  SDValue convertBuildVecZextToZext(SDNode *N);
528  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
529  SDValue reduceBuildVecToShuffle(SDNode *N);
530  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
531  ArrayRef<int> VectorMask, SDValue VecIn1,
532  SDValue VecIn2, unsigned LeftIdx,
533  bool DidSplitVec);
534  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
535 
536  /// Walk up chain skipping non-aliasing memory nodes,
537  /// looking for aliasing nodes and adding them to the Aliases vector.
538  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
539  SmallVectorImpl<SDValue> &Aliases);
540 
541  /// Return true if there is any possibility that the two addresses overlap.
542  bool isAlias(SDNode *Op0, SDNode *Op1) const;
543 
544  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
545  /// chain (aliasing node).
546  SDValue FindBetterChain(SDNode *N, SDValue Chain);
547 
548  /// Try to replace a store and any possibly adjacent stores on
549  /// consecutive chains with better chains. Return true only if St is
550  /// replaced.
551  ///
552  /// Notice that other chains may still be replaced even if the function
553  /// returns false.
554  bool findBetterNeighborChains(StoreSDNode *St);
555 
556  // Helper for findBetterNeighborChains. Walk up the store chain and add
557  // additional chained stores that do not overlap and can be parallelized.
558  bool parallelizeChainedStores(StoreSDNode *St);
559 
560  /// Holds a pointer to an LSBaseSDNode as well as information on where it
561  /// is located in a sequence of memory operations connected by a chain.
562  struct MemOpLink {
563  // Ptr to the mem node.
564  LSBaseSDNode *MemNode;
565 
566  // Offset from the base ptr.
567  int64_t OffsetFromBase;
568 
569  MemOpLink(LSBaseSDNode *N, int64_t Offset)
570  : MemNode(N), OffsetFromBase(Offset) {}
571  };
572 
573  /// This is a helper function for visitMUL to check the profitability
574  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
575  /// MulNode is the original multiply, AddNode is (add x, c1),
576  /// and ConstNode is c2.
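 /// For example, with c1 = 3 and c2 = 5 this asks whether rewriting
 /// (mul (add x, 3), 5) as (add (mul x, 5), 15) is worthwhile.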
577  bool isMulAddWithConstProfitable(SDNode *MulNode,
578  SDValue &AddNode,
579  SDValue &ConstNode);
580 
581  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
582  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
583  /// the type of the loaded value to be extended.
584  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
585  EVT LoadResultTy, EVT &ExtVT);
586 
587  /// Helper function to calculate whether the given Load/Store can have its
588  /// width reduced to ExtVT.
589  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
590  EVT &MemVT, unsigned ShAmt = 0);
591 
592  /// Used by BackwardsPropagateMask to find suitable loads.
593  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
594  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
595  ConstantSDNode *Mask, SDNode *&NodeToMask);
596  /// Attempt to propagate a given AND node back to load leaves so that they
597  /// can be combined into narrow loads.
598  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
599 
600  /// Helper function for MergeConsecutiveStores which merges the
601  /// component store chains.
602  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
603  unsigned NumStores);
604 
605  /// This is a helper function for MergeConsecutiveStores. When the
606  /// source elements of the consecutive stores are all constants or
607  /// all extracted vector elements, try to merge them into one
608  /// larger store introducing bitcasts if necessary. \return True
609  /// if a merged store was created.
610  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
611  EVT MemVT, unsigned NumStores,
612  bool IsConstantSrc, bool UseVector,
613  bool UseTrunc);
614 
615  /// This is a helper function for MergeConsecutiveStores. Stores
616  /// that potentially may be merged with St are placed in
617  /// StoreNodes. RootNode is a chain predecessor to all store
618  /// candidates.
619  void getStoreMergeCandidates(StoreSDNode *St,
620  SmallVectorImpl<MemOpLink> &StoreNodes,
621  SDNode *&Root);
622 
623  /// Helper function for MergeConsecutiveStores. Checks if
624  /// candidate stores have indirect dependency through their
625  /// operands. RootNode is the predecessor to all stores calculated
626  /// by getStoreMergeCandidates and is used to prune the dependency check.
627  /// \return True if safe to merge.
628  bool checkMergeStoreCandidatesForDependencies(
629  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
630  SDNode *RootNode);
631 
632  /// Merge consecutive store operations into a wide store.
633  /// This optimization uses wide integers or vectors when possible.
634  /// \return True if stores were merged into a single wider store (the
635  /// affected nodes are stored as a prefix in \p StoreNodes).
636  bool MergeConsecutiveStores(StoreSDNode *St);
637 
638  /// Try to transform a truncation where C is a constant:
639  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
640  ///
641  /// \p N needs to be a truncation and its first operand an AND. Other
642  /// requirements are checked by the function (e.g. that trunc is
643  /// single-use); if they are not met, an empty SDValue is returned.
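 /// For example, truncating i32 (and X, 255) to i8 yields
 /// (and (trunc X to i8), 255), moving the AND into the narrower type.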
644  SDValue distributeTruncateThroughAnd(SDNode *N);
645 
646  /// Helper function to determine whether the target supports operation
647  /// given by \p Opcode for type \p VT, that is, whether the operation
648  /// is legal or custom before legalizing operations, and whether it is
649  /// legal (but not custom) after legalization.
650  bool hasOperation(unsigned Opcode, EVT VT) {
651  if (LegalOperations)
652  return TLI.isOperationLegal(Opcode, VT);
653  return TLI.isOperationLegalOrCustom(Opcode, VT);
654  }
655 
656  public:
657  /// Runs the dag combiner on all nodes in the work list
658  void Run(CombineLevel AtLevel);
659 
660  SelectionDAG &getDAG() const { return DAG; }
661 
662  /// Returns a type large enough to hold any valid shift amount - before type
663  /// legalization these can be huge.
664  EVT getShiftAmountTy(EVT LHSTy) {
665  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
666  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
667  }
668 
669  /// This method returns true if we are running before type legalization or
670  /// if the specified VT is legal.
671  bool isTypeLegal(const EVT &VT) {
672  if (!LegalTypes) return true;
673  return TLI.isTypeLegal(VT);
674  }
675 
676  /// Convenience wrapper around TargetLowering::getSetCCResultType
677  EVT getSetCCResultType(EVT VT) const {
678  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
679  }
680 
681  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
682  SDValue OrigLoad, SDValue ExtLoad,
683  ISD::NodeType ExtType);
684  };
685 
686 /// This class is a DAGUpdateListener that removes any deleted
687 /// nodes from the worklist.
688 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
689  DAGCombiner &DC;
690 
691 public:
692  explicit WorklistRemover(DAGCombiner &dc)
693  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
694 
695  void NodeDeleted(SDNode *N, SDNode *E) override {
696  DC.removeFromWorklist(N);
697  }
698 };
699 
700 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
701  DAGCombiner &DC;
702 
703 public:
704  explicit WorklistInserter(DAGCombiner &dc)
705  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
706 
707  // FIXME: Ideally we could add N to the worklist, but this causes exponential
708  // compile time costs in large DAGs, e.g. Halide.
709  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
710 };
711 
712 } // end anonymous namespace
713 
714 //===----------------------------------------------------------------------===//
715 // TargetLowering::DAGCombinerInfo implementation
716 //===----------------------------------------------------------------------===//
717 
718 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
719  ((DAGCombiner*)DC)->AddToWorklist(N);
720 }
721 
722 SDValue TargetLowering::DAGCombinerInfo::
723 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
724  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
725 }
726 
727 SDValue TargetLowering::DAGCombinerInfo::
728 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
729  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
730 }
731 
732 SDValue TargetLowering::DAGCombinerInfo::
733 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
734  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
735 }
736 
737 void TargetLowering::DAGCombinerInfo::
738 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
739  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
740 }
741 
742 //===----------------------------------------------------------------------===//
743 // Helper Functions
744 //===----------------------------------------------------------------------===//
745 
746 void DAGCombiner::deleteAndRecombine(SDNode *N) {
747  removeFromWorklist(N);
748 
749  // If the operands of this node are only used by the node, they will now be
750  // dead. Make sure to re-visit them and recursively delete dead nodes.
751  for (const SDValue &Op : N->ops())
752  // For an operand generating multiple values, one of the values may
753  // become dead allowing further simplification (e.g. split index
754  // arithmetic from an indexed load).
755  if (Op->hasOneUse() || Op->getNumValues() > 1)
756  AddToWorklist(Op.getNode());
757 
758  DAG.DeleteNode(N);
759 }
760 
761 /// Return 1 if we can compute the negated form of the specified expression for
762 /// the same cost as the expression itself, or 2 if we can compute the negated
763 /// form more cheaply than the expression itself.
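/// A result of 0 means the negated form is not free. For example, an FNEG node
/// always reports 2 (negating it simply reuses its operand), while FMUL/FDIV
/// report whatever negating one of their operands would cost.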
764 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
765  const TargetLowering &TLI,
766  const TargetOptions *Options,
767  bool ForCodeSize,
768  unsigned Depth = 0) {
769  // fneg is removable even if it has multiple uses.
770  if (Op.getOpcode() == ISD::FNEG) return 2;
771 
772  // Don't allow anything with multiple uses unless we know it is free.
773  EVT VT = Op.getValueType();
774  const SDNodeFlags Flags = Op->getFlags();
775  if (!Op.hasOneUse())
776  if (!(Op.getOpcode() == ISD::FP_EXTEND &&
777  TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
778  return 0;
779 
780  // Don't recurse exponentially.
781  if (Depth > 6) return 0;
782 
783  switch (Op.getOpcode()) {
784  default: return 0;
785  case ISD::ConstantFP: {
786  if (!LegalOperations)
787  return 1;
788 
789  // Don't invert constant FP values after legalization unless the target says
790  // the negated constant is legal.
791  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
792  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
793  ForCodeSize);
794  }
795  case ISD::FADD:
796  if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
797  return 0;
798 
799  // After operation legalization, it might not be legal to create new FSUBs.
800  if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
801  return 0;
802 
803  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
804  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
805  Options, ForCodeSize, Depth + 1))
806  return V;
807  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
808  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
809  ForCodeSize, Depth + 1);
810  case ISD::FSUB:
811  // We can't turn -(A-B) into B-A when we honor signed zeros.
812  if (!Options->NoSignedZerosFPMath &&
813  !Flags.hasNoSignedZeros())
814  return 0;
815 
816  // fold (fneg (fsub A, B)) -> (fsub B, A)
817  return 1;
818 
819  case ISD::FMUL:
820  case ISD::FDIV:
821  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
822  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
823  Options, ForCodeSize, Depth + 1))
824  return V;
825 
826  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
827  ForCodeSize, Depth + 1);
828 
829  case ISD::FP_EXTEND:
830  case ISD::FP_ROUND:
831  case ISD::FSIN:
832  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
833  ForCodeSize, Depth + 1);
834  }
835 }
836 
837 /// If isNegatibleForFree returns true, return the newly negated expression.
838 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
839  bool LegalOperations, bool ForCodeSize,
840  unsigned Depth = 0) {
841  const TargetOptions &Options = DAG.getTarget().Options;
842  // fneg is removable even if it has multiple uses.
843  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
844 
845  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
846 
847  const SDNodeFlags Flags = Op.getNode()->getFlags();
848 
849  switch (Op.getOpcode()) {
850  default: llvm_unreachable("Unknown code");
851  case ISD::ConstantFP: {
852  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
853  V.changeSign();
854  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
855  }
856  case ISD::FADD:
857  assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
858 
859  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
860  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
861  DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
862  Depth+1))
863  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
864  GetNegatedExpression(Op.getOperand(0), DAG,
865  LegalOperations, ForCodeSize,
866  Depth+1),
867  Op.getOperand(1), Flags);
868  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
869  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
870  GetNegatedExpression(Op.getOperand(1), DAG,
871  LegalOperations, ForCodeSize,
872  Depth+1),
873  Op.getOperand(0), Flags);
874  case ISD::FSUB:
875  // fold (fneg (fsub 0, B)) -> B
876  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
877  if (N0CFP->isZero())
878  return Op.getOperand(1);
879 
880  // fold (fneg (fsub A, B)) -> (fsub B, A)
881  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
882  Op.getOperand(1), Op.getOperand(0), Flags);
883 
884  case ISD::FMUL:
885  case ISD::FDIV:
886  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
887  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
888  DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
889  Depth+1))
890  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
891  GetNegatedExpression(Op.getOperand(0), DAG,
892  LegalOperations, ForCodeSize,
893  Depth+1),
894  Op.getOperand(1), Flags);
895 
896  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
897  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
898  Op.getOperand(0),
899  GetNegatedExpression(Op.getOperand(1), DAG,
900  LegalOperations, ForCodeSize,
901  Depth+1), Flags);
902 
903  case ISD::FP_EXTEND:
904  case ISD::FSIN:
905  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
906  GetNegatedExpression(Op.getOperand(0), DAG,
907  LegalOperations, ForCodeSize,
908  Depth+1));
909  case ISD::FP_ROUND:
910  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
911  GetNegatedExpression(Op.getOperand(0), DAG,
912  LegalOperations, ForCodeSize,
913  Depth+1),
914  Op.getOperand(1));
915  }
916 }
917 
918 // APInts must be the same size for most operations; this helper
919 // function zero-extends the shorter of the pair so that they match.
920 // We provide an Offset so that we can create bitwidths that won't overflow.
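// For example, an 8-bit LHS and a 16-bit RHS both become 16-bit values
// (or (16 + Offset)-bit values when a non-zero Offset is supplied).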
921 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
922  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
923  LHS = LHS.zextOrSelf(Bits);
924  RHS = RHS.zextOrSelf(Bits);
925 }
926 
927 // Return true if this node is a setcc, or is a select_cc
928 // that selects between the target values used for true and false, making it
929 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
930 // the appropriate nodes based on the type of node we are checking. This
931 // simplifies life a bit for the callers.
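// For example, (select_cc LHS, RHS, TrueVal, FalseVal, CC) is treated like
// (setcc LHS, RHS, CC) when TrueVal and FalseVal are the target's canonical
// true and false constants.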
932 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
933  SDValue &CC) const {
934  if (N.getOpcode() == ISD::SETCC) {
935  LHS = N.getOperand(0);
936  RHS = N.getOperand(1);
937  CC = N.getOperand(2);
938  return true;
939  }
940 
941  if (N.getOpcode() != ISD::SELECT_CC ||
942  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
943  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
944  return false;
945 
946  if (TLI.getBooleanContents(N.getValueType()) ==
947  TargetLowering::UndefinedBooleanContent)
948  return false;
949 
950  LHS = N.getOperand(0);
951  RHS = N.getOperand(1);
952  CC = N.getOperand(4);
953  return true;
954 }
955 
956 /// Return true if this is a SetCC-equivalent operation with only one use.
957 /// If this is true, it allows the users to invert the operation for free when
958 /// it is profitable to do so.
959 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
960  SDValue N0, N1, N2;
961  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
962  return true;
963  return false;
964 }
965 
966 // Returns the SDNode if it is a constant float BuildVector
967 // or constant float.
968 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
969  if (isa<ConstantFPSDNode>(N))
970  return N.getNode();
971  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
972  return N.getNode();
973  return nullptr;
974 }
975 
976 // Determines if it is a constant integer or a build vector of constant
977 // integers (and undefs).
978 // Do not permit build vector implicit truncation.
979 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
980  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
981  return !(Const->isOpaque() && NoOpaques);
982  if (N.getOpcode() != ISD::BUILD_VECTOR)
983  return false;
984  unsigned BitWidth = N.getScalarValueSizeInBits();
985  for (const SDValue &Op : N->op_values()) {
986  if (Op.isUndef())
987  continue;
988  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
989  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
990  (Const->isOpaque() && NoOpaques))
991  return false;
992  }
993  return true;
994 }
995 
996 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
997 // undef's.
998 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
999  if (V.getOpcode() != ISD::BUILD_VECTOR)
1000  return false;
1001  return isConstantOrConstantVector(V, NoOpaques) ||
1002  ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1003 }
1004 
1005 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1006 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
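// For example, (add (add x, 1), 2) reassociates to (add x, 3); and when
// (add x, c1) has a single use, (add (add x, c1), y) becomes
// (add (add x, y), c1) so the constant can be combined further up the tree.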
1007 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1008  SDValue N0, SDValue N1) {
1009  EVT VT = N0.getValueType();
1010 
1011  if (N0.getOpcode() != Opc)
1012  return SDValue();
1013 
1014  // Don't reassociate reductions.
1015  if (N0->getFlags().hasVectorReduction())
1016  return SDValue();
1017 
1018  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1019  if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1020  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1021  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
1022  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1023  return SDValue();
1024  }
1025  if (N0.hasOneUse()) {
1026  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1027  // iff (op x, c1) has one use
1028  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1029  if (!OpNode.getNode())
1030  return SDValue();
1031  AddToWorklist(OpNode.getNode());
1032  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1033  }
1034  }
1035  return SDValue();
1036 }
1037 
1038 // Try to reassociate commutative binops.
1039 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1040  SDValue N1, SDNodeFlags Flags) {
1041  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1042  // Don't reassociate reductions.
1043  if (Flags.hasVectorReduction())
1044  return SDValue();
1045  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1046  return Combined;
1047  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1048  return Combined;
1049  return SDValue();
1050 }
1051 
1052 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1053  bool AddTo) {
1054  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1055  ++NodesCombined;
1056  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1057  To[0].getNode()->dump(&DAG);
1058  dbgs() << " and " << NumTo - 1 << " other values\n");
1059  for (unsigned i = 0, e = NumTo; i != e; ++i)
1060  assert((!To[i].getNode() ||
1061  N->getValueType(i) == To[i].getValueType()) &&
1062  "Cannot combine value to value of different type!");
1063 
1064  WorklistRemover DeadNodes(*this);
1065  DAG.ReplaceAllUsesWith(N, To);
1066  if (AddTo) {
1067  // Push the new nodes and any users onto the worklist
1068  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1069  if (To[i].getNode()) {
1070  AddToWorklist(To[i].getNode());
1071  AddUsersToWorklist(To[i].getNode());
1072  }
1073  }
1074  }
1075 
1076  // Finally, if the node is now dead, remove it from the graph. The node
1077  // may not be dead if the replacement process recursively simplified to
1078  // something else needing this node.
1079  if (N->use_empty())
1080  deleteAndRecombine(N);
1081  return SDValue(N, 0);
1082 }
1083 
1084 void DAGCombiner::
1085 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1086  // Replace all uses. If any nodes become isomorphic to other nodes and
1087  // are deleted, make sure to remove them from our worklist.
1088  WorklistRemover DeadNodes(*this);
1089  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1090 
1091  // Push the new node and any (possibly new) users onto the worklist.
1092  AddToWorklist(TLO.New.getNode());
1093  AddUsersToWorklist(TLO.New.getNode());
1094 
1095  // Finally, if the node is now dead, remove it from the graph. The node
1096  // may not be dead if the replacement process recursively simplified to
1097  // something else needing this node.
1098  if (TLO.Old.getNode()->use_empty())
1099  deleteAndRecombine(TLO.Old.getNode());
1100 }
1101 
1102 /// Check the specified integer node value to see if it can be simplified or if
1103 /// things it uses can be simplified by bit propagation. If so, return true.
1104 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1105  const APInt &DemandedElts) {
1106  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1107  KnownBits Known;
1108  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1109  return false;
1110 
1111  // Revisit the node.
1112  AddToWorklist(Op.getNode());
1113 
1114  // Replace the old value with the new one.
1115  ++NodesCombined;
1116  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1117  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1118  dbgs() << '\n');
1119 
1120  CommitTargetLoweringOpt(TLO);
1121  return true;
1122 }
1123 
1124 /// Check the specified vector node value to see if it can be simplified or
1125 /// if things it uses can be simplified as it only uses some of the elements.
1126 /// If so, return true.
1127 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1128  const APInt &DemandedElts,
1129  bool AssumeSingleUse) {
1130  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1131  APInt KnownUndef, KnownZero;
1132  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1133  TLO, 0, AssumeSingleUse))
1134  return false;
1135 
1136  // Revisit the node.
1137  AddToWorklist(Op.getNode());
1138 
1139  // Replace the old value with the new one.
1140  ++NodesCombined;
1141  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1142  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1143  dbgs() << '\n');
1144 
1145  CommitTargetLoweringOpt(TLO);
1146  return true;
1147 }
1148 
1149 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1150  SDLoc DL(Load);
1151  EVT VT = Load->getValueType(0);
1152  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1153 
1154  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1155  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1156  WorklistRemover DeadNodes(*this);
1157  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1158  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1159  deleteAndRecombine(Load);
1160  AddToWorklist(Trunc.getNode());
1161 }
1162 
1163 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1164  Replace = false;
1165  SDLoc DL(Op);
1166  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1167  LoadSDNode *LD = cast<LoadSDNode>(Op);
1168  EVT MemVT = LD->getMemoryVT();
1169  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1170  : LD->getExtensionType();
1171  Replace = true;
1172  return DAG.getExtLoad(ExtType, DL, PVT,
1173  LD->getChain(), LD->getBasePtr(),
1174  MemVT, LD->getMemOperand());
1175  }
1176 
1177  unsigned Opc = Op.getOpcode();
1178  switch (Opc) {
1179  default: break;
1180  case ISD::AssertSext:
1181  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1182  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1183  break;
1184  case ISD::AssertZext:
1185  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1186  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1187  break;
1188  case ISD::Constant: {
1189  unsigned ExtOpc =
1190  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1191  return DAG.getNode(ExtOpc, DL, PVT, Op);
1192  }
1193  }
1194 
1195  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1196  return SDValue();
1197  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1198 }
1199 
1200 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1201  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1202  return SDValue();
1203  EVT OldVT = Op.getValueType();
1204  SDLoc DL(Op);
1205  bool Replace = false;
1206  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1207  if (!NewOp.getNode())
1208  return SDValue();
1209  AddToWorklist(NewOp.getNode());
1210 
1211  if (Replace)
1212  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1213  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1214  DAG.getValueType(OldVT));
1215 }
1216 
1217 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1218  EVT OldVT = Op.getValueType();
1219  SDLoc DL(Op);
1220  bool Replace = false;
1221  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1222  if (!NewOp.getNode())
1223  return SDValue();
1224  AddToWorklist(NewOp.getNode());
1225 
1226  if (Replace)
1227  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1228  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1229 }
1230 
1231 /// Promote the specified integer binary operation if the target indicates it is
1232 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1233 /// i32 since i16 instructions are longer.
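/// For example, an undesirable i16 add may be rewritten as
/// (trunc (i32 add (anyext a), (anyext b))), using the wider type chosen by
/// IsDesirableToPromoteOp.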
1234 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1235  if (!LegalOperations)
1236  return SDValue();
1237 
1238  EVT VT = Op.getValueType();
1239  if (VT.isVector() || !VT.isInteger())
1240  return SDValue();
1241 
1242  // If operation type is 'undesirable', e.g. i16 on x86, consider
1243  // promoting it.
1244  unsigned Opc = Op.getOpcode();
1245  if (TLI.isTypeDesirableForOp(Opc, VT))
1246  return SDValue();
1247 
1248  EVT PVT = VT;
1249  // Consult target whether it is a good idea to promote this operation and
1250  // what's the right type to promote it to.
1251  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1252  assert(PVT != VT && "Don't know what type to promote to!");
1253 
1254  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1255 
1256  bool Replace0 = false;
1257  SDValue N0 = Op.getOperand(0);
1258  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1259 
1260  bool Replace1 = false;
1261  SDValue N1 = Op.getOperand(1);
1262  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1263  SDLoc DL(Op);
1264 
1265  SDValue RV =
1266  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1267 
1268  // We are always replacing N0/N1's use in N and only need
1269  // additional replacements if there are additional uses.
1270  Replace0 &= !N0->hasOneUse();
1271  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1272 
1273  // Combine Op here so it is preserved past replacements.
1274  CombineTo(Op.getNode(), RV);
1275 
1276  // If operands have a use ordering, make sure we deal with
1277  // predecessor first.
1278  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1279  std::swap(N0, N1);
1280  std::swap(NN0, NN1);
1281  }
1282 
1283  if (Replace0) {
1284  AddToWorklist(NN0.getNode());
1285  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1286  }
1287  if (Replace1) {
1288  AddToWorklist(NN1.getNode());
1289  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1290  }
1291  return Op;
1292  }
1293  return SDValue();
1294 }
1295 
1296 /// Promote the specified integer shift operation if the target indicates it is
1297 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1298 /// i32 since i16 instructions are longer.
1299 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1300  if (!LegalOperations)
1301  return SDValue();
1302 
1303  EVT VT = Op.getValueType();
1304  if (VT.isVector() || !VT.isInteger())
1305  return SDValue();
1306 
1307  // If operation type is 'undesirable', e.g. i16 on x86, consider
1308  // promoting it.
1309  unsigned Opc = Op.getOpcode();
1310  if (TLI.isTypeDesirableForOp(Opc, VT))
1311  return SDValue();
1312 
1313  EVT PVT = VT;
1314  // Consult target whether it is a good idea to promote this operation and
1315  // what's the right type to promote it to.
1316  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1317  assert(PVT != VT && "Don't know what type to promote to!");
1318 
1319  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1320 
1321  bool Replace = false;
1322  SDValue N0 = Op.getOperand(0);
1323  SDValue N1 = Op.getOperand(1);
1324  if (Opc == ISD::SRA)
1325  N0 = SExtPromoteOperand(N0, PVT);
1326  else if (Opc == ISD::SRL)
1327  N0 = ZExtPromoteOperand(N0, PVT);
1328  else
1329  N0 = PromoteOperand(N0, PVT, Replace);
1330 
1331  if (!N0.getNode())
1332  return SDValue();
1333 
1334  SDLoc DL(Op);
1335  SDValue RV =
1336  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1337 
1338  AddToWorklist(N0.getNode());
1339  if (Replace)
1340  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1341 
1342  // Deal with Op being deleted.
1343  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1344  return RV;
1345  }
1346  return SDValue();
1347 }
1348 
1349 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1350  if (!LegalOperations)
1351  return SDValue();
1352 
1353  EVT VT = Op.getValueType();
1354  if (VT.isVector() || !VT.isInteger())
1355  return SDValue();
1356 
1357  // If operation type is 'undesirable', e.g. i16 on x86, consider
1358  // promoting it.
1359  unsigned Opc = Op.getOpcode();
1360  if (TLI.isTypeDesirableForOp(Opc, VT))
1361  return SDValue();
1362 
1363  EVT PVT = VT;
1364  // Consult target whether it is a good idea to promote this operation and
1365  // what's the right type to promote it to.
1366  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1367  assert(PVT != VT && "Don't know what type to promote to!");
1368  // fold (aext (aext x)) -> (aext x)
1369  // fold (aext (zext x)) -> (zext x)
1370  // fold (aext (sext x)) -> (sext x)
1371  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1372  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1373  }
1374  return SDValue();
1375 }
1376 
1377 bool DAGCombiner::PromoteLoad(SDValue Op) {
1378  if (!LegalOperations)
1379  return false;
1380 
1381  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1382  return false;
1383 
1384  EVT VT = Op.getValueType();
1385  if (VT.isVector() || !VT.isInteger())
1386  return false;
1387 
1388  // If operation type is 'undesirable', e.g. i16 on x86, consider
1389  // promoting it.
1390  unsigned Opc = Op.getOpcode();
1391  if (TLI.isTypeDesirableForOp(Opc, VT))
1392  return false;
1393 
1394  EVT PVT = VT;
1395  // Consult target whether it is a good idea to promote this operation and
1396  // what's the right type to promote it to.
1397  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1398  assert(PVT != VT && "Don't know what type to promote to!");
1399 
1400  SDLoc DL(Op);
1401  SDNode *N = Op.getNode();
1402  LoadSDNode *LD = cast<LoadSDNode>(N);
1403  EVT MemVT = LD->getMemoryVT();
1404  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1405  : LD->getExtensionType();
1406  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1407  LD->getChain(), LD->getBasePtr(),
1408  MemVT, LD->getMemOperand());
1409  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1410 
1411  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1412  Result.getNode()->dump(&DAG); dbgs() << '\n');
1413  WorklistRemover DeadNodes(*this);
1414  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1415  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1416  deleteAndRecombine(N);
1417  AddToWorklist(Result.getNode());
1418  return true;
1419  }
1420  return false;
1421 }
1422 
1423 /// Recursively delete a node which has no uses and any operands for
1424 /// which it is the only use.
1425 ///
1426 /// Note that this both deletes the nodes and removes them from the worklist.
1427 /// It also adds any nodes that have had a user deleted to the worklist, as
1428 /// they may now have only one use and be subject to other combines.
1429 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1430  if (!N->use_empty())
1431  return false;
1432 
1433  SmallSetVector<SDNode *, 16> Nodes;
1434  Nodes.insert(N);
1435  do {
1436  N = Nodes.pop_back_val();
1437  if (!N)
1438  continue;
1439 
1440  if (N->use_empty()) {
1441  for (const SDValue &ChildN : N->op_values())
1442  Nodes.insert(ChildN.getNode());
1443 
1444  removeFromWorklist(N);
1445  DAG.DeleteNode(N);
1446  } else {
1447  AddToWorklist(N);
1448  }
1449  } while (!Nodes.empty());
1450  return true;
1451 }
1452 
1453 //===----------------------------------------------------------------------===//
1454 // Main DAG Combiner implementation
1455 //===----------------------------------------------------------------------===//
1456 
1457 void DAGCombiner::Run(CombineLevel AtLevel) {
1458  // Set the instance variables, so that the various visit routines may use them.
1459  Level = AtLevel;
1460  LegalOperations = Level >= AfterLegalizeVectorOps;
1461  LegalTypes = Level >= AfterLegalizeTypes;
1462 
1463  WorklistInserter AddNodes(*this);
1464 
1465  // Add all the dag nodes to the worklist.
1466  for (SDNode &Node : DAG.allnodes())
1467  AddToWorklist(&Node);
1468 
1469  // Create a dummy node (which is not added to allnodes), that adds a reference
1470  // to the root node, preventing it from being deleted, and tracking any
1471  // changes of the root.
1472  HandleSDNode Dummy(DAG.getRoot());
1473 
1474  // While we have a valid worklist entry node, try to combine it.
1475  while (SDNode *N = getNextWorklistEntry()) {
1476  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1477  // N is deleted from the DAG, since they too may now be dead or may have a
1478  // reduced number of uses, allowing other xforms.
1479  if (recursivelyDeleteUnusedNodes(N))
1480  continue;
1481 
1482  WorklistRemover DeadNodes(*this);
1483 
1484  // If this combine is running after legalizing the DAG, re-legalize any
1485  // nodes pulled off the worklist.
1486  if (Level == AfterLegalizeDAG) {
1487  SmallSetVector<SDNode *, 16> UpdatedNodes;
1488  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1489 
1490  for (SDNode *LN : UpdatedNodes) {
1491  AddToWorklist(LN);
1492  AddUsersToWorklist(LN);
1493  }
1494  if (!NIsValid)
1495  continue;
1496  }
1497 
1498  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1499 
1500  // Add any operands of the new node which have not yet been combined to the
1501  // worklist as well. Because the worklist uniques things already, this
1502  // won't repeatedly process the same operand.
1503  CombinedNodes.insert(N);
1504  for (const SDValue &ChildN : N->op_values())
1505  if (!CombinedNodes.count(ChildN.getNode()))
1506  AddToWorklist(ChildN.getNode());
1507 
1508  SDValue RV = combine(N);
1509 
1510  if (!RV.getNode())
1511  continue;
1512 
1513  ++NodesCombined;
1514 
1515  // If we get back the same node we passed in, rather than a new node or
1516  // zero, we know that the node must have defined multiple values and
1517  // CombineTo was used. Since CombineTo takes care of the worklist
1518  // mechanics for us, we have no work to do in this case.
1519  if (RV.getNode() == N)
1520  continue;
1521 
1522  assert(N->getOpcode() != ISD::DELETED_NODE &&
1523  RV.getOpcode() != ISD::DELETED_NODE &&
1524  "Node was deleted but visit returned new node!");
1525 
1526  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1527 
1528  if (N->getNumValues() == RV.getNode()->getNumValues())
1529  DAG.ReplaceAllUsesWith(N, RV.getNode());
1530  else {
1531  assert(N->getValueType(0) == RV.getValueType() &&
1532  N->getNumValues() == 1 && "Type mismatch");
1533  DAG.ReplaceAllUsesWith(N, &RV);
1534  }
1535 
1536  // Push the new node and any users onto the worklist
1537  AddToWorklist(RV.getNode());
1538  AddUsersToWorklist(RV.getNode());
1539 
1540  // Finally, if the node is now dead, remove it from the graph. The node
1541  // may not be dead if the replacement process recursively simplified to
1542  // something else needing this node. This will also take care of adding any
1543  // operands which have lost a user to the worklist.
1544  recursivelyDeleteUnusedNodes(N);
1545  }
1546 
1547  // If the root changed (e.g. it was a dead load), update the root.
1548  DAG.setRoot(Dummy.getValue());
1549  DAG.RemoveDeadNodes();
1550 }
1551 
1552 SDValue DAGCombiner::visit(SDNode *N) {
1553  switch (N->getOpcode()) {
1554  default: break;
1555  case ISD::TokenFactor: return visitTokenFactor(N);
1556  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1557  case ISD::ADD: return visitADD(N);
1558  case ISD::SUB: return visitSUB(N);
1559  case ISD::SADDSAT:
1560  case ISD::UADDSAT: return visitADDSAT(N);
1561  case ISD::SSUBSAT:
1562  case ISD::USUBSAT: return visitSUBSAT(N);
1563  case ISD::ADDC: return visitADDC(N);
1564  case ISD::SADDO:
1565  case ISD::UADDO: return visitADDO(N);
1566  case ISD::SUBC: return visitSUBC(N);
1567  case ISD::SSUBO:
1568  case ISD::USUBO: return visitSUBO(N);
1569  case ISD::ADDE: return visitADDE(N);
1570  case ISD::ADDCARRY: return visitADDCARRY(N);
1571  case ISD::SUBE: return visitSUBE(N);
1572  case ISD::SUBCARRY: return visitSUBCARRY(N);
1573  case ISD::MUL: return visitMUL(N);
1574  case ISD::SDIV: return visitSDIV(N);
1575  case ISD::UDIV: return visitUDIV(N);
1576  case ISD::SREM:
1577  case ISD::UREM: return visitREM(N);
1578  case ISD::MULHU: return visitMULHU(N);
1579  case ISD::MULHS: return visitMULHS(N);
1580  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1581  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1582  case ISD::SMULO:
1583  case ISD::UMULO: return visitMULO(N);
1584  case ISD::SMIN:
1585  case ISD::SMAX:
1586  case ISD::UMIN:
1587  case ISD::UMAX: return visitIMINMAX(N);
1588  case ISD::AND: return visitAND(N);
1589  case ISD::OR: return visitOR(N);
1590  case ISD::XOR: return visitXOR(N);
1591  case ISD::SHL: return visitSHL(N);
1592  case ISD::SRA: return visitSRA(N);
1593  case ISD::SRL: return visitSRL(N);
1594  case ISD::ROTR:
1595  case ISD::ROTL: return visitRotate(N);
1596  case ISD::FSHL:
1597  case ISD::FSHR: return visitFunnelShift(N);
1598  case ISD::ABS: return visitABS(N);
1599  case ISD::BSWAP: return visitBSWAP(N);
1600  case ISD::BITREVERSE: return visitBITREVERSE(N);
1601  case ISD::CTLZ: return visitCTLZ(N);
1602  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1603  case ISD::CTTZ: return visitCTTZ(N);
1604  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1605  case ISD::CTPOP: return visitCTPOP(N);
1606  case ISD::SELECT: return visitSELECT(N);
1607  case ISD::VSELECT: return visitVSELECT(N);
1608  case ISD::SELECT_CC: return visitSELECT_CC(N);
1609  case ISD::SETCC: return visitSETCC(N);
1610  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1611  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1612  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1613  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1614  case ISD::AssertSext:
1615  case ISD::AssertZext: return visitAssertExt(N);
1616  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1617  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1618  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1619  case ISD::TRUNCATE: return visitTRUNCATE(N);
1620  case ISD::BITCAST: return visitBITCAST(N);
1621  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1622  case ISD::FADD: return visitFADD(N);
1623  case ISD::FSUB: return visitFSUB(N);
1624  case ISD::FMUL: return visitFMUL(N);
1625  case ISD::FMA: return visitFMA(N);
1626  case ISD::FDIV: return visitFDIV(N);
1627  case ISD::FREM: return visitFREM(N);
1628  case ISD::FSQRT: return visitFSQRT(N);
1629  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1630  case ISD::FPOW: return visitFPOW(N);
1631  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1632  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1633  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1634  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1635  case ISD::FP_ROUND: return visitFP_ROUND(N);
1636  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1637  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1638  case ISD::FNEG: return visitFNEG(N);
1639  case ISD::FABS: return visitFABS(N);
1640  case ISD::FFLOOR: return visitFFLOOR(N);
1641  case ISD::FMINNUM: return visitFMINNUM(N);
1642  case ISD::FMAXNUM: return visitFMAXNUM(N);
1643  case ISD::FMINIMUM: return visitFMINIMUM(N);
1644  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1645  case ISD::FCEIL: return visitFCEIL(N);
1646  case ISD::FTRUNC: return visitFTRUNC(N);
1647  case ISD::BRCOND: return visitBRCOND(N);
1648  case ISD::BR_CC: return visitBR_CC(N);
1649  case ISD::LOAD: return visitLOAD(N);
1650  case ISD::STORE: return visitSTORE(N);
1651  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1652  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1653  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1654  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1655  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1656  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1657  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1658  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1659  case ISD::MGATHER: return visitMGATHER(N);
1660  case ISD::MLOAD: return visitMLOAD(N);
1661  case ISD::MSCATTER: return visitMSCATTER(N);
1662  case ISD::MSTORE: return visitMSTORE(N);
1663  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1664  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1665  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1666  case ISD::VECREDUCE_FADD:
1667  case ISD::VECREDUCE_FMUL:
1668  case ISD::VECREDUCE_ADD:
1669  case ISD::VECREDUCE_MUL:
1670  case ISD::VECREDUCE_AND:
1671  case ISD::VECREDUCE_OR:
1672  case ISD::VECREDUCE_XOR:
1673  case ISD::VECREDUCE_SMAX:
1674  case ISD::VECREDUCE_SMIN:
1675  case ISD::VECREDUCE_UMAX:
1676  case ISD::VECREDUCE_UMIN:
1677  case ISD::VECREDUCE_FMAX:
1678  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1679  }
1680  return SDValue();
1681 }
1682 
1683 SDValue DAGCombiner::combine(SDNode *N) {
1684  SDValue RV = visit(N);
1685 
1686  // If nothing happened, try a target-specific DAG combine.
1687  if (!RV.getNode()) {
1688  assert(N->getOpcode() != ISD::DELETED_NODE &&
1689  "Node was deleted but visit returned NULL!");
1690 
1691  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1692  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1693 
1694  // Expose the DAG combiner to the target combiner impls.
1695  TargetLowering::DAGCombinerInfo
1696  DagCombineInfo(DAG, Level, false, this);
1697 
1698  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1699  }
1700  }
1701 
1702  // If nothing happened still, try promoting the operation.
1703  if (!RV.getNode()) {
1704  switch (N->getOpcode()) {
1705  default: break;
1706  case ISD::ADD:
1707  case ISD::SUB:
1708  case ISD::MUL:
1709  case ISD::AND:
1710  case ISD::OR:
1711  case ISD::XOR:
1712  RV = PromoteIntBinOp(SDValue(N, 0));
1713  break;
1714  case ISD::SHL:
1715  case ISD::SRA:
1716  case ISD::SRL:
1717  RV = PromoteIntShiftOp(SDValue(N, 0));
1718  break;
1719  case ISD::SIGN_EXTEND:
1720  case ISD::ZERO_EXTEND:
1721  case ISD::ANY_EXTEND:
1722  RV = PromoteExtend(SDValue(N, 0));
1723  break;
1724  case ISD::LOAD:
1725  if (PromoteLoad(SDValue(N, 0)))
1726  RV = SDValue(N, 0);
1727  break;
1728  }
1729  }
1730 
1731  // If N is a commutative binary node, try eliminate it if the commuted
1732  // version is already present in the DAG.
1733  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1734  N->getNumValues() == 1) {
1735  SDValue N0 = N->getOperand(0);
1736  SDValue N1 = N->getOperand(1);
1737 
1738  // Constant operands are canonicalized to RHS.
1739  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1740  SDValue Ops[] = {N1, N0};
1741  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1742  N->getFlags());
1743  if (CSENode)
1744  return SDValue(CSENode, 0);
1745  }
1746  }
1747 
1748  return RV;
1749 }
1750 
1751 /// Given a node, return its input chain if it has one, otherwise return a null
1752 /// sd operand.
1753 static SDValue getInputChainForNode(SDNode *N) {
1754  if (unsigned NumOps = N->getNumOperands()) {
1755  if (N->getOperand(0).getValueType() == MVT::Other)
1756  return N->getOperand(0);
1757  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1758  return N->getOperand(NumOps-1);
1759  for (unsigned i = 1; i < NumOps-1; ++i)
1760  if (N->getOperand(i).getValueType() == MVT::Other)
1761  return N->getOperand(i);
1762  }
1763  return SDValue();
1764 }
1765 
1766 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1767  // If N has two operands, where one has an input chain equal to the other,
1768  // the 'other' chain is redundant.
1769  if (N->getNumOperands() == 2) {
1770  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1771  return N->getOperand(0);
1772  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1773  return N->getOperand(1);
1774  }
1775 
1776  // Don't simplify token factors if optnone.
1777  if (OptLevel == CodeGenOpt::None)
1778  return SDValue();
1779 
1780  // If the sole user is a token factor, we should make sure we have a
1781  // chance to merge them together. This prevents TF chains from inhibiting
1782  // optimizations.
1783  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1784  AddToWorklist(*(N->use_begin()));
1785 
1786  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1787  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1788  SmallPtrSet<SDNode*, 16> SeenOps;
1789  bool Changed = false; // If we should replace this token factor.
1790 
1791  // Start out with this token factor.
1792  TFs.push_back(N);
1793 
1794  // Iterate through token factors. The TFs list grows when new token factors
1795  // are encountered.
1796  for (unsigned i = 0; i < TFs.size(); ++i) {
1797  SDNode *TF = TFs[i];
1798 
1799  // Check each of the operands.
1800  for (const SDValue &Op : TF->op_values()) {
1801  switch (Op.getOpcode()) {
1802  case ISD::EntryToken:
1803  // Entry tokens don't need to be added to the list. They are
1804  // redundant.
1805  Changed = true;
1806  break;
1807 
1808  case ISD::TokenFactor:
1809  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1810  // Queue up for processing.
1811  TFs.push_back(Op.getNode());
1812  // Clean up in case the token factor is removed.
1813  AddToWorklist(Op.getNode());
1814  Changed = true;
1815  break;
1816  }
1817  LLVM_FALLTHROUGH;
1818 
1819  default:
1820  // Only add if it isn't already in the list.
1821  if (SeenOps.insert(Op.getNode()).second)
1822  Ops.push_back(Op);
1823  else
1824  Changed = true;
1825  break;
1826  }
1827  }
1828  }
1829 
1830  // Remove Nodes that are chained to another node in the list. Do so
1831  // by walking up chains breadth-first, stopping when we've seen
1832  // another operand. In general we must climb to the EntryNode, but we can exit
1833  // early if we find all remaining work is associated with just one operand as
1834  // no further pruning is possible.
1835 
1836  // List of nodes to search through and original Ops from which they originate.
1837  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1838  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1839  SmallPtrSet<SDNode *, 16> SeenChains;
1840  bool DidPruneOps = false;
1841 
1842  unsigned NumLeftToConsider = 0;
1843  for (const SDValue &Op : Ops) {
1844  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1845  OpWorkCount.push_back(1);
1846  }
1847 
1848  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1849  // If this is an Op, we can remove the op from the list. Re-mark any
1850  // search associated with it as coming from the current OpNumber.
1851  if (SeenOps.count(Op) != 0) {
1852  Changed = true;
1853  DidPruneOps = true;
1854  unsigned OrigOpNumber = 0;
1855  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1856  OrigOpNumber++;
1857  assert((OrigOpNumber != Ops.size()) &&
1858  "expected to find TokenFactor Operand");
1859  // Re-mark worklist from OrigOpNumber to OpNumber
1860  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1861  if (Worklist[i].second == OrigOpNumber) {
1862  Worklist[i].second = OpNumber;
1863  }
1864  }
1865  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1866  OpWorkCount[OrigOpNumber] = 0;
1867  NumLeftToConsider--;
1868  }
1869  // Add if it's a new chain
1870  if (SeenChains.insert(Op).second) {
1871  OpWorkCount[OpNumber]++;
1872  Worklist.push_back(std::make_pair(Op, OpNumber));
1873  }
1874  };
1875 
1876  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1877  // We need to consider at least 2 Ops to prune.
1878  if (NumLeftToConsider <= 1)
1879  break;
1880  auto CurNode = Worklist[i].first;
1881  auto CurOpNumber = Worklist[i].second;
1882  assert((OpWorkCount[CurOpNumber] > 0) &&
1883  "Node should not appear in worklist");
1884  switch (CurNode->getOpcode()) {
1885  case ISD::EntryToken:
1886  // Hitting EntryToken is the only way for the search to terminate without
1887  // hitting another operand's search.
1888  // Prevent us from marking this operand as
1889  // considered.
1890  NumLeftToConsider++;
1891  break;
1892  case ISD::TokenFactor:
1893  for (const SDValue &Op : CurNode->op_values())
1894  AddToWorklist(i, Op.getNode(), CurOpNumber);
1895  break;
1896  case ISD::LIFETIME_START:
1897  case ISD::LIFETIME_END:
1898  case ISD::CopyFromReg:
1899  case ISD::CopyToReg:
1900  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1901  break;
1902  default:
1903  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1904  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1905  break;
1906  }
1907  OpWorkCount[CurOpNumber]--;
1908  if (OpWorkCount[CurOpNumber] == 0)
1909  NumLeftToConsider--;
1910  }
1911 
1912  // If we've changed things around then replace token factor.
1913  if (Changed) {
1914  SDValue Result;
1915  if (Ops.empty()) {
1916  // The entry token is the only possible outcome.
1917  Result = DAG.getEntryNode();
1918  } else {
1919  if (DidPruneOps) {
1920  SmallVector<SDValue, 8> PrunedOps;
1921  //
1922  for (const SDValue &Op : Ops) {
1923  if (SeenChains.count(Op.getNode()) == 0)
1924  PrunedOps.push_back(Op);
1925  }
1926  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1927  } else {
1928  Result = DAG.getTokenFactor(SDLoc(N), Ops);
1929  }
1930  }
1931  return Result;
1932  }
1933  return SDValue();
1934 }
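// [Editor's illustrative example, not part of DAGCombiner.cpp] A concrete
// instance of the flattening above, written as DAG pseudo-nodes:
//   t3 = TokenFactor t1, t2
//   t4 = TokenFactor t3, t2        ; t3 has a single use
// The nested single-use token factor is inlined and the duplicate t2 operand
// is deduplicated, so t4 is rebuilt as a token factor over just the unique
// chains t1 and t2.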
1935 
1936 /// MERGE_VALUES can always be eliminated.
1937 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1938  WorklistRemover DeadNodes(*this);
1939  // Replacing results may cause a different MERGE_VALUES to suddenly
1940  // be CSE'd with N, and carry its uses with it. Iterate until no
1941  // uses remain, to ensure that the node can be safely deleted.
1942  // First add the users of this node to the work list so that they
1943  // can be tried again once they have new operands.
1944  AddUsersToWorklist(N);
1945  do {
1946  // Do as a single replacement to avoid rewalking use lists.
1947  SmallVector<SDValue, 8> Ops;
1948  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1949  Ops.push_back(N->getOperand(i));
1950  DAG.ReplaceAllUsesWith(N, Ops.data());
1951  } while (!N->use_empty());
1952  deleteAndRecombine(N);
1953  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1954 }
1955 
1956 /// If \p N is a ConstantSDNode with isOpaque() == false return it cast to a
1957 /// ConstantSDNode pointer else nullptr.
1958 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1959  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1960  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1961 }
1962 
1963 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1964  assert(TLI.isBinOp(BO->getOpcode()) && "Unexpected binary operator");
1965 
1966  // Don't do this unless the old select is going away. We want to eliminate the
1967  // binary operator, not replace a binop with a select.
1968  // TODO: Handle ISD::SELECT_CC.
1969  unsigned SelOpNo = 0;
1970  SDValue Sel = BO->getOperand(0);
1971  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1972  SelOpNo = 1;
1973  Sel = BO->getOperand(1);
1974  }
1975 
1976  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1977  return SDValue();
1978 
1979  SDValue CT = Sel.getOperand(1);
1980  if (!isConstantOrConstantVector(CT, true) &&
1981  !isConstantFPBuildVectorOrConstantFP(CT))
1982  return SDValue();
1983 
1984  SDValue CF = Sel.getOperand(2);
1985  if (!isConstantOrConstantVector(CF, true) &&
1986  !isConstantFPBuildVectorOrConstantFP(CF))
1987  return SDValue();
1988 
1989  // Bail out if any constants are opaque because we can't constant fold those.
1990  // The exception is "and" and "or" with either 0 or -1 in which case we can
1991  // propagate non constant operands into select. I.e.:
1992  // and (select Cond, 0, -1), X --> select Cond, 0, X
1993  // or X, (select Cond, -1, 0) --> select Cond, -1, X
1994  auto BinOpcode = BO->getOpcode();
1995  bool CanFoldNonConst =
1996  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1997  (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1998  (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1999 
2000  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2001  if (!CanFoldNonConst &&
2002  !isConstantOrConstantVector(CBO, true) &&
2003  !isConstantFPBuildVectorOrConstantFP(CBO))
2004  return SDValue();
2005 
2006  EVT VT = Sel.getValueType();
2007 
2008  // In the case of a shift, the value and the shift amount may have different
2009  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
2010  // type. Bail out if we have swapped operands and the value types do not
2011  // match. NB: x86 is fine if the operands are not swapped and the shift
2012  // amount's VT is no bigger than that of the shifted value. TODO: it is
2013  // possible to check for a shift, correct the VTs and still optimize on x86.
2014  if (SelOpNo && VT != CBO.getValueType())
2015  return SDValue();
2016 
2017  // We have a select-of-constants followed by a binary operator with a
2018  // constant. Eliminate the binop by pulling the constant math into the select.
2019  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2020  SDLoc DL(Sel);
2021  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2022  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2023  if (!CanFoldNonConst && !NewCT.isUndef() &&
2024  !isConstantOrConstantVector(NewCT, true) &&
2025  !isConstantFPBuildVectorOrConstantFP(NewCT))
2026  return SDValue();
2027 
2028  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2029  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2030  if (!CanFoldNonConst && !NewCF.isUndef() &&
2031  !isConstantOrConstantVector(NewCF, true) &&
2032  !isConstantFPBuildVectorOrConstantFP(NewCF))
2033  return SDValue();
2034 
2035  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2036 }
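// [Editor's illustrative sketch, not part of DAGCombiner.cpp] The scalar
// identity behind the fold above, shown with plain ints for ISD::ADD. The
// helper names are hypothetical; the fold is valid because applying the binop
// to each constant arm is equivalent to applying it to the select's result.
static int selectThenAdd(bool Cond, int CT, int CF, int CBO) {
  return (Cond ? CT : CF) + CBO;         // add (select Cond, CT, CF), CBO
}
static int addThenSelect(bool Cond, int CT, int CF, int CBO) {
  return Cond ? (CT + CBO) : (CF + CBO); // select Cond, CT + CBO, CF + CBO
}
// For every input, selectThenAdd(...) == addThenSelect(...); e.g.
// add (select Cond, 3, 5), 10 becomes select Cond, 13, 15.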
2037 
2038 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2039  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2040  "Expecting add or sub");
2041 
2042  // Match a constant operand and a zext operand for the math instruction:
2043  // add Z, C
2044  // sub C, Z
2045  bool IsAdd = N->getOpcode() == ISD::ADD;
2046  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2047  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2048  auto *CN = dyn_cast<ConstantSDNode>(C);
2049  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2050  return SDValue();
2051 
2052  // Match the zext operand as a setcc of a boolean.
2053  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2054  Z.getOperand(0).getValueType() != MVT::i1)
2055  return SDValue();
2056 
2057  // Match the compare as: setcc (X & 1), 0, eq.
2058  SDValue SetCC = Z.getOperand(0);
2059  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2060  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2061  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2062  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2063  return SDValue();
2064 
2065  // We are adding/subtracting a constant and an inverted low bit. Turn that
2066  // into a subtract/add of the low bit with incremented/decremented constant:
2067  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2068  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2069  EVT VT = C.getValueType();
2070  SDLoc DL(N);
2071  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2072  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2073  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2074  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2075 }
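// [Editor's illustrative sketch, not part of DAGCombiner.cpp] A scalar check
// of the 'add' form above with i1 modelled by the low bit of X (assumes
// <cstdint>); the helper names are hypothetical.
static uint32_t boolOfMaskedValBefore(uint32_t X, uint32_t C) {
  return (uint32_t)((X & 1) == 0) + C;   // add (zext (seteq (X & 1), 0)), C
}
static uint32_t boolOfMaskedValAfter(uint32_t X, uint32_t C) {
  return (C + 1) - (X & 1);              // sub C+1, (zext (X & 1))
}
// Both return C + 1 when X is even and C when X is odd, for every C
// (wraparound included), so the two forms agree.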
2076 
2077 /// Try to fold a 'not'-shifted sign bit used in an add/sub with a constant
2078 /// operand into a shift and an add with a different constant.
2079 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2080  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2081  "Expecting add or sub");
2082 
2083  // We need a constant operand for the add/sub, and the other operand is a
2084  // logical shift right: add (srl), C or sub C, (srl).
2085  bool IsAdd = N->getOpcode() == ISD::ADD;
2086  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2087  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2088  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2089  if (!C || ShiftOp.getOpcode() != ISD::SRL)
2090  return SDValue();
2091 
2092  // The shift must be of a 'not' value.
2093  SDValue Not = ShiftOp.getOperand(0);
2094  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2095  return SDValue();
2096 
2097  // The shift must be moving the sign bit to the least-significant-bit.
2098  EVT VT = ShiftOp.getValueType();
2099  SDValue ShAmt = ShiftOp.getOperand(1);
2100  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2101  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2102  return SDValue();
2103 
2104  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2105  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2106  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2107  SDLoc DL(N);
2108  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2109  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2110  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2111  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2112 }
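// [Editor's illustrative sketch, not part of DAGCombiner.cpp] A 32-bit scalar
// check of the 'add' form above (assumes <cstdint> and the usual arithmetic
// right shift for signed values); helper names are hypothetical. Let
// s = X >> 31 be the sign bit of X.
static uint32_t signBitFoldBefore(uint32_t X, uint32_t C) {
  return (~X >> 31) + C;                         // add (srl (not X), 31), C
}
static uint32_t signBitFoldAfter(uint32_t X, uint32_t C) {
  return (uint32_t)((int32_t)X >> 31) + (C + 1); // add (sra X, 31), (C + 1)
}
// srl(not X, 31) is 1 - s and sra(X, 31) is 0 or -1, i.e. -s, so both
// functions compute C + 1 - s modulo 2^32.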
2113 
2114 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2115 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2116 /// are no common bits set in the operands).
2117 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2118  SDValue N0 = N->getOperand(0);
2119  SDValue N1 = N->getOperand(1);
2120  EVT VT = N0.getValueType();
2121  SDLoc DL(N);
2122 
2123  // fold vector ops
2124  if (VT.isVector()) {
2125  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2126  return FoldedVOp;
2127 
2128  // fold (add x, 0) -> x, vector edition
2129  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2130  return N0;
2131  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2132  return N1;
2133  }
2134 
2135  // fold (add x, undef) -> undef
2136  if (N0.isUndef())
2137  return N0;
2138 
2139  if (N1.isUndef())
2140  return N1;
2141 
2142  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2143  // canonicalize constant to RHS
2144  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2145  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2146  // fold (add c1, c2) -> c1+c2
2147  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2148  N1.getNode());
2149  }
2150 
2151  // fold (add x, 0) -> x
2152  if (isNullConstant(N1))
2153  return N0;
2154 
2155  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2156  // fold ((c1-A)+c2) -> (c1+c2)-A
2157  if (N0.getOpcode() == ISD::SUB &&
2158  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2159  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2160  return DAG.getNode(ISD::SUB, DL, VT,
2161  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2162  N0.getOperand(1));
2163  }
2164 
2165  // add (sext i1 X), 1 -> zext (not i1 X)
2166  // We don't transform this pattern:
2167  // add (zext i1 X), -1 -> sext (not i1 X)
2168  // because most (?) targets generate better code for the zext form.
2169  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2170  isOneOrOneSplat(N1)) {
2171  SDValue X = N0.getOperand(0);
2172  if ((!LegalOperations ||
2173  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2174  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2175  X.getScalarValueSizeInBits() == 1) {
2176  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2177  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2178  }
2179  }
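// [Editor's note, not in the original file] Quick sanity check of the fold
// above: for i1 X, sext X is 0 or -1, so (sext X) + 1 is 1 or 0, which is
// exactly zext (not X), since not(0) = 1 and not(1) = 0 in i1.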
2180 
2181  // Undo the add -> or combine to merge constant offsets from a frame index.
2182  if (N0.getOpcode() == ISD::OR &&
2183  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2184  isa<ConstantSDNode>(N0.getOperand(1)) &&
2185  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2186  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2187  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2188  }
2189  }
2190 
2191  if (SDValue NewSel = foldBinOpIntoSelect(N))
2192  return NewSel;
2193 
2194  // reassociate add
2195  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2196  return RADD;
2197 
2198  // fold ((0-A) + B) -> B-A
2199  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2200  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2201 
2202  // fold (A + (0-B)) -> A-B
2203  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2204  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2205 
2206  // fold (A+(B-A)) -> B
2207  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2208  return N1.getOperand(0);
2209 
2210  // fold ((B-A)+A) -> B
2211  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2212  return N0.getOperand(0);
2213 
2214  // fold ((A-B)+(C-A)) -> (C-B)
2215  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2216  N0.getOperand(0) == N1.getOperand(1))
2217  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2218  N0.getOperand(1));
2219 
2220  // fold ((A-B)+(B-C)) -> (A-C)
2221  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2222  N0.getOperand(1) == N1.getOperand(0))
2223  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2224  N1.getOperand(1));
2225 
2226  // fold (A+(B-(A+C))) to (B-C)
2227  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2228  N0 == N1.getOperand(1).getOperand(0))
2229  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2230  N1.getOperand(1).getOperand(1));
2231 
2232  // fold (A+(B-(C+A))) to (B-C)
2233  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2234  N0 == N1.getOperand(1).getOperand(1))
2235  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2236  N1.getOperand(1).getOperand(0));
2237 
2238  // fold (A+((B-A)+or-C)) to (B+or-C)
2239  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2240  N1.getOperand(0).getOpcode() == ISD::SUB &&
2241  N0 == N1.getOperand(0).getOperand(1))
2242  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2243  N1.getOperand(1));
2244 
2245  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2246  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2247  SDValue N00 = N0.getOperand(0);
2248  SDValue N01 = N0.getOperand(1);
2249  SDValue N10 = N1.getOperand(0);
2250  SDValue N11 = N1.getOperand(1);
2251 
2252  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2253  return DAG.getNode(ISD::SUB, DL, VT,
2254  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2255  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2256  }
2257 
2258  // fold (add (umax X, C), -C) --> (usubsat X, C)
2259  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2260  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2261  return (!Max && !Op) ||
2262  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2263  };
2264  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2265  /*AllowUndefs*/ true))
2266  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2267  N0.getOperand(1));
2268  }
2269 
2270  if (SimplifyDemandedBits(SDValue(N, 0)))
2271  return SDValue(N, 0);
2272 
2273  if (isOneOrOneSplat(N1)) {
2274  // fold (add (xor a, -1), 1) -> (sub 0, a)
2275  if (isBitwiseNot(N0))
2276  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2277  N0.getOperand(0));
2278 
2279  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2280  if (N0.getOpcode() == ISD::ADD ||
2281  N0.getOpcode() == ISD::UADDO ||
2282  N0.getOpcode() == ISD::SADDO) {
2283  SDValue A, Xor;
2284 
2285  if (isBitwiseNot(N0.getOperand(0))) {
2286  A = N0.getOperand(1);
2287  Xor = N0.getOperand(0);
2288  } else if (isBitwiseNot(N0.getOperand(1))) {
2289  A = N0.getOperand(0);
2290  Xor = N0.getOperand(1);
2291  }
2292 
2293  if (Xor)
2294  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2295  }
2296  }
2297 
2298  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2299  return Combined;
2300 
2301  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2302  return Combined;
2303 
2304  return SDValue();
2305 }
2306 
2307 SDValue DAGCombiner::visitADD(SDNode *N) {
2308  SDValue N0 = N->getOperand(0);
2309  SDValue N1 = N->getOperand(1);
2310  EVT VT = N0.getValueType();
2311  SDLoc DL(N);
2312 
2313  if (SDValue Combined = visitADDLike(N))
2314  return Combined;
2315 
2316  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2317  return V;
2318 
2319  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2320  return V;
2321 
2322  // fold (a+b) -> (a|b) iff a and b share no bits.
2323  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2324  DAG.haveNoCommonBitsSet(N0, N1))
2325  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2326 
2327  return SDValue();
2328 }
2329 
2330 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2331  unsigned Opcode = N->getOpcode();
2332  SDValue N0 = N->getOperand(0);
2333  SDValue N1 = N->getOperand(1);
2334  EVT VT = N0.getValueType();
2335  SDLoc DL(N);
2336 
2337  // fold vector ops
2338  if (VT.isVector()) {
2339  // TODO SimplifyVBinOp
2340 
2341  // fold (add_sat x, 0) -> x, vector edition
2342  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2343  return N0;
2344  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2345  return N1;
2346  }
2347 
2348  // fold (add_sat x, undef) -> -1
2349  if (N0.isUndef() || N1.isUndef())
2350  return DAG.getAllOnesConstant(DL, VT);
2351 
2352  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2353  // canonicalize constant to RHS
2354  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2355  return DAG.getNode(Opcode, DL, VT, N1, N0);
2356  // fold (add_sat c1, c2) -> c3
2357  return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2358  N1.getNode());
2359  }
2360 
2361  // fold (add_sat x, 0) -> x
2362  if (isNullConstant(N1))
2363  return N0;
2364 
2365  // If it cannot overflow, transform into an add.
2366  if (Opcode == ISD::UADDSAT)
2367  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2368  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2369 
2370  return SDValue();
2371 }
2372 
2373 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2374  bool Masked = false;
2375 
2376  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2377  while (true) {
2378  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2379  V = V.getOperand(0);
2380  continue;
2381  }
2382 
2383  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2384  Masked = true;
2385  V = V.getOperand(0);
2386  continue;
2387  }
2388 
2389  break;
2390  }
2391 
2392  // If this is not a carry, return.
2393  if (V.getResNo() != 1)
2394  return SDValue();
2395 
2396  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2397  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2398  return SDValue();
2399 
2400  EVT VT = V.getNode()->getValueType(0);
2401  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2402  return SDValue();
2403 
2404  // If the result is masked, then no matter what kind of bool it is we can
2405  // return. If it isn't, then we need to make sure the bool type is either 0 or
2406  // 1 and not other values.
2407  if (Masked ||
2408  TLI.getBooleanContents(V.getValueType()) ==
2409  TargetLowering::ZeroOrOneBooleanContent)
2410  return V;
2411 
2412  return SDValue();
2413 }
2414 
2415 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2416 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2417 /// the opcode and bypass the mask operation.
2418 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2419  SelectionDAG &DAG, const SDLoc &DL) {
2420  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2421  return SDValue();
2422 
2423  EVT VT = N0.getValueType();
2424  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2425  return SDValue();
2426 
2427  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2428  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2429  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2430 }
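// [Editor's illustrative sketch, not part of DAGCombiner.cpp] Why the mask can
// be bypassed (assumes <cstdint>; helper names are hypothetical): when X is
// known to be all sign bits, i.e. 0 or -1, then (X & 1) == -X.
static int32_t maskedBoolAdd(int32_t N0, int32_t X) {
  return N0 + (X & 1);                   // add N0, (and X, 1)
}
static int32_t maskedBoolAddFolded(int32_t N0, int32_t X) {
  return N0 - X;                         // sub N0, X
}
// For X == 0 both return N0; for X == -1 both return N0 + 1.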
2431 
2432 /// Helper for doing combines based on N0 and N1 being added to each other.
2433 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2434  SDNode *LocReference) {
2435  EVT VT = N0.getValueType();
2436  SDLoc DL(LocReference);
2437 
2438  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2439  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2440  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2441  return DAG.getNode(ISD::SUB, DL, VT, N0,
2442  DAG.getNode(ISD::SHL, DL, VT,
2443  N1.getOperand(0).getOperand(1),
2444  N1.getOperand(1)));
2445 
2446  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2447  return V;
2448 
2449  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2450  // rather than 'add 0/-1' (the zext should get folded).
2451  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2452  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2453  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2454  TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2455  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2456  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2457  }
2458 
2459  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2460  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2461  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2462  if (TN->getVT() == MVT::i1) {
2463  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2464  DAG.getConstant(1, DL, VT));
2465  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2466  }
2467  }
2468 
2469  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2470  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2471  N1.getResNo() == 0)
2472  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2473  N0, N1.getOperand(0), N1.getOperand(2));
2474 
2475  // (add X, Carry) -> (addcarry X, 0, Carry)
2476  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2477  if (SDValue Carry = getAsCarry(TLI, N1))
2478  return DAG.getNode(ISD::ADDCARRY, DL,
2479  DAG.getVTList(VT, Carry.getValueType()), N0,
2480  DAG.getConstant(0, DL, VT), Carry);
2481 
2482  return SDValue();
2483 }
2484 
2485 SDValue DAGCombiner::visitADDC(SDNode *N) {
2486  SDValue N0 = N->getOperand(0);
2487  SDValue N1 = N->getOperand(1);
2488  EVT VT = N0.getValueType();
2489  SDLoc DL(N);
2490 
2491  // If the flag result is dead, turn this into an ADD.
2492  if (!N->hasAnyUseOfValue(1))
2493  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2494  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2495 
2496  // canonicalize constant to RHS.
2497  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2498  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2499  if (N0C && !N1C)
2500  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2501 
2502  // fold (addc x, 0) -> x + no carry out
2503  if (isNullConstant(N1))
2504  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2505  DL, MVT::Glue));
2506 
2507  // If it cannot overflow, transform into an add.
2508  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2509  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2510  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2511 
2512  return SDValue();
2513 }
2514 
2515 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2516  SelectionDAG &DAG, const TargetLowering &TLI) {
2517  EVT VT = V.getValueType();
2518 
2519  SDValue Cst;
2520  switch (TLI.getBooleanContents(VT)) {
2521  case TargetLowering::ZeroOrOneBooleanContent:
2522  case TargetLowering::UndefinedBooleanContent:
2523  Cst = DAG.getConstant(1, DL, VT);
2524  break;
2525  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2526  Cst = DAG.getConstant(-1, DL, VT);
2527  break;
2528  }
2529 
2530  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2531 }
2532 
2533 static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) {
2534  if (V.getOpcode() != ISD::XOR)
2535  return SDValue();
2536 
2537  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2538  if (!Const)
2539  return SDValue();
2540 
2541  EVT VT = V.getValueType();
2542 
2543  bool IsFlip = false;
2544  switch(TLI.getBooleanContents(VT)) {
2545  case TargetLowering::ZeroOrOneBooleanContent:
2546  IsFlip = Const->isOne();
2547  break;
2548  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2549  IsFlip = Const->isAllOnesValue();
2550  break;
2551  case TargetLowering::UndefinedBooleanContent:
2552  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2553  break;
2554  }
2555 
2556  if (IsFlip)
2557  return V.getOperand(0);
2558  return SDValue();
2559 }
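// [Editor's note, not in the original file] Example of the two helpers above:
// with ZeroOrOneBooleanContent a boolean flip is (xor b, 1), while with
// ZeroOrNegativeOneBooleanContent it is (xor b, -1); extractBooleanFlip
// recognizes exactly those xors and returns the unflipped boolean operand.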
2560 
2561 SDValue DAGCombiner::visitADDO(SDNode *N) {
2562  SDValue N0 = N->getOperand(0);
2563  SDValue N1 = N->getOperand(1);
2564  EVT VT = N0.getValueType();
2565  bool IsSigned = (ISD::SADDO == N->getOpcode());
2566 
2567  EVT CarryVT = N->getValueType(1);
2568  SDLoc DL(N);
2569 
2570  // If the flag result is dead, turn this into an ADD.
2571  if (!N->hasAnyUseOfValue(1))
2572  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2573  DAG.getUNDEF(CarryVT));
2574 
2575  // canonicalize constant to RHS.
2576  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2577  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2578  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2579 
2580  // fold (addo x, 0) -> x + no carry out
2581  if (isNullOrNullSplat(N1))
2582  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2583 
2584  if (!IsSigned) {
2585  // If it cannot overflow, transform into an add.
2586  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2587  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2588  DAG.getConstant(0, DL, CarryVT));
2589 
2590  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2591  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2592  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2593  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2594  return CombineTo(N, Sub,
2595  flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2596  }
2597 
2598  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2599  return Combined;
2600 
2601  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2602  return Combined;
2603  }
2604 
2605  return SDValue();
2606 }
2607 
2608 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2609  EVT VT = N0.getValueType();
2610  if (VT.isVector())
2611  return SDValue();
2612 
2613  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2614  // If Y + 1 cannot overflow.
2615  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2616  SDValue Y = N1.getOperand(0);
2617  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2618  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2619  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2620  N1.getOperand(2));
2621  }
2622 
2623  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2624  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2625  if (SDValue Carry = getAsCarry(TLI, N1))
2626  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2627  DAG.getConstant(0, SDLoc(N), VT), Carry);
2628 
2629  return SDValue();
2630 }
2631 
2632 SDValue DAGCombiner::visitADDE(SDNode *N) {
2633  SDValue N0 = N->getOperand(0);
2634  SDValue N1 = N->getOperand(1);
2635  SDValue CarryIn = N->getOperand(2);
2636 
2637  // canonicalize constant to RHS
2638  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2639  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2640  if (N0C && !N1C)
2641  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2642  N1, N0, CarryIn);
2643 
2644  // fold (adde x, y, false) -> (addc x, y)
2645  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2646  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2647 
2648  return SDValue();
2649 }
2650 
2651 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2652  SDValue N0 = N->getOperand(0);
2653  SDValue N1 = N->getOperand(1);
2654  SDValue CarryIn = N->getOperand(2);
2655  SDLoc DL(N);
2656 
2657  // canonicalize constant to RHS
2658  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2659  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2660  if (N0C && !N1C)
2661  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2662 
2663  // fold (addcarry x, y, false) -> (uaddo x, y)
2664  if (isNullConstant(CarryIn)) {
2665  if (!LegalOperations ||
2666  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2667  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2668  }
2669 
2670  EVT CarryVT = CarryIn.getValueType();
2671 
2672  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2673  if (isNullConstant(N0) && isNullConstant(N1)) {
2674  EVT VT = N0.getValueType();
2675  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2676  AddToWorklist(CarryExt.getNode());
2677  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2678  DAG.getConstant(1, DL, VT)),
2679  DAG.getConstant(0, DL, CarryVT));
2680  }
2681 
2682  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2683  if (isBitwiseNot(N0) && isNullConstant(N1)) {
2684  if (SDValue B = extractBooleanFlip(CarryIn, TLI)) {
2685  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2686  DAG.getConstant(0, DL, N0.getValueType()),
2687  N0.getOperand(0), B);
2688  return CombineTo(N, Sub,
2689  flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2690  }
2691  }
2692 
2693  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2694  return Combined;
2695 
2696  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2697  return Combined;
2698 
2699  return SDValue();
2700 }
2701 
2702 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2703  SDNode *N) {
2704  // Iff the flag result is dead:
2705  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2706  if ((N0.getOpcode() == ISD::ADD ||
2707  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2708  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2709  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2710  N0.getOperand(0), N0.getOperand(1), CarryIn);
2711 
2712  /**
2713  * When one of the addcarry arguments is itself a carry, we may be facing
2714  * a diamond carry propagation, in which case we try to transform the DAG
2715  * to ensure linear carry propagation if that is possible.
2716  *
2717  * We are trying to get:
2718  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2719  */
2720  if (auto Y = getAsCarry(TLI, N1)) {
2721  /**
2722  * (uaddo A, B)
2723  * / \
2724  * Carry Sum
2725  * | \
2726  * | (addcarry *, 0, Z)
2727  * | /
2728  * \ Carry
2729  * | /
2730  * (addcarry X, *, *)
2731  */
2732  if (Y.getOpcode() == ISD::UADDO &&
2733  CarryIn.getResNo() == 1 &&
2734  CarryIn.getOpcode() == ISD::ADDCARRY &&
2735  isNullConstant(CarryIn.getOperand(1)) &&
2736  CarryIn.getOperand(0) == Y.getValue(0)) {
2737  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2738  Y.getOperand(0), Y.getOperand(1),
2739  CarryIn.getOperand(2));
2740  AddToWorklist(NewY.getNode());
2741  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2742  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2743  NewY.getValue(1));
2744  }
2745  }
2746 
2747  return SDValue();
2748 }
2749 
2750 // Since it may not be valid to emit a fold to zero for vector initializers,
2751 // check if we can before folding.
2752 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2753  SelectionDAG &DAG, bool LegalOperations) {
2754  if (!VT.isVector())
2755  return DAG.getConstant(0, DL, VT);
2756  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2757  return DAG.getConstant(0, DL, VT);
2758  return SDValue();
2759 }
2760 
2761 SDValue DAGCombiner::visitSUB(SDNode *N) {
2762  SDValue N0 = N->getOperand(0);
2763  SDValue N1 = N->getOperand(1);
2764  EVT VT = N0.getValueType();
2765  SDLoc DL(N);
2766 
2767  // fold vector ops
2768  if (VT.isVector()) {
2769  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2770  return FoldedVOp;
2771 
2772  // fold (sub x, 0) -> x, vector edition
2773  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2774  return N0;
2775  }
2776 
2777  // fold (sub x, x) -> 0
2778  // FIXME: Refactor this and xor and other similar operations together.
2779  if (N0 == N1)
2780  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2781  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2782  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2783  // fold (sub c1, c2) -> c1-c2
2784  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2785  N1.getNode());
2786  }
2787 
2788  if (SDValue NewSel = foldBinOpIntoSelect(N))
2789  return NewSel;
2790 
2791  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2792 
2793  // fold (sub x, c) -> (add x, -c)
2794  if (N1C) {
2795  return DAG.getNode(ISD::ADD, DL, VT, N0,
2796  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2797  }
2798 
2799  if (isNullOrNullSplat(N0)) {
2800  unsigned BitWidth = VT.getScalarSizeInBits();
2801  // Right-shifting everything out but the sign bit followed by negation is
2802  // the same as flipping arithmetic/logical shift type without the negation:
2803  // -(X >>u 31) -> (X >>s 31)
2804  // -(X >>s 31) -> (X >>u 31)
2805  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2806  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2807  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2808  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2809  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2810  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2811  }
2812  }
2813 
2814  // 0 - X --> 0 if the sub is NUW.
2815  if (N->getFlags().hasNoUnsignedWrap())
2816  return N0;
2817 
2818  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2819  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2820  // N1 must be 0 because negating the minimum signed value is undefined.
2821  if (N->getFlags().hasNoSignedWrap())
2822  return N0;
2823 
2824  // 0 - X --> X if X is 0 or the minimum signed value.
2825  return N1;
2826  }
2827  }
2828 
2829  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2830  if (isAllOnesOrAllOnesSplat(N0))
2831  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2832 
2833  // fold (A - (0-B)) -> A+B
2834  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2835  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2836 
2837  // fold A-(A-B) -> B
2838  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2839  return N1.getOperand(1);
2840 
2841  // fold (A+B)-A -> B
2842  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2843  return N0.getOperand(1);
2844 
2845  // fold (A+B)-B -> A
2846  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2847  return N0.getOperand(0);
2848 
2849  // fold C2-(A+C1) -> (C2-C1)-A
2850  if (N1.getOpcode() == ISD::ADD) {
2851  SDValue N11 = N1.getOperand(1);
2852  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2853  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2854  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2855  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2856  }
2857  }
2858 
2859  // fold ((A+(B+or-C))-B) -> A+or-C
2860  if (N0.getOpcode() == ISD::ADD &&
2861  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2862  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2863  N0.getOperand(1).getOperand(0) == N1)
2864  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2865  N0.getOperand(1).getOperand(1));
2866 
2867  // fold ((A+(C+B))-B) -> A+C
2868  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2869  N0.getOperand(1).getOperand(1) == N1)
2870  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2871  N0.getOperand(1).getOperand(0));
2872 
2873  // fold ((A-(B-C))-C) -> A-B
2874  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2875  N0.getOperand(1).getOperand(1) == N1)
2876  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2877  N0.getOperand(1).getOperand(0));
2878 
2879  // fold (A-(B-C)) -> A+(C-B)
2880  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2881  return DAG.getNode(ISD::ADD, DL, VT, N0,
2882  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2883  N1.getOperand(0)));
2884 
2885  // fold (X - (-Y * Z)) -> (X + (Y * Z))
2886  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2887  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2888  isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
2889  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2890  N1.getOperand(0).getOperand(1),
2891  N1.getOperand(1));
2892  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2893  }
2894  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2895  isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
2896  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2897  N1.getOperand(0),
2898  N1.getOperand(1).getOperand(1));
2899  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2900  }
2901  }
2902 
2903  // If either operand of a sub is undef, the result is undef
2904  if (N0.isUndef())
2905  return N0;
2906  if (N1.isUndef())
2907  return N1;
2908 
2909  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2910  return V;
2911 
2912  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2913  return V;
2914 
2915  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
2916  return V;
2917 
2918  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
2919  // rather than 'sub 0/1' (the sext should get folded).
2920  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
2921  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
2922  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
2923  TLI.getBooleanContents(VT) ==
2924  TargetLowering::ZeroOrNegativeOneBooleanContent) {
2925  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
2926  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
2927  }
2928 
2929  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2930  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2931  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2932  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2933  SDValue S0 = N1.getOperand(0);
2934  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2935  unsigned OpSizeInBits = VT.getScalarSizeInBits();
2936  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2937  if (C->getAPIntValue() == (OpSizeInBits - 1))
2938  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2939  }
2940  }
2941  }
2942 
2943  // If the relocation model supports it, consider symbol offsets.
2944  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2945  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2946  // fold (sub Sym, c) -> Sym-c
2947  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2948  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2949  GA->getOffset() -
2950  (uint64_t)N1C->getSExtValue());
2951  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2952  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2953  if (GA->getGlobal() == GB->getGlobal())
2954  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2955  DL, VT);
2956  }
2957 
2958  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2959  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2960  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2961  if (TN->getVT() == MVT::i1) {
2962  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2963  DAG.getConstant(1, DL, VT));
2964  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2965  }
2966  }
2967 
2968  // Prefer an add for more folding potential and possibly better codegen:
2969  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
2970  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2971  SDValue ShAmt = N1.getOperand(1);
2972  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2973  if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2974  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2975  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2976  }
2977  }
2978 
2979  return SDValue();
2980 }
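// [Editor's illustrative sketch, not part of DAGCombiner.cpp] The classic abs
// idiom matched near the end of visitSUB (assumes <cstdint> and an arithmetic
// right shift for signed values; the helper name is hypothetical):
static int32_t absIdiom(int32_t X) {
  int32_t Y = X >> 31;   // Y = sra(X, 31): 0 if X >= 0, -1 if X < 0
  return (X ^ Y) - Y;    // matches the sub (xor X, Y), Y pattern
}
// For X >= 0 this is X - 0; for X < 0 it is (~X) + 1 == -X. The one caveat is
// X == INT32_MIN, where the C++ subtraction overflows while the DAG form
// simply wraps back to INT32_MIN, matching ISD::ABS semantics.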
2981 
2982 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2983  SDValue N0 = N->getOperand(0);
2984  SDValue N1 = N->getOperand(1);
2985  EVT VT = N0.getValueType();
2986  SDLoc DL(N);
2987 
2988  // fold vector ops
2989  if (VT.isVector()) {
2990  // TODO SimplifyVBinOp
2991 
2992  // fold (sub_sat x, 0) -> x, vector edition
2993  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2994  return N0;
2995  }
2996 
2997  // fold (sub_sat x, undef) -> 0
2998  if (N0.isUndef() || N1.isUndef())
2999  return DAG.getConstant(0, DL, VT);
3000 
3001  // fold (sub_sat x, x) -> 0
3002  if (N0 == N1)
3003  return DAG.getConstant(0, DL, VT);
3004 
3005  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3006  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3007  // fold (sub_sat c1, c2) -> c3
3008  return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3009  N1.getNode());
3010  }
3011 
3012  // fold (sub_sat x, 0) -> x
3013  if (isNullConstant(N1))
3014  return N0;
3015 
3016  return SDValue();
3017 }
3018 
3019 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3020  SDValue N0 = N->getOperand(0);
3021  SDValue N1 = N->getOperand(1);
3022  EVT VT = N0.getValueType();
3023  SDLoc DL(N);
3024 
3025  // If the flag result is dead, turn this into an SUB.
3026  if (!N->hasAnyUseOfValue(1))
3027  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3028  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3029 
3030  // fold (subc x, x) -> 0 + no borrow
3031  if (N0 == N1)
3032  return CombineTo(N, DAG.getConstant(0, DL, VT),
3033  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3034 
3035  // fold (subc x, 0) -> x + no borrow
3036  if (isNullConstant(N1))
3037  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3038 
3039  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3040  if (isAllOnesConstant(N0))
3041  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3042  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3043 
3044  return SDValue();
3045 }
3046 
3047 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3048  SDValue N0 = N->getOperand(0);
3049  SDValue N1 = N->getOperand(1);
3050  EVT VT = N0.getValueType();
3051  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3052 
3053  EVT CarryVT = N->getValueType(1);
3054  SDLoc DL(N);
3055 
3056  // If the flag result is dead, turn this into an SUB.
3057  if (!N->hasAnyUseOfValue(1))
3058  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3059  DAG.getUNDEF(CarryVT));
3060 
3061  // fold (subo x, x) -> 0 + no borrow
3062  if (N0 == N1)
3063  return CombineTo(N, DAG.getConstant(0, DL, VT),
3064  DAG.getConstant(0, DL, CarryVT));
3065 
3066  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3067 
3068  // fold (subo x, c) -> (addo x, -c)
3069  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3070  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3071  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3072  }
3073 
3074  // fold (subo x, 0) -> x + no borrow
3075  if (isNullOrNullSplat(N1))
3076  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3077 
3078  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3079  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3080  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3081  DAG.getConstant(0, DL, CarryVT));
3082 
3083  return SDValue();
3084 }
3085 
3086 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3087  SDValue N0 = N->getOperand(0);
3088  SDValue N1 = N->getOperand(1);
3089  SDValue CarryIn = N->getOperand(2);
3090 
3091  // fold (sube x, y, false) -> (subc x, y)
3092  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3093  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3094 
3095  return SDValue();
3096 }
3097 
3098 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3099  SDValue N0 = N->getOperand(0);
3100  SDValue N1 = N->getOperand(1);
3101  SDValue CarryIn = N->getOperand(2);
3102 
3103  // fold (subcarry x, y, false) -> (usubo x, y)
3104  if (isNullConstant(CarryIn)) {
3105  if (!LegalOperations ||
3106  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3107  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3108  }
3109 
3110  return SDValue();
3111 }
3112 
3113 SDValue DAGCombiner::visitMUL(SDNode *N) {
3114  SDValue N0 = N->getOperand(0);
3115  SDValue N1 = N->getOperand(1);
3116  EVT VT = N0.getValueType();
3117 
3118  // fold (mul x, undef) -> 0
3119  if (N0.isUndef() || N1.isUndef())
3120  return DAG.getConstant(0, SDLoc(N), VT);
3121 
3122  bool N0IsConst = false;
3123  bool N1IsConst = false;
3124  bool N1IsOpaqueConst = false;
3125  bool N0IsOpaqueConst = false;
3126  APInt ConstValue0, ConstValue1;
3127  // fold vector ops
3128  if (VT.isVector()) {
3129  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3130  return FoldedVOp;
3131 
3132  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3133  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3134  assert((!N0IsConst ||
3135  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3136  "Splat APInt should be element width");
3137  assert((!N1IsConst ||
3138  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3139  "Splat APInt should be element width");
3140  } else {
3141  N0IsConst = isa<ConstantSDNode>(N0);
3142  if (N0IsConst) {
3143  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3144  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3145  }
3146  N1IsConst = isa<ConstantSDNode>(N1);
3147  if (N1IsConst) {
3148  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3149  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3150  }
3151  }
3152 
3153  // fold (mul c1, c2) -> c1*c2
3154  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3155  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3156  N0.getNode(), N1.getNode());
3157 
3158  // canonicalize constant to RHS (vector doesn't have to splat)
3159  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3160  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3161  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3162  // fold (mul x, 0) -> 0
3163  if (N1IsConst && ConstValue1.isNullValue())
3164  return N1;
3165  // fold (mul x, 1) -> x
3166  if (N1IsConst && ConstValue1.isOneValue())
3167  return N0;
3168 
3169  if (SDValue NewSel = foldBinOpIntoSelect(N))
3170  return NewSel;
3171 
3172  // fold (mul x, -1) -> 0-x
3173  if (N1IsConst && ConstValue1.isAllOnesValue()) {
3174  SDLoc DL(N);
3175  return DAG.getNode(ISD::SUB, DL, VT,
3176  DAG.getConstant(0, DL, VT), N0);
3177  }
3178  // fold (mul x, (1 << c)) -> x << c
3179  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3180  DAG.isKnownToBeAPowerOfTwo(N1) &&
3181  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3182  SDLoc DL(N);
3183  SDValue LogBase2 = BuildLogBase2(N1, DL);
3184  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3185  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3186  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3187  }
3188  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3189  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3190  unsigned Log2Val = (-ConstValue1).logBase2();
3191  SDLoc DL(N);
3192  // FIXME: If the input is something that is easily negated (e.g. a
3193  // single-use add), we should put the negate there.
3194  return DAG.getNode(ISD::SUB, DL, VT,
3195  DAG.getConstant(0, DL, VT),
3196  DAG.getNode(ISD::SHL, DL, VT, N0,
3197  DAG.getConstant(Log2Val, DL,
3198  getShiftAmountTy(N0.getValueType()))));
3199  }
3200 
3201  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3202  // mul x, (2^N + 1) --> add (shl x, N), x
3203  // mul x, (2^N - 1) --> sub (shl x, N), x
3204  // Examples: x * 33 --> (x << 5) + x
3205  // x * 15 --> (x << 4) - x
3206  // x * -33 --> -((x << 5) + x)
3207  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3208  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3209  // TODO: We could handle more general decomposition of any constant by
3210  // having the target set a limit on number of ops and making a
3211  // callback to determine that sequence (similar to sqrt expansion).
3212  unsigned MathOp = ISD::DELETED_NODE;
3213  APInt MulC = ConstValue1.abs();
3214  if ((MulC - 1).isPowerOf2())
3215  MathOp = ISD::ADD;
3216  else if ((MulC + 1).isPowerOf2())
3217  MathOp = ISD::SUB;
3218 
3219  if (MathOp != ISD::DELETED_NODE) {
3220  unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3221  : (MulC + 1).logBase2();
3222  assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3223  "Not expecting multiply-by-constant that could have simplified");
3224  SDLoc DL(N);
3225  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3226  DAG.getConstant(ShAmt, DL, VT));
3227  SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3228  if (ConstValue1.isNegative())
3229  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3230  return R;
3231  }
3232  }
3233 
3234  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3235  if (N0.getOpcode() == ISD::SHL &&
3236  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3237  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3238  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3239  if (isConstantOrConstantVector(C3))
3240  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3241  }
3242 
3243  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3244  // use.
3245  {
3246  SDValue Sh(nullptr, 0), Y(nullptr, 0);
3247 
3248  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3249  if (N0.getOpcode() == ISD::SHL &&
3250  isConstantOrConstantVector(N0.getOperand(1)) &&
3251  N0.getNode()->hasOneUse()) {
3252  Sh = N0; Y = N1;
3253  } else if (N1.getOpcode() == ISD::SHL &&
3254  isConstantOrConstantVector(N1.getOperand(1)) &&
3255  N1.getNode()->hasOneUse()) {
3256  Sh = N1; Y = N0;
3257  }
3258 
3259  if (Sh.getNode()) {
3260  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3261  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3262  }
3263  }
3264 
3265  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3266  if (isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3267  N0.getOpcode() == ISD::ADD &&
3268  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true) &&
3269  isMulAddWithConstProfitable(N, N0, N1))
3270  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3271  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3272  N0.getOperand(0), N1),
3273  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3274  N0.getOperand(1), N1));
3275 
3276  // reassociate mul
3277  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3278  return RMUL;
3279 
3280  return SDValue();
3281 }
3282 
3283 /// Return true if divmod libcall is available.
3284 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3285  const TargetLowering &TLI) {
3286  RTLIB::Libcall LC;
3287  EVT NodeType = Node->getValueType(0);
3288  if (!NodeType.isSimple())
3289  return false;
3290  switch (NodeType.getSimpleVT().SimpleTy) {
3291  default: return false; // No libcall for vector types.
3292  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3293  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3294  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3295  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3296  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3297  }
3298 
3299  return TLI.getLibcallName(LC) != nullptr;
3300 }
3301 
3302 /// Issue divrem if both quotient and remainder are needed.
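/// For example, when both (sdiv a, b) and (srem a, b) appear in the DAG, they
/// can be replaced by a single (sdivrem a, b) whose two results feed the
/// quotient and remainder users, respectively (an illustrative sketch of the
/// combine performed below).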
3303 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3304  if (Node->use_empty())
3305  return SDValue(); // This is a dead node, leave it alone.
3306 
3307  unsigned Opcode = Node->getOpcode();
3308  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3309  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3310 
3311  // DivMod lib calls can still work on non-legal types if using lib-calls.
3312  EVT VT = Node->getValueType(0);
3313  if (VT.isVector() || !VT.isInteger())
3314  return SDValue();
3315 
3316  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3317  return SDValue();
3318 
3319  // If DIVREM is going to get expanded into a libcall,
3320  // but there is no libcall available, then don't combine.
3321  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3322  !isDivRemLibcallAvailable(Node, isSigned, TLI))
3323  return SDValue();
3324 
3325  // If div is legal, it's better to do the normal expansion
3326  unsigned OtherOpcode = 0;
3327  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3328  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3329  if (TLI.isOperationLegalOrCustom(Opcode, VT))
3330  return SDValue();
3331  } else {
3332  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3333  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3334  return SDValue();
3335  }
3336 
3337  SDValue Op0 = Node->getOperand(0);
3338  SDValue Op1 = Node->getOperand(1);
3339  SDValue combined;
3340  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3341  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3342  SDNode *User = *UI;
3343  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3344  User->use_empty())
3345  continue;
3346  // Convert the other matching node(s), too;
3347  // otherwise, the DIVREM may get target-legalized into something
3348  // target-specific that we won't be able to recognize.
3349  unsigned UserOpc = User->getOpcode();
3350  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3351  User->getOperand(0) == Op0 &&
3352  User->getOperand(1) == Op1) {
3353  if (!combined) {
3354  if (UserOpc == OtherOpcode) {
3355  SDVTList VTs = DAG.getVTList(VT, VT);
3356  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3357  } else if (UserOpc == DivRemOpc) {
3358  combined = SDValue(User, 0);
3359  } else {
3360  assert(UserOpc == Opcode);
3361  continue;
3362  }
3363  }
3364  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3365  CombineTo(User, combined);
3366  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3367  CombineTo(User, combined.getValue(1));
3368  }
3369  }
3370  return combined;
3371 }
3372 
3373 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3374  SDValue N0 = N->getOperand(0);
3375  SDValue N1 = N->getOperand(1);
3376  EVT VT = N->getValueType(0);
3377  SDLoc DL(N);
3378 
3379  unsigned Opc = N->getOpcode();
3380  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3381  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3382 
3383  // X / undef -> undef
3384  // X % undef -> undef
3385  // X / 0 -> undef
3386  // X % 0 -> undef
3387  // NOTE: This includes vectors where any divisor element is zero/undef.
3388  if (DAG.isUndef(Opc, {N0, N1}))
3389  return DAG.getUNDEF(VT);
3390 
3391  // undef / X -> 0
3392  // undef % X -> 0
3393  if (N0.isUndef())
3394  return DAG.getConstant(0, DL, VT);
3395 
3396  // 0 / X -> 0
3397  // 0 % X -> 0
3398  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3399  if (N0C && N0C->isNullValue())
3400  return N0;
3401 
3402  // X / X -> 1
3403  // X % X -> 0
3404  if (N0 == N1)
3405  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3406 
3407  // X / 1 -> X
3408  // X % 1 -> 0
3409  // If this is a boolean op (single-bit element type), we can't have
3410  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3411  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3412  // it's a 1.
3413  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3414  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3415 
3416  return SDValue();
3417 }
3418 
3419 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3420  SDValue N0 = N->getOperand(0);
3421  SDValue N1 = N->getOperand(1);
3422  EVT VT = N->getValueType(0);
3423  EVT CCVT = getSetCCResultType(VT);
3424 
3425  // fold vector ops
3426  if (VT.isVector())
3427  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3428  return FoldedVOp;
3429 
3430  SDLoc DL(N);
3431 
3432  // fold (sdiv c1, c2) -> c1/c2
3433  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3434  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3435  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3436  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3437  // fold (sdiv X, -1) -> 0-X
3438  if (N1C && N1C->isAllOnesValue())
3439  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3440  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3441  if (N1C && N1C->getAPIntValue().isMinSignedValue())
3442  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3443  DAG.getConstant(1, DL, VT),
3444  DAG.getConstant(0, DL, VT));
3445 
3446  if (SDValue V = simplifyDivRem(N, DAG))
3447  return V;
3448 
3449  if (SDValue NewSel = foldBinOpIntoSelect(N))
3450  return NewSel;
3451 
3452  // If we know the sign bits of both operands are zero, strength reduce to a
3453  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3454  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3455  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3456 
3457  if (SDValue V = visitSDIVLike(N0, N1, N)) {
3458  // If the corresponding remainder node exists, update its users with
3459  // (Dividend - (Quotient * Divisor)).
3460  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3461  { N0, N1 })) {
3462  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3463  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3464  AddToWorklist(Mul.getNode());
3465  AddToWorklist(Sub.getNode());
3466  CombineTo(RemNode, Sub);
3467  }
3468  return V;
3469  }
3470 
3471  // sdiv, srem -> sdivrem
3472  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3473  // true. Otherwise, we break the simplification logic in visitREM().
3474  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3475  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3476  if (SDValue DivRem = useDivRem(N))
3477  return DivRem;
3478 
3479  return SDValue();
3480 }
3481 
3482 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3483  SDLoc DL(N);
3484  EVT VT = N->getValueType(0);
3485  EVT CCVT = getSetCCResultType(VT);
3486  unsigned BitWidth = VT.getScalarSizeInBits();
3487 
3488  // Helper for determining whether a value is a power-of-2 constant scalar or a
3489  // vector of such elements.
3490  auto IsPowerOfTwo = [](ConstantSDNode *C) {
3491  if (C->isNullValue() || C->isOpaque())
3492  return false;
3493  if (C->getAPIntValue().isPowerOf2())
3494  return true;
3495  if ((-C->getAPIntValue()).isPowerOf2())
3496  return true;
3497  return false;
3498  };
3499 
3500  // fold (sdiv X, pow2) -> simple ops after legalize
3501  // FIXME: We check for the exact bit here because the generic lowering gives
3502  // better results in that case. The target-specific lowering should learn how
3503  // to handle exact sdivs efficiently.
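  // Illustrative sketch of the generic expansion below for (i32 sdiv X, 4):
  //   Sign = sra X, 31            ; 0 for non-negative X, -1 otherwise
  //   Bias = srl Sign, 30         ; 0 or 3, i.e. abs(divisor) - 1
  //   Res  = sra (add X, Bias), 2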
3504  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3505  // Target-specific implementation of sdiv x, pow2.
3506  if (SDValue Res = BuildSDIVPow2(N))
3507  return Res;
3508 
3509  // Create constants that are functions of the shift amount value.
3510  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3511  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3512  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3513  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3514  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3515  if (!isConstantOrConstantVector(Inexact))
3516  return SDValue();
3517 
3518  // Splat the sign bit into the register
3519  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3520  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3521  AddToWorklist(Sign.getNode());
3522 
3523  // Add (N0 < 0) ? abs2 - 1 : 0;
3524  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3525  AddToWorklist(Srl.getNode());
3526  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3527  AddToWorklist(Add.getNode());
3528  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3529  AddToWorklist(Sra.getNode());
3530 
3531  // Special case: (sdiv X, 1) -> X
3532  // Special Case: (sdiv X, -1) -> 0-X
3533  SDValue One = DAG.getConstant(1, DL, VT);
3534  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3535  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3536  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3537  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3538  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3539 
3540  // If dividing by a positive value, we're done. Otherwise, the result must
3541  // be negated.
3542  SDValue Zero = DAG.getConstant(0, DL, VT);
3543  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3544 
3545  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3546  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3547  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3548  return Res;
3549  }
3550 
3551  // If integer divide is expensive and we satisfy the requirements, emit an
3552  // alternate sequence. Targets may check function attributes for size/speed
3553  // trade-offs.
3554  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3555  if (isConstantOrConstantVector(N1) &&
3556  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3557  if (SDValue Op = BuildSDIV(N))
3558  return Op;
3559 
3560  return SDValue();
3561 }
3562 
3563 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3564  SDValue N0 = N->getOperand(0);
3565  SDValue N1 = N->getOperand(1);
3566  EVT VT = N->getValueType(0);
3567  EVT CCVT = getSetCCResultType(VT);
3568 
3569  // fold vector ops
3570  if (VT.isVector())
3571  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3572  return FoldedVOp;
3573 
3574  SDLoc DL(N);
3575 
3576  // fold (udiv c1, c2) -> c1/c2
3577  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3578  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3579  if (N0C && N1C)
3580  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3581  N0C, N1C))
3582  return Folded;
3583  // fold (udiv X, -1) -> select(X == -1, 1, 0)
3584  if (N1C && N1C->getAPIntValue().isAllOnesValue())
3585  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3586  DAG.getConstant(1, DL, VT),
3587  DAG.getConstant(0, DL, VT));
3588 
3589  if (SDValue V = simplifyDivRem(N, DAG))
3590  return V;
3591 
3592  if (SDValue NewSel = foldBinOpIntoSelect(N))
3593  return NewSel;
3594 
3595  if (SDValue V = visitUDIVLike(N0, N1, N)) {
3596  // If the corresponding remainder node exists, update its users with
3597  // (Dividend - (Quotient * Divisor)).
3598  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3599  { N0, N1 })) {
3600  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3601  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3602  AddToWorklist(Mul.getNode());
3603  AddToWorklist(Sub.getNode());
3604  CombineTo(RemNode, Sub);
3605  }
3606  return V;
3607  }
3608 
3609  // udiv, urem -> udivrem
3610  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3611  // true. Otherwise, we break the simplification logic in visitREM().
3612  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3613  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3614  if (SDValue DivRem = useDivRem(N))
3615  return DivRem;
3616 
3617  return SDValue();
3618 }
3619 
3620 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3621  SDLoc DL(N);
3622  EVT VT = N->getValueType(0);
3623 
3624  // fold (udiv x, (1 << c)) -> x >>u c
3625  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3626  DAG.isKnownToBeAPowerOfTwo(N1)) {
3627  SDValue LogBase2 = BuildLogBase2(N1, DL);
3628  AddToWorklist(LogBase2.getNode());
3629 
3630  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3631  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3632  AddToWorklist(Trunc.getNode());
3633  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3634  }
3635 
3636  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
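  // e.g. (udiv x, (shl 4, y)) --> (srl x, (add y, 2))  (illustrative)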
3637  if (N1.getOpcode() == ISD::SHL) {
3638  SDValue N10 = N1.getOperand(0);
3639  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3640  DAG.isKnownToBeAPowerOfTwo(N10)) {
3641  SDValue LogBase2 = BuildLogBase2(N10, DL);
3642  AddToWorklist(LogBase2.getNode());
3643 
3644  EVT ADDVT = N1.getOperand(1).getValueType();
3645  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3646  AddToWorklist(Trunc.getNode());
3647  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3648  AddToWorklist(Add.getNode());
3649  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3650  }
3651  }
3652 
3653  // fold (udiv x, c) -> alternate
3654  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3655  if (isConstantOrConstantVector(N1) &&
3656  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3657  if (SDValue Op = BuildUDIV(N))
3658  return Op;
3659 
3660  return SDValue();
3661 }
3662 
3663 // handles ISD::SREM and ISD::UREM
3664 SDValue DAGCombiner::visitREM(SDNode *N) {
3665  unsigned Opcode = N->getOpcode();
3666  SDValue N0 = N->getOperand(0);
3667  SDValue N1 = N->getOperand(1);
3668  EVT VT = N->getValueType(0);
3669  EVT CCVT = getSetCCResultType(VT);
3670 
3671  bool isSigned = (Opcode == ISD::SREM);
3672  SDLoc DL(N);
3673 
3674  // fold (rem c1, c2) -> c1%c2
3675  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3676  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3677  if (N0C && N1C)
3678  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3679  return Folded;
3680  // fold (urem X, -1) -> select(X == -1, 0, x)
3681  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3682  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3683  DAG.getConstant(0, DL, VT), N0);
3684 
3685  if (SDValue V = simplifyDivRem(N, DAG))
3686  return V;
3687 
3688  if (SDValue NewSel = foldBinOpIntoSelect(N))
3689  return NewSel;
3690 
3691  if (isSigned) {
3692  // If we know the sign bits of both operands are zero, strength reduce to a
3693  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3694  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3695  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3696  } else {
3697  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3698  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3699  // fold (urem x, pow2) -> (and x, pow2-1)
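      // e.g. (urem x, 16) --> (and x, 15)  (illustrative)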
3700  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3701  AddToWorklist(Add.getNode());
3702  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3703  }
3704  if (N1.getOpcode() == ISD::SHL &&
3705  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3706  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3707  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3708  AddToWorklist(Add.getNode());
3709  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3710  }
3711  }
3712 
3713  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3714 
3715  // If X/C can be simplified by the division-by-constant logic, lower
3716  // X%C to the equivalent of X-X/C*C.
3717  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3718  // speculative DIV must not cause a DIVREM conversion. We guard against this
3719  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3720  // combine will not return a DIVREM. Regardless, checking cheapness here
3721  // makes sense since the simplification results in fatter code.
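  // For example (illustrative), (urem x, 7) is rebuilt as x - (x u/ 7) * 7,
  // where the udiv itself is expected to lower to a multiply-by-magic-constant
  // sequence rather than a real division.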
3722  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3723  SDValue OptimizedDiv =
3724  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3725  if (OptimizedDiv.getNode()) {
3726  // If the equivalent Div node also exists, update its users.
3727  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3728  if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3729  { N0, N1 }))
3730  CombineTo(DivNode, OptimizedDiv);
3731  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3732  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3733  AddToWorklist(OptimizedDiv.getNode());
3734  AddToWorklist(Mul.getNode());
3735  return Sub;
3736  }
3737  }
3738 
3739  // sdiv, srem -> sdivrem
3740  if (SDValue DivRem = useDivRem(N))
3741  return DivRem.getValue(1);
3742 
3743  return SDValue();
3744 }
3745 
3746 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3747  SDValue N0 = N->getOperand(0);
3748  SDValue N1 = N->getOperand(1);
3749  EVT VT = N->getValueType(0);
3750  SDLoc DL(N);
3751 
3752  if (VT.isVector()) {
3753  // fold (mulhs x, 0) -> 0
3754  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3755  return N1;
3756  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3757  return N0;
3758  }
3759 
3760  // fold (mulhs x, 0) -> 0
3761  if (isNullConstant(N1))
3762  return N1;
3763  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3764  if (isOneConstant(N1))
3765  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3766  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3767  getShiftAmountTy(N0.getValueType())));
3768 
3769  // fold (mulhs x, undef) -> 0
3770  if (N0.isUndef() || N1.isUndef())
3771  return DAG.getConstant(0, DL, VT);
3772 
3773  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3774  // plus a shift.
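  // Illustrative sketch for i16 with a legal i32 MUL:
  //   (i16 mulhs a, b) --> (trunc (srl (mul (sext a), (sext b)), 16))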
3775  if (VT.isSimple() && !VT.isVector()) {
3776  MVT Simple = VT.getSimpleVT();
3777  unsigned SimpleSize = Simple.getSizeInBits();
3778  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3779  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3780  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3781  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3782  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3783  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3784  DAG.getConstant(SimpleSize, DL,
3785  getShiftAmountTy(N1.getValueType())));
3786  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3787  }
3788  }
3789 
3790  return SDValue();
3791 }
3792 
3793 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3794  SDValue N0 = N->getOperand(0);
3795  SDValue N1 = N->getOperand(1);
3796  EVT VT = N->getValueType(0);
3797  SDLoc DL(N);
3798 
3799  if (VT.isVector()) {
3800  // fold (mulhu x, 0) -> 0
3801  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3802  return N1;
3803  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3804  return N0;
3805  }
3806 
3807  // fold (mulhu x, 0) -> 0
3808  if (isNullConstant(N1))
3809  return N1;
3810  // fold (mulhu x, 1) -> 0
3811  if (isOneConstant(N1))
3812  return DAG.getConstant(0, DL, N0.getValueType());
3813  // fold (mulhu x, undef) -> 0
3814  if (N0.isUndef() || N1.isUndef())
3815  return DAG.getConstant(0, DL, VT);
3816 
3817  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
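  // e.g. (i32 mulhu x, 65536) --> (srl x, 16)  (illustrative)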
3818  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3819  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3820  unsigned NumEltBits = VT.getScalarSizeInBits();
3821  SDValue LogBase2 = BuildLogBase2(N1, DL);
3822  SDValue SRLAmt = DAG.getNode(
3823  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3824  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3825  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3826  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3827  }
3828 
3829  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3830  // plus a shift.
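  // Illustrative sketch for i16 with a legal i32 MUL:
  //   (i16 mulhu a, b) --> (trunc (srl (mul (zext a), (zext b)), 16))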
3831  if (VT.isSimple() && !VT.isVector()) {
3832  MVT Simple = VT.getSimpleVT();
3833  unsigned SimpleSize = Simple.getSizeInBits();
3834  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3835  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3836  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3837  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3838  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3839  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3840  DAG.getConstant(SimpleSize, DL,
3841  getShiftAmountTy(N1.getValueType())));
3842  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3843  }
3844  }
3845 
3846  return SDValue();
3847 }
3848 
3849 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3850 /// give the opcodes for the two computations that are being performed. Return
3851 /// the replacement value if a simplification was made, or a null SDValue otherwise.
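/// For example, if only the low result of (smul_lohi a, b) is used and MUL is
/// available, the node can be replaced by a plain (mul a, b); this is an
/// illustrative sketch of the first case handled below.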
3852 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3853  unsigned HiOp) {
3854  // If the high half is not needed, just compute the low half.
3855  bool HiExists = N->hasAnyUseOfValue(1);
3856  if (!HiExists && (!LegalOperations ||
3857  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3858  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3859  return CombineTo(N, Res, Res);
3860  }
3861 
3862  // If the low half is not needed, just compute the high half.
3863  bool LoExists = N->hasAnyUseOfValue(0);
3864  if (!LoExists && (!LegalOperations ||
3865  TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3866  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3867  return CombineTo(N, Res, Res);
3868  }
3869 
3870  // If both halves are used, return as it is.
3871  if (LoExists && HiExists)
3872  return SDValue();
3873 
3874  // If the two computed results can be simplified separately, separate them.
3875  if (LoExists) {
3876  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3877  AddToWorklist(Lo.getNode());
3878  SDValue LoOpt = combine(Lo.getNode());
3879  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3880  (!LegalOperations ||
3881  TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3882  return CombineTo(N, LoOpt, LoOpt);
3883  }
3884 
3885  if (HiExists) {
3886  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3887  AddToWorklist(Hi.getNode());
3888  SDValue HiOpt = combine(Hi.getNode());
3889  if (HiOpt.getNode() && HiOpt != Hi &&
3890  (!LegalOperations ||
3891  TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3892  return CombineTo(N, HiOpt, HiOpt);
3893  }
3894 
3895  return SDValue();
3896 }
3897 
3898 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3899  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3900  return Res;
3901 
3902  EVT VT = N->getValueType(0);
3903  SDLoc DL(N);
3904 
3905  // If the type twice as wide is legal, transform the smul_lohi to a wider
3906  // multiply plus a shift.
3907  if (VT.isSimple() && !VT.isVector()) {
3908  MVT Simple = VT.getSimpleVT();
3909  unsigned SimpleSize = Simple.getSizeInBits();
3910  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3911  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3912  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3913  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3914  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3915  // Compute the high part as N1.
3916  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3917  DAG.getConstant(SimpleSize, DL,
3918  getShiftAmountTy(Lo.getValueType())));
3919  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3920  // Compute the low part as N0.
3921  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3922  return CombineTo(N, Lo, Hi);
3923  }
3924  }
3925 
3926  return SDValue();
3927 }
3928 
3929 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3930  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3931  return Res;
3932 
3933  EVT VT = N->getValueType(0);
3934  SDLoc DL(N);
3935 
3936  // If the type twice as wide is legal, transform the umul_lohi to a wider
3937  // multiply plus a shift.
3938  if (VT.isSimple() && !VT.isVector()) {
3939  MVT Simple = VT.getSimpleVT();
3940  unsigned SimpleSize = Simple.getSizeInBits();
3941  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3942  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3943  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3944  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3945  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3946  // Compute the high part as N1.
3947  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3948  DAG.getConstant(SimpleSize, DL,
3949  getShiftAmountTy(Lo.getValueType())));
3950  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3951  // Compute the low part as N0.
3952  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3953  return CombineTo(N, Lo, Hi);
3954  }
3955  }
3956 
3957  return SDValue();
3958 }
3959 
3960 SDValue DAGCombiner::visitMULO(SDNode *N) {
3961  bool IsSigned = (ISD::SMULO == N->getOpcode());
3962 
3963  // (mulo x, 2) -> (addo x, x)
3964  if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
3965  if (C2->getAPIntValue() == 2)
3966  return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
3967  N->getVTList(), N->getOperand(0), N->getOperand(0));
3968 
3969  return SDValue();
3970 }
3971 
3972 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3973  SDValue N0 = N->getOperand(0);
3974  SDValue N1 = N->getOperand(1);
3975  EVT VT = N0.getValueType();
3976 
3977  // fold vector ops
3978  if (VT.isVector())
3979  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3980  return FoldedVOp;
3981 
3982  // fold operation with constant operands.
3983  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3984  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3985  if (N0C && N1C)
3986  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3987 
3988  // canonicalize constant to RHS
3989  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3990  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3991  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3992 
3993  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3994  // Only do this if the current op isn't legal and the flipped one is.
3995  unsigned Opcode = N->getOpcode();
3996  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3997  if (!TLI.isOperationLegal(Opcode, VT) &&
3998  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3999  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4000  unsigned AltOpcode;
4001  switch (Opcode) {
4002  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4003  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4004  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4005  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4006  default: llvm_unreachable("Unknown MINMAX opcode");
4007  }
4008  if (TLI.isOperationLegal(AltOpcode, VT))
4009  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4010  }
4011 
4012  return SDValue();
4013 }
4014 
4015 /// If this is a bitwise logic instruction and both operands have the same
4016 /// opcode, try to sink the other opcode after the logic instruction.
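/// e.g. (and (zext x), (zext y)) --> (zext (and x, y)), or
///      (or (srl a, c), (srl b, c)) --> (srl (or a, b), c)  (illustrative).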
4017 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4018  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4019  EVT VT = N0.getValueType();
4020  unsigned LogicOpcode = N->getOpcode();
4021  unsigned HandOpcode = N0.getOpcode();
4022  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4023  LogicOpcode == ISD::XOR) && "Expected logic opcode");
4024  assert(HandOpcode == N1.getOpcode() && "Bad input!");
4025 
4026  // Bail early if none of these transforms apply.
4027  if (N0.getNumOperands() == 0)
4028  return SDValue();
4029 
4030  // FIXME: We should check number of uses of the operands to not increase
4031  // the instruction count for all transforms.
4032 
4033  // Handle size-changing casts.
4034  SDValue X = N0.getOperand(0);
4035  SDValue Y = N1.getOperand(0);
4036  EVT XVT = X.getValueType();
4037  SDLoc DL(N);
4038  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4039  HandOpcode == ISD::SIGN_EXTEND) {
4040  // If both operands have other uses, this transform would create extra
4041  // instructions without eliminating anything.
4042  if (!N0.hasOneUse() && !N1.hasOneUse())
4043  return SDValue();
4044  // We need matching integer source types.
4045  if (XVT != Y.getValueType())
4046  return SDValue();
4047  // Don't create an illegal op during or after legalization. Don't ever
4048  // create an unsupported vector op.
4049  if ((VT.isVector() || LegalOperations) &&
4050  !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4051  return SDValue();
4052  // Avoid infinite looping with PromoteIntBinOp.
4053  // TODO: Should we apply desirable/legal constraints to all opcodes?
4054  if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4055  !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4056  return SDValue();
4057  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4058  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4059  return DAG.getNode(HandOpcode, DL, VT, Logic);
4060  }
4061 
4062  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4063  if (HandOpcode == ISD::TRUNCATE) {
4064  // If both operands have other uses, this transform would create extra
4065  // instructions without eliminating anything.
4066  if (!N0.hasOneUse() && !N1.hasOneUse())
4067  return SDValue();
4068  // We need matching source types.
4069  if (XVT != Y.getValueType())
4070  return SDValue();
4071  // Don't create an illegal op during or after legalization.
4072  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4073  return SDValue();
4074  // Be extra careful sinking truncate. If it's free, there's no benefit in
4075  // widening a binop. Also, don't create a logic op on an illegal type.
4076  if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4077  return SDValue();
4078  if (!TLI.isTypeLegal(XVT))
4079  return SDValue();
4080  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4081  return DAG.getNode(HandOpcode, DL, VT, Logic);
4082  }
4083 
4084  // For binops SHL/SRL/SRA/AND:
4085  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4086  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4087  HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4088  N0.getOperand(1) == N1.getOperand(1)) {
4089  // If either operand has other uses, this transform is not an improvement.
4090  if (!N0.hasOneUse() || !N1.hasOneUse())
4091  return SDValue();
4092  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4093  return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4094  }
4095 
4096  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4097  if (HandOpcode == ISD::BSWAP) {
4098  // If either operand has other uses, this transform is not an improvement.
4099  if (!N0.hasOneUse() || !N1.hasOneUse())
4100  return SDValue();
4101  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4102  return DAG.getNode(HandOpcode, DL, VT, Logic);
4103  }
4104 
4105  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4106  // Only perform this optimization up until type legalization, before
4107  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4108  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4109  // we don't want to undo this promotion.
4110  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4111  // on scalars.
4112  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4113  Level <= AfterLegalizeTypes) {
4114  // Input types must be integer and the same.
4115  if (XVT.isInteger() && XVT == Y.getValueType()) {
4116  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4117  return DAG.getNode(HandOpcode, DL, VT, Logic);
4118  }
4119  }
4120 
4121  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4122  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4123  // If both shuffles use the same mask, and both shuffle within a single
4124  // vector, then it is worthwhile to move the swizzle after the operation.
4125  // The type-legalizer generates this pattern when loading illegal
4126  // vector types from memory. In many cases this allows additional shuffle
4127  // optimizations.
4128  // There are other cases where moving the shuffle after the xor/and/or
4129  // is profitable even if shuffles don't perform a swizzle.
4130  // If both shuffles use the same mask, and both shuffles have the same first
4131  // or second operand, then it might still be profitable to move the shuffle
4132  // after the xor/and/or operation.
4133  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4134  auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4135  auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4136  assert(X.getValueType() == Y.getValueType() &&
4137  "Inputs to shuffles are not the same type");
4138 
4139  // Check that both shuffles use the same mask. The masks are known to be of
4140  // the same length because the result vector type is the same.
4141  // Check also that shuffles have only one use to avoid introducing extra
4142  // instructions.
4143  if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4144  !SVN0->getMask().equals(SVN1->getMask()))
4145  return SDValue();
4146 
4147  // Don't try to fold this node if it requires introducing a
4148  // build vector of all zeros that might be illegal at this stage.
4149  SDValue ShOp = N0.getOperand(1);
4150  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4151  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4152 
4153  // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4154  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4155  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4156  N0.getOperand(0), N1.getOperand(0));
4157  return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4158  }
4159 
4160  // Don't try to fold this node if it requires introducing a
4161  // build vector of all zeros that might be illegal at this stage.
4162  ShOp = N0.getOperand(0);
4163  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4164  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4165 
4166  // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4167  if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4168  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4169  N1.getOperand(1));
4170  return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4171  }
4172  }
4173 
4174  return SDValue();
4175 }
4176 
4177 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
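/// e.g. (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0) is one of the
/// patterns recognized below (illustrative).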
4178 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4179  const SDLoc &DL) {
4180  SDValue LL, LR, RL, RR, N0CC, N1CC;
4181  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4182  !isSetCCEquivalent(N1, RL, RR, N1CC))
4183  return SDValue();
4184 
4185  assert(N0.getValueType() == N1.getValueType() &&
4186  "Unexpected operand types for bitwise logic op");
4187  assert(LL.getValueType() == LR.getValueType() &&
4188  RL.getValueType() == RR.getValueType() &&
4189  "Unexpected operand types for setcc");
4190 
4191  // If we're here post-legalization or the logic op type is not i1, the logic
4192  // op type must match a setcc result type. Also, all folds require new
4193  // operations on the left and right operands, so those types must match.
4194  EVT VT = N0.getValueType();
4195  EVT OpVT = LL.getValueType();
4196  if (LegalOperations || VT.getScalarType() != MVT::i1)
4197  if (VT != getSetCCResultType(OpVT))
4198  return SDValue();
4199  if (OpVT != RL.getValueType())
4200  return SDValue();
4201 
4202  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4203  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4204  bool IsInteger = OpVT.isInteger();
4205  if (LR == RR && CC0 == CC1 && IsInteger) {
4206  bool IsZero = isNullOrNullSplat(LR);
4207  bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4208 
4209  // All bits clear?
4210  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4211  // All sign bits clear?
4212  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4213  // Any bits set?
4214  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4215  // Any sign bits set?
4216  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4217 
4218  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4219  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4220  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4221  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4222  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4223  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4224  AddToWorklist(Or.getNode());
4225  return DAG.getSetCC(DL, VT, Or, LR, CC1);
4226  }
4227 
4228  // All bits set?
4229  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4230  // All sign bits set?
4231  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4232  // Any bits clear?
4233  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4234  // Any sign bits clear?
4235  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4236 
4237  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4238  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4239  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4240  // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
4241  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4242  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4243  AddToWorklist(And.getNode());
4244  return DAG.getSetCC(DL, VT, And, LR, CC1);
4245  }
4246  }
4247 
4248  // TODO: What is the 'or' equivalent of this fold?
4249  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4250  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4251  IsInteger && CC0 == ISD::SETNE &&
4252  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4253  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4254  SDValue One = DAG.getConstant(1, DL, OpVT);
4255  SDValue Two = DAG.getConstant(2, DL, OpVT);
4256  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4257  AddToWorklist(Add.getNode());
4258  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4259  }
4260 
4261  // Try more general transforms if the predicates match and the only user of
4262  // the compares is the 'and' or 'or'.
4263  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4264  N0.hasOneUse() && N1.hasOneUse()) {
4265  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4266  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4267  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4268  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4269  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4270  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4271  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4272  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4273  }
4274 
4275  // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4276  if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4277  // Match a shared variable operand and 2 non-opaque constant operands.
4278  ConstantSDNode *C0 = isConstOrConstSplat(LR);
4279  ConstantSDNode *C1 = isConstOrConstSplat(RR);
4280  if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4281  // Canonicalize larger constant as C0.
4282  if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4283  std::swap(C0, C1);
4284 
4285  // The difference of the constants must be a single bit.
4286  const APInt &C0Val = C0->getAPIntValue();
4287  const APInt &C1Val = C1->getAPIntValue();
4288  if ((C0Val - C1Val).isPowerOf2()) {
4289  // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4290  // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4291  SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4292  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4293  SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4294  SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4295  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4296  return DAG.getSetCC(DL, VT, And, Zero, CC0);
4297  }
4298  }
4299  }
4300  }
4301 
4302  // Canonicalize equivalent operands to LL == RL.
4303  if (LL == RR && LR == RL) {
4304  CC1 = ISD::getSetCCSwappedOperands(CC1);
4305  std::swap(RL, RR);
4306  }
4307 
4308  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4309  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4310  if (LL == RL && LR == RR) {
4311  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4312  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4313  if (NewCC != ISD::SETCC_INVALID &&
4314  (!LegalOperations ||
4315  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4316  TLI.isOperationLegal(ISD::SETCC, OpVT))))
4317  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4318  }
4319 
4320  return SDValue();
4321 }
4322 
4323 /// This contains all DAGCombine rules which reduce two values combined by
4324 /// an And operation to a single value. This makes them reusable in the context
4325 /// of visitSELECT(). Rules involving constants are not included as
4326 /// visitSELECT() already handles those cases.
4327 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4328  EVT VT = N1.getValueType();
4329  SDLoc DL(N);
4330 
4331  // fold (and x, undef) -> 0
4332  if (N0.isUndef() || N1.isUndef())
4333  return DAG.getConstant(0, DL, VT);
4334 
4335  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4336  return V;
4337 
4338  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4339  VT.getSizeInBits() <= 64) {
4340  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4341  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4342  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4343  // immediate for an add, but it is legal if its top c2 bits are set,
4344  // transform the ADD so the immediate doesn't need to be materialized
4345  // in a register.
4346  APInt ADDC = ADDI->getAPIntValue();
4347  APInt SRLC = SRLI->getAPIntValue();
4348  if (ADDC.getMinSignedBits() <= 64 &&
4349  SRLC.ult(VT.getSizeInBits()) &&
4350  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4351  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4352  SRLC.getZExtValue());
4353  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4354  ADDC |= Mask;
4355  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4356  SDLoc DL0(N0);
4357  SDValue NewAdd =
4358  DAG.getNode(ISD::ADD, DL0, VT,
4359  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4360  CombineTo(N0.getNode(), NewAdd);
4361  // Return N so it doesn't get rechecked!
4362  return SDValue(N, 0);
4363  }
4364  }
4365  }
4366  }
4367  }
4368  }
4369 
4370  // Reduce bit extract of low half of an integer to the narrower type.
4371  // (and (srl i64:x, K), KMask) ->
4372  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
4373  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4374  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4375  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4376  unsigned Size = VT.getSizeInBits();
4377  const APInt &AndMask = CAnd->getAPIntValue();
4378  unsigned ShiftBits = CShift->getZExtValue();
4379 
4380  // Bail out, this node will probably disappear anyway.
4381  if (ShiftBits == 0)
4382  return SDValue();
4383 
4384  unsigned MaskBits = AndMask.countTrailingOnes();
4385  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4386 
4387  if (AndMask.isMask() &&
4388  // Required bits must not span the two halves of the integer and
4389  // must fit in the half size type.
4390  (ShiftBits + MaskBits <= Size / 2) &&
4391  TLI.isNarrowingProfitable(VT, HalfVT) &&
4392  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4393  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4394  TLI.isTruncateFree(VT, HalfVT) &&
4395  TLI.isZExtFree(HalfVT, VT)) {
4396  // The isNarrowingProfitable is to avoid regressions on PPC and
4397  // AArch64 which match a few 64-bit bit insert / bit extract patterns
4398  // on downstream users of this. Those patterns could probably be
4399  // extended to handle extensions mixed in.
4400 
4401  SDLoc SL(N0);
4402  assert(MaskBits <= Size);
4403 
4404  // Extracting the highest bit of the low half.
4405  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4406  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4407  N0.getOperand(0));
4408 
4409  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4410  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4411  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4412  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4413  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4414  }
4415  }
4416  }
4417  }
4418 
4419  return SDValue();
4420 }
4421 
4422 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4423  EVT LoadResultTy, EVT &ExtVT) {
4424  if (!AndC->getAPIntValue().isMask())
4425  return false;
4426 
4427  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4428 
4429  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4430  EVT LoadedVT = LoadN->getMemoryVT();
4431 
4432  if (ExtVT == LoadedVT &&
4433  (!LegalOperations ||
4434  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4435  // ZEXTLOAD will match without needing to change the size of the value being
4436  // loaded.
4437  return true;
4438  }
4439 
4440  // Do not change the width of a volatile load.
4441  if (LoadN->isVolatile())
4442  return false;
4443 
4444  // Do not generate loads of non-round integer types since these can
4445  // be expensive (and would be wrong if the type is not byte sized).
4446  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4447  return false;
4448 
4449  if (LegalOperations &&
4450  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4451  return false;
4452 
4453  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4454  return false;
4455 
4456  return true;
4457 }
4458 
4459 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4460  ISD::LoadExtType ExtType, EVT &MemVT,
4461  unsigned ShAmt) {
4462  if (!LDST)
4463  return false;
4464  // Only allow byte offsets.
4465  if (ShAmt % 8)
4466  return false;
4467 
4468  // Do not generate loads of non-round integer types since these can
4469  // be expensive (and would be wrong if the type is not byte sized).
4470  if (!MemVT.isRound())
4471  return false;
4472 
4473  // Don't change the width of a volatile load.
4474  if (LDST->isVolatile())
4475  return false;
4476 
4477  // Verify that we are actually reducing a load width here.
4478  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4479  return false;
4480 
4481  // Ensure that this isn't going to produce an unsupported unaligned access.
4482  if (ShAmt &&
4483  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4484  LDST->getAddressSpace(), ShAmt / 8))
4485  return false;
4486 
4487  // It's not possible to generate a constant of extended or untyped type.
4488  EVT PtrType = LDST->getBasePtr().getValueType();
4489  if (PtrType == MVT::Untyped || PtrType.isExtended())
4490  return false;
4491 
4492  if (isa<LoadSDNode>(LDST)) {
4493  LoadSDNode *Load = cast<LoadSDNode>(LDST);
4494  // Don't transform one with multiple uses; this would require adding a new
4495  // load.
4496  if (!SDValue(Load, 0).hasOneUse())
4497  return false;
4498 
4499  if (LegalOperations &&
4500  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4501  return false;
4502 
4503  // For the transform to be legal, the load must produce only two values
4504  // (the value loaded and the chain). Don't transform a pre-increment
4505  // load, for example, which produces an extra value. Otherwise the
4506  // transformation is not equivalent, and the downstream logic to replace
4507  // uses gets things wrong.
4508  if (Load->getNumValues() > 2)
4509  return false;
4510 
4511  // If the load that we're shrinking is an extload and we're not just
4512  // discarding the extension we can't simply shrink the load. Bail.
4513  // TODO: It would be possible to merge the extensions in some cases.
4514  if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4515  Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4516  return false;
4517 
4518  if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4519  return false;
4520  } else {
4521  assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4522  StoreSDNode *Store = cast<StoreSDNode>(LDST);
4523  // Can't write outside the original store
4524  if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4525  return false;
4526 
4527  if (LegalOperations &&
4528  !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4529  return false;
4530  }
4531  return true;
4532 }
4533 
4534 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4535  SmallVectorImpl<LoadSDNode*> &Loads,
4536  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4537  ConstantSDNode *Mask,
4538  SDNode *&NodeToMask) {
4539  // Recursively search for the operands, looking for loads which can be
4540  // narrowed.
4541  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4542  SDValue Op = N->getOperand(i);
4543 
4544  if (Op.getValueType().isVector())
4545  return false;
4546 
4547  // Some constants may need fixing up later if they are too large.
4548  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4549  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4550  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4551  NodesWithConsts.insert(N);
4552  continue;
4553  }
4554 
4555  if (!Op.hasOneUse())
4556  return false;
4557 
4558  switch(Op.getOpcode()) {
4559  case ISD::LOAD: {
4560  auto *Load = cast<LoadSDNode>(Op);
4561  EVT ExtVT;
4562  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4563  isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4564 
4565  // ZEXTLOAD is already small enough.
4566  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4567  ExtVT.bitsGE(Load->getMemoryVT()))
4568  continue;
4569 
4570  // Use LE to convert equal sized loads to zext.
4571  if (ExtVT.bitsLE(Load->getMemoryVT()))
4572  Loads.push_back(Load);
4573 
4574  continue;
4575  }
4576  return false;
4577  }
4578  case ISD::ZERO_EXTEND:
4579  case ISD::AssertZext: {
4580  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4581  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4582  EVT VT = Op.getOpcode() == ISD::AssertZext ?
4583  cast<VTSDNode>(Op.getOperand(1))->getVT() :
4584  Op.getOperand(0).getValueType();
4585 
4586  // We can accept extending nodes if the mask is wider than, or equal in
4587  // width to, the original type.
4588  if (ExtVT.bitsGE(VT))
4589  continue;
4590  break;
4591  }
4592  case ISD::OR:
4593  case ISD::XOR:
4594  case ISD::AND:
4595  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4596  NodeToMask))
4597  return false;
4598  continue;
4599  }
4600 
4601  // Allow one node which will be masked along with any loads found.
4602  if (NodeToMask)
4603  return false;
4604 
4605  // Also ensure that the node to be masked only produces one data result.
4606  NodeToMask = Op.getNode();
4607  if (NodeToMask->getNumValues() > 1) {
4608  bool HasValue = false;
4609  for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4610  MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4611  if (VT != MVT::Glue && VT != MVT::Other) {
4612  if (HasValue) {
4613  NodeToMask = nullptr;
4614  return false;
4615  }
4616  HasValue = true;
4617  }
4618  }
4619  assert(HasValue && "Node to be masked has no data result?");
4620  }
4621  }
4622  return true;
4623 }
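// A sketch of the trees the recursive search accepts: given
//   (and (or (load a), (xor (load b), C)), Mask)
// both loads become candidates for narrowing, the xor is recorded in
// NodesWithConsts when C has bits outside Mask (so C can be narrowed later),
// and at most one other single-data-result node may become NodeToMask and
// receive an explicit AND with the mask.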
4624 
4625 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4626  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4627  if (!Mask)
4628  return false;
4629 
4630  if (!Mask->getAPIntValue().isMask())
4631  return false;
4632 
4633  // No need to do anything if the and directly uses a load.
4634  if (isa<LoadSDNode>(N->getOperand(0)))
4635  return false;
4636 
4637  SmallVector<LoadSDNode*, 8> Loads;
4638  SmallPtrSet<SDNode*, 2> NodesWithConsts;
4639  SDNode *FixupNode = nullptr;
4640  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4641  if (Loads.size() == 0)
4642  return false;
4643 
4644  LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4645  SDValue MaskOp = N->getOperand(1);
4646 
4647  // If it exists, fix up the single node we allow in the tree that needs
4648  // masking.
4649  if (FixupNode) {
4650  LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4651  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4652  FixupNode->getValueType(0),
4653  SDValue(FixupNode, 0), MaskOp);
4654  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4655  if (And.getOpcode() == ISD::AND)
4656  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4657  }
4658 
4659  // Narrow any constants that need it.
4660  for (auto *LogicN : NodesWithConsts) {
4661  SDValue Op0 = LogicN->getOperand(0);
4662  SDValue Op1 = LogicN->getOperand(1);
4663 
4664  if (isa<ConstantSDNode>(Op0))
4665  std::swap(Op0, Op1);
4666 
4667  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4668  Op1, MaskOp);
4669 
4670  DAG.UpdateNodeOperands(LogicN, Op0, And);
4671  }
4672 
4673  // Create narrow loads.
4674  for (auto *Load : Loads) {
4675  LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4676  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4677  SDValue(Load, 0), MaskOp);
4678  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4679  if (And.getOpcode() == ISD::AND)
4680  And = SDValue(
4681  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4682  SDValue NewLoad = ReduceLoadWidth(And.getNode());
4683  assert(NewLoad &&
4684  "Shouldn't be masking the load if it can't be narrowed");
4685  CombineTo(Load, NewLoad, NewLoad.getValue(1));
4686  }
4687  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4688  return true;
4689  }
4690  return false;
4691 }
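// The rewrite above is sound because AND distributes over the bitwise
// operators walked by the search, e.g. (x | y) & m == (x & m) | (y & m), and
// likewise for xor and and, so masking each load (and the single FixupNode,
// if any) is equivalent to masking the final result. A sketch of the effect,
// assuming both narrowed loads are legal:
//   (and (or (load i32 a), (load i32 b)), 255)
//     --> (or (zextload i8 a), (zextload i8 b))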
4692 
4693 // Unfold
4694 // x & (-1 'logical shift' y)
4695 // To
4696 // (x 'opposite logical shift' y) 'logical shift' y
4697 // if it is better for performance.
4698 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4699  assert(N->getOpcode() == ISD::AND);
4700 
4701  SDValue N0 = N->getOperand(0);
4702  SDValue N1 = N->getOperand(1);
4703 
4704  // Do we actually prefer shifts over mask?
4705  if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
4706  return SDValue();
4707 
4708  // Try to match (-1 '[outer] logical shift' y)
4709  unsigned OuterShift;
4710  unsigned InnerShift; // The opposite direction to the OuterShift.
4711  SDValue Y; // Shift amount.
4712  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4713  if (!M.hasOneUse())
4714  return false;
4715  OuterShift = M->getOpcode();
4716  if (OuterShift == ISD::SHL)
4717  InnerShift = ISD::SRL;
4718  else if (OuterShift == ISD::SRL)
4719  InnerShift = ISD::SHL;
4720  else
4721  return false;
4722  if (!isAllOnesConstant(M->getOperand(0)))
4723  return false;
4724  Y = M->getOperand(1);
4725  return true;
4726  };
4727 
4728  SDValue X;
4729  if (matchMask(N1))
4730  X = N0;
4731  else if (matchMask(N0))
4732  X = N1;
4733  else
4734  return SDValue();
4735 
4736  SDLoc DL(N);
4737  EVT VT = N->getValueType(0);
4738 
4739  // tmp = x 'opposite logical shift' y
4740  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4741  // ret = tmp 'logical shift' y
4742  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4743 
4744  return T1;
4745 }
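// On scalars this is the usual bit-clearing identity (a sketch, assuming an
// unsigned x and a shift amount y smaller than the bit width):
//   x & (~0u << y) == (x >> y) << y   // clear the low y bits
//   x & (~0u >> y) == (x << y) >> y   // clear the high y bits
// The fold only fires when the target reports that a variable-shift pair is
// preferable to materialising the shifted all-ones mask.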
4746 
4747 SDValue DAGCombiner::visitAND(SDNode *N) {
4748  SDValue N0 = N->getOperand(0);
4749  SDValue N1 = N->getOperand(1);
4750  EVT VT = N1.getValueType();
4751 
4752  // x & x --> x
4753  if (N0 == N1)
4754  return N0;
4755 
4756  // fold vector ops
4757  if (VT.isVector()) {
4758  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4759  return FoldedVOp;
4760 
4761  // fold (and x, 0) -> 0, vector edition
4762  if (ISD::isBuildVectorAllZeros(N0.getNode()))
4763  // do not return N0, because undef node may exist in N0
4764  return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4765  SDLoc(N), N0.getValueType());
4766  if (ISD::isBuildVectorAllZeros(N1.getNode()))
4767  // do not return N1, because undef node may exist in N1
4768  return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4769  SDLoc(N), N1.getValueType());
4770 
4771  // fold (and x, -1) -> x, vector edition
4772  if (ISD::isBuildVectorAllOnes(N0.getNode()))
4773  return N1;
4774  if (ISD::isBuildVectorAllOnes(N1.getNode()))
4775  return N0;
4776  }
4777 
4778  // fold (and c1, c2) -> c1&c2
4779  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4780  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4781  if (N0C && N1C && !N1C->isOpaque())
4782  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4783  // canonicalize constant to RHS
4784  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4785  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4786  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4787  // fold (and x, -1) -> x
4788  if (isAllOnesConstant(N1))
4789  return N0;
4790  // if (and x, c) is known to be zero, return 0
4791  unsigned BitWidth = VT.getScalarSizeInBits();
4792  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4793  APInt::getAllOnesValue(BitWidth)))
4794  return DAG.getConstant(0, SDLoc(N), VT);
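 // e.g. if x is (shl y, 8), its low 8 bits are known zero, so (and x, 0xFF)
 // is known to be zero and folds to the constant 0.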
4795 
4796  if (SDValue NewSel = foldBinOpIntoSelect(N))
4797  return NewSel;
4798 
4799  // reassociate and
4800  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4801  return RAND;
4802 
4803  // Try to convert a constant mask AND into a shuffle clear mask.
4804  if (VT.isVector())
4805  if (SDValue Shuffle = XformToShuffleWithZero(N))
4806  return Shuffle;
4807 
4808  // fold (and (or x, C), D) -> D if (C & D) == D
4809  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4810  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4811  };
4812  if (N0.getOpcode() == ISD::OR &&
4813  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4814  return N1;
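 // e.g. (and (or x, 0xFF), 0x0F) --> 0x0F, because 0x0F is a subset of 0xFF.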
4815  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4816  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4817  SDValue N0Op0 = N0.getOperand(0);
4818  APInt Mask = ~N1C->getAPIntValue();
4819  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4820  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4821  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4822  N0.getValueType(), N0Op0);
4823 
4824  // Replace uses of the AND with uses of the Zero extend node.
4825  CombineTo(N, Zext);
4826 
4827  // We actually want to replace all uses of the any_extend with the
4828  // zero_extend, to avoid duplicating things. This will later cause this
4829  // AND to be folded.
4830  CombineTo(N0.getNode(), Zext);
4831  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4832  }
4833  }
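 // e.g. (and (any_extend i8 x to i32), 0xFF) --> (zero_extend i8 x to i32):
 // the AND zeroes exactly the high bits that the any_extend left undefined,
 // which is what zero_extend defines them to be anyway.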
4834  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4835  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4836  // already be zero by virtue of the width of the base type of the load.
4837  //
4838  // the 'X' node here can either be nothing or an extract_vector_elt to catch
4839  // more cases.
4840  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4841  N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4842  N0.getOperand(0).getOpcode() == ISD::LOAD &&
4843  N0.getOperand(0).getResNo() == 0) ||
4844  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4845  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4846  N0 : N0.getOperand(0) );
4847 
4848  // Get the constant (if applicable) the zero'th operand is being ANDed with.
4849  // This can be a pure constant or a vector splat, in which case we treat the
4850  // vector as a scalar and use the splat value.
4851  APInt Constant = APInt::getNullValue(1);
4852  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4853  Constant = C->getAPIntValue();
4854  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4855  APInt SplatValue, SplatUndef;
4856  unsigned SplatBitSize;
4857  bool HasAnyUndefs;
4858  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4859  SplatBitSize, HasAnyUndefs);
4860  if (IsSplat) {
4861  // Undef bits can contribute to a possible optimisation if set, so
4862  // set them.
4863  SplatValue |= SplatUndef;
4864 
4865  // The splat value may be something like "0x00FFFFFF", which means 0 for
4866  // the first vector value and FF for the rest, repeating. We need a mask
4867  // that will apply equally to all members of the vector, so AND all the
4868  // lanes of the constant together.
4869  EVT VT = Vector->getValueType(0);
4870  unsigned BitWidth = VT.getScalarSizeInBits();
4871 
4872  // If the splat value has been compressed to a bitlength lower
4873  // than the size of the vector lane, we need to re-expand it to
4874  // the lane size.
4875  if (BitWidth > SplatBitSize)
4876  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4877  SplatBitSize < BitWidth;
4878  SplatBitSize = SplatBitSize * 2)
4879  SplatValue |= SplatValue.shl(SplatBitSize);
4880 
4881  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4882  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4883  if (SplatBitSize % BitWidth == 0) {
4884  Constant = APInt::getAllOnesValue(BitWidth);
4885  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4886  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4887  }
4888  }
4889  }
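 // A worked example of the splat handling above: with 32-bit lanes and an
 // 8-bit splat value of 0xAB, the loop re-expands SplatValue to 0xABABABAB
 // and Constant becomes 0xABABABAB; for a 64-bit splat over 32-bit lanes the
 // loop is skipped and Constant is the AND of the splat's two 32-bit halves.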
4890 
4891  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4892  // actually legal and isn't going to get expanded, else this is a false
4893  // optimis