LLVM 17.0.0git
DAGCombiner.cpp
Go to the documentation of this file.
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/Metadata.h"
58#include "llvm/Support/Debug.h"
66#include <algorithm>
67#include <cassert>
68#include <cstdint>
69#include <functional>
70#include <iterator>
71#include <optional>
72#include <string>
73#include <tuple>
74#include <utility>
75#include <variant>
76
77using namespace llvm;
78
79#define DEBUG_TYPE "dagcombine"
80
81STATISTIC(NodesCombined , "Number of dag nodes combined");
82STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
83STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
84STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
85STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
86STATISTIC(SlicedLoads, "Number of load sliced");
87STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
88
89static cl::opt<bool>
90CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
91 cl::desc("Enable DAG combiner's use of IR alias analysis"));
92
93static cl::opt<bool>
94UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
95 cl::desc("Enable DAG combiner's use of TBAA"));
96
97#ifndef NDEBUG
99CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
100 cl::desc("Only use DAG-combiner alias analysis in this"
101 " function"));
102#endif
103
104/// Hidden option to stress test load slicing, i.e., when this option
105/// is enabled, load slicing bypasses most of its profitability guards.
106static cl::opt<bool>
107StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
108 cl::desc("Bypass the profitability model of load slicing"),
109 cl::init(false));
110
111static cl::opt<bool>
112 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
113 cl::desc("DAG combiner may split indexing from loads"));
114
115static cl::opt<bool>
116 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
117 cl::desc("DAG combiner enable merging multiple stores "
118 "into a wider store"));
119
121 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
122 cl::desc("Limit the number of operands to inline for Token Factors"));
123
125 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
126 cl::desc("Limit the number of times for the same StoreNode and RootNode "
127 "to bail out in store merging dependence check"));
128
130 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
131 cl::desc("DAG combiner enable reducing the width of load/op/store "
132 "sequence"));
133
135 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
136 cl::desc("DAG combiner enable load/<replace bytes>/store with "
137 "a narrower store"));
138
140 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
141 cl::desc(
142 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
143
144namespace {
145
146 class DAGCombiner {
147 SelectionDAG &DAG;
148 const TargetLowering &TLI;
149 const SelectionDAGTargetInfo *STI;
151 CodeGenOpt::Level OptLevel;
152 bool LegalDAG = false;
153 bool LegalOperations = false;
154 bool LegalTypes = false;
155 bool ForCodeSize;
156 bool DisableGenericCombines;
157
158 /// Worklist of all of the nodes that need to be simplified.
159 ///
160 /// This must behave as a stack -- new nodes to process are pushed onto the
161 /// back and when processing we pop off of the back.
162 ///
163 /// The worklist will not contain duplicates but may contain null entries
164 /// due to nodes being deleted from the underlying DAG.
166
167 /// Mapping from an SDNode to its position on the worklist.
168 ///
169 /// This is used to find and remove nodes from the worklist (by nulling
170 /// them) when they are deleted from the underlying DAG. It relies on
171 /// stable indices of nodes within the worklist.
173 /// This records all nodes attempted to add to the worklist since we
174 /// considered a new worklist entry. As we keep do not add duplicate nodes
175 /// in the worklist, this is different from the tail of the worklist.
177
178 /// Set of nodes which have been combined (at least once).
179 ///
180 /// This is used to allow us to reliably add any operands of a DAG node
181 /// which have not yet been combined to the worklist.
182 SmallPtrSet<SDNode *, 32> CombinedNodes;
183
184 /// Map from candidate StoreNode to the pair of RootNode and count.
185 /// The count is used to track how many times we have seen the StoreNode
186 /// with the same RootNode bail out in dependence check. If we have seen
187 /// the bail out for the same pair many times over a limit, we won't
188 /// consider the StoreNode with the same RootNode as store merging
189 /// candidate again.
191
192 // AA - Used for DAG load/store alias analysis.
193 AliasAnalysis *AA;
194
195 /// When an instruction is simplified, add all users of the instruction to
196 /// the work lists because they might get more simplified now.
197 void AddUsersToWorklist(SDNode *N) {
198 for (SDNode *Node : N->uses())
199 AddToWorklist(Node);
200 }
201
202 /// Convenient shorthand to add a node and all of its user to the worklist.
203 void AddToWorklistWithUsers(SDNode *N) {
204 AddUsersToWorklist(N);
205 AddToWorklist(N);
206 }
207
208 // Prune potentially dangling nodes. This is called after
209 // any visit to a node, but should also be called during a visit after any
210 // failed combine which may have created a DAG node.
211 void clearAddedDanglingWorklistEntries() {
212 // Check any nodes added to the worklist to see if they are prunable.
213 while (!PruningList.empty()) {
214 auto *N = PruningList.pop_back_val();
215 if (N->use_empty())
216 recursivelyDeleteUnusedNodes(N);
217 }
218 }
219
220 SDNode *getNextWorklistEntry() {
221 // Before we do any work, remove nodes that are not in use.
222 clearAddedDanglingWorklistEntries();
223 SDNode *N = nullptr;
224 // The Worklist holds the SDNodes in order, but it may contain null
225 // entries.
226 while (!N && !Worklist.empty()) {
227 N = Worklist.pop_back_val();
228 }
229
230 if (N) {
231 bool GoodWorklistEntry = WorklistMap.erase(N);
232 (void)GoodWorklistEntry;
233 assert(GoodWorklistEntry &&
234 "Found a worklist entry without a corresponding map entry!");
235 }
236 return N;
237 }
238
239 /// Call the node-specific routine that folds each particular type of node.
240 SDValue visit(SDNode *N);
241
242 public:
243 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
244 : DAG(D), TLI(D.getTargetLoweringInfo()),
245 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
246 ForCodeSize = DAG.shouldOptForSize();
247 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
248
249 MaximumLegalStoreInBits = 0;
250 // We use the minimum store size here, since that's all we can guarantee
251 // for the scalable vector types.
252 for (MVT VT : MVT::all_valuetypes())
253 if (EVT(VT).isSimple() && VT != MVT::Other &&
254 TLI.isTypeLegal(EVT(VT)) &&
255 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
256 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
257 }
258
259 void ConsiderForPruning(SDNode *N) {
260 // Mark this for potential pruning.
261 PruningList.insert(N);
262 }
263
264 /// Add to the worklist making sure its instance is at the back (next to be
265 /// processed.)
266 void AddToWorklist(SDNode *N) {
267 assert(N->getOpcode() != ISD::DELETED_NODE &&
268 "Deleted Node added to Worklist");
269
270 // Skip handle nodes as they can't usefully be combined and confuse the
271 // zero-use deletion strategy.
272 if (N->getOpcode() == ISD::HANDLENODE)
273 return;
274
275 ConsiderForPruning(N);
276
277 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
278 Worklist.push_back(N);
279 }
280
281 /// Remove all instances of N from the worklist.
282 void removeFromWorklist(SDNode *N) {
283 CombinedNodes.erase(N);
284 PruningList.remove(N);
285 StoreRootCountMap.erase(N);
286
287 auto It = WorklistMap.find(N);
288 if (It == WorklistMap.end())
289 return; // Not in the worklist.
290
291 // Null out the entry rather than erasing it to avoid a linear operation.
292 Worklist[It->second] = nullptr;
293 WorklistMap.erase(It);
294 }
295
296 void deleteAndRecombine(SDNode *N);
297 bool recursivelyDeleteUnusedNodes(SDNode *N);
298
299 /// Replaces all uses of the results of one DAG node with new values.
300 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
301 bool AddTo = true);
302
303 /// Replaces all uses of the results of one DAG node with new values.
304 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
305 return CombineTo(N, &Res, 1, AddTo);
306 }
307
308 /// Replaces all uses of the results of one DAG node with new values.
309 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
310 bool AddTo = true) {
311 SDValue To[] = { Res0, Res1 };
312 return CombineTo(N, To, 2, AddTo);
313 }
314
315 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
316
317 private:
318 unsigned MaximumLegalStoreInBits;
319
320 /// Check the specified integer node value to see if it can be simplified or
321 /// if things it uses can be simplified by bit propagation.
322 /// If so, return true.
323 bool SimplifyDemandedBits(SDValue Op) {
324 unsigned BitWidth = Op.getScalarValueSizeInBits();
326 return SimplifyDemandedBits(Op, DemandedBits);
327 }
328
329 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
330 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
331 KnownBits Known;
332 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
333 return false;
334
335 // Revisit the node.
336 AddToWorklist(Op.getNode());
337
338 CommitTargetLoweringOpt(TLO);
339 return true;
340 }
341
342 /// Check the specified vector node value to see if it can be simplified or
343 /// if things it uses can be simplified as it only uses some of the
344 /// elements. If so, return true.
345 bool SimplifyDemandedVectorElts(SDValue Op) {
346 // TODO: For now just pretend it cannot be simplified.
347 if (Op.getValueType().isScalableVector())
348 return false;
349
350 unsigned NumElts = Op.getValueType().getVectorNumElements();
351 APInt DemandedElts = APInt::getAllOnes(NumElts);
352 return SimplifyDemandedVectorElts(Op, DemandedElts);
353 }
354
355 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
356 const APInt &DemandedElts,
357 bool AssumeSingleUse = false);
358 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
359 bool AssumeSingleUse = false);
360
361 bool CombineToPreIndexedLoadStore(SDNode *N);
362 bool CombineToPostIndexedLoadStore(SDNode *N);
363 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
364 bool SliceUpLoad(SDNode *N);
365
366 // Looks up the chain to find a unique (unaliased) store feeding the passed
367 // load. If no such store is found, returns a nullptr.
368 // Note: This will look past a CALLSEQ_START if the load is chained to it so
369 // so that it can find stack stores for byval params.
370 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
371 // Scalars have size 0 to distinguish from singleton vectors.
372 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
373 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
374 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
375
376 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
377 /// load.
378 ///
379 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
380 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
381 /// \param EltNo index of the vector element to load.
382 /// \param OriginalLoad load that EVE came from to be replaced.
383 /// \returns EVE on success SDValue() on failure.
384 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
385 SDValue EltNo,
386 LoadSDNode *OriginalLoad);
387 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
388 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
389 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
390 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
391 SDValue PromoteIntBinOp(SDValue Op);
392 SDValue PromoteIntShiftOp(SDValue Op);
393 SDValue PromoteExtend(SDValue Op);
394 bool PromoteLoad(SDValue Op);
395
396 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
397 SDValue RHS, SDValue True, SDValue False,
399
400 /// Call the node-specific routine that knows how to fold each
401 /// particular type of node. If that doesn't do anything, try the
402 /// target-specific DAG combines.
403 SDValue combine(SDNode *N);
404
405 // Visitation implementation - Implement dag node combining for different
406 // node types. The semantics are as follows:
407 // Return Value:
408 // SDValue.getNode() == 0 - No change was made
409 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
410 // otherwise - N should be replaced by the returned Operand.
411 //
412 SDValue visitTokenFactor(SDNode *N);
413 SDValue visitMERGE_VALUES(SDNode *N);
414 SDValue visitADD(SDNode *N);
415 SDValue visitADDLike(SDNode *N);
416 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
417 SDValue visitSUB(SDNode *N);
418 SDValue visitADDSAT(SDNode *N);
419 SDValue visitSUBSAT(SDNode *N);
420 SDValue visitADDC(SDNode *N);
421 SDValue visitADDO(SDNode *N);
422 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
423 SDValue visitSUBC(SDNode *N);
424 SDValue visitSUBO(SDNode *N);
425 SDValue visitADDE(SDNode *N);
426 SDValue visitADDCARRY(SDNode *N);
427 SDValue visitSADDO_CARRY(SDNode *N);
428 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
429 SDValue visitSUBE(SDNode *N);
430 SDValue visitSUBCARRY(SDNode *N);
431 SDValue visitSSUBO_CARRY(SDNode *N);
432 SDValue visitMUL(SDNode *N);
433 SDValue visitMULFIX(SDNode *N);
434 SDValue useDivRem(SDNode *N);
435 SDValue visitSDIV(SDNode *N);
436 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
437 SDValue visitUDIV(SDNode *N);
438 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
439 SDValue visitREM(SDNode *N);
440 SDValue visitMULHU(SDNode *N);
441 SDValue visitMULHS(SDNode *N);
442 SDValue visitAVG(SDNode *N);
443 SDValue visitABD(SDNode *N);
444 SDValue visitSMUL_LOHI(SDNode *N);
445 SDValue visitUMUL_LOHI(SDNode *N);
446 SDValue visitMULO(SDNode *N);
447 SDValue visitIMINMAX(SDNode *N);
448 SDValue visitAND(SDNode *N);
449 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
450 SDValue visitOR(SDNode *N);
451 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
452 SDValue visitXOR(SDNode *N);
453 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
454 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
455 SDValue visitSHL(SDNode *N);
456 SDValue visitSRA(SDNode *N);
457 SDValue visitSRL(SDNode *N);
458 SDValue visitFunnelShift(SDNode *N);
459 SDValue visitSHLSAT(SDNode *N);
460 SDValue visitRotate(SDNode *N);
461 SDValue visitABS(SDNode *N);
462 SDValue visitBSWAP(SDNode *N);
463 SDValue visitBITREVERSE(SDNode *N);
464 SDValue visitCTLZ(SDNode *N);
465 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
466 SDValue visitCTTZ(SDNode *N);
467 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
468 SDValue visitCTPOP(SDNode *N);
469 SDValue visitSELECT(SDNode *N);
470 SDValue visitVSELECT(SDNode *N);
471 SDValue visitSELECT_CC(SDNode *N);
472 SDValue visitSETCC(SDNode *N);
473 SDValue visitSETCCCARRY(SDNode *N);
474 SDValue visitSIGN_EXTEND(SDNode *N);
475 SDValue visitZERO_EXTEND(SDNode *N);
476 SDValue visitANY_EXTEND(SDNode *N);
477 SDValue visitAssertExt(SDNode *N);
478 SDValue visitAssertAlign(SDNode *N);
479 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
480 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
481 SDValue visitTRUNCATE(SDNode *N);
482 SDValue visitBITCAST(SDNode *N);
483 SDValue visitFREEZE(SDNode *N);
484 SDValue visitBUILD_PAIR(SDNode *N);
485 SDValue visitFADD(SDNode *N);
486 SDValue visitVP_FADD(SDNode *N);
487 SDValue visitSTRICT_FADD(SDNode *N);
488 SDValue visitFSUB(SDNode *N);
489 SDValue visitFMUL(SDNode *N);
490 SDValue visitFMA(SDNode *N);
491 SDValue visitFDIV(SDNode *N);
492 SDValue visitFREM(SDNode *N);
493 SDValue visitFSQRT(SDNode *N);
494 SDValue visitFCOPYSIGN(SDNode *N);
495 SDValue visitFPOW(SDNode *N);
496 SDValue visitSINT_TO_FP(SDNode *N);
497 SDValue visitUINT_TO_FP(SDNode *N);
498 SDValue visitFP_TO_SINT(SDNode *N);
499 SDValue visitFP_TO_UINT(SDNode *N);
500 SDValue visitFP_ROUND(SDNode *N);
501 SDValue visitFP_EXTEND(SDNode *N);
502 SDValue visitFNEG(SDNode *N);
503 SDValue visitFABS(SDNode *N);
504 SDValue visitFCEIL(SDNode *N);
505 SDValue visitFTRUNC(SDNode *N);
506 SDValue visitFFLOOR(SDNode *N);
507 SDValue visitFMinMax(SDNode *N);
508 SDValue visitBRCOND(SDNode *N);
509 SDValue visitBR_CC(SDNode *N);
510 SDValue visitLOAD(SDNode *N);
511
512 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
513 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
514
515 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
516
517 SDValue visitSTORE(SDNode *N);
518 SDValue visitLIFETIME_END(SDNode *N);
519 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
520 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
521 SDValue visitBUILD_VECTOR(SDNode *N);
522 SDValue visitCONCAT_VECTORS(SDNode *N);
523 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
524 SDValue visitVECTOR_SHUFFLE(SDNode *N);
525 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
526 SDValue visitINSERT_SUBVECTOR(SDNode *N);
527 SDValue visitMLOAD(SDNode *N);
528 SDValue visitMSTORE(SDNode *N);
529 SDValue visitMGATHER(SDNode *N);
530 SDValue visitMSCATTER(SDNode *N);
531 SDValue visitVPGATHER(SDNode *N);
532 SDValue visitVPSCATTER(SDNode *N);
533 SDValue visitFP_TO_FP16(SDNode *N);
534 SDValue visitFP16_TO_FP(SDNode *N);
535 SDValue visitFP_TO_BF16(SDNode *N);
536 SDValue visitVECREDUCE(SDNode *N);
537 SDValue visitVPOp(SDNode *N);
538
539 template <class MatchContextClass>
540 SDValue visitFADDForFMACombine(SDNode *N);
541 SDValue visitFSUBForFMACombine(SDNode *N);
542 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
543
544 SDValue XformToShuffleWithZero(SDNode *N);
545 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
546 const SDLoc &DL,
547 SDNode *N,
548 SDValue N0,
549 SDValue N1);
550 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
551 SDValue N1);
552 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
553 SDValue N1, SDNodeFlags Flags);
554 SDValue reassociateReduction(unsigned ResOpc, unsigned Opc, const SDLoc &DL,
555 EVT VT, SDValue N0, SDValue N1,
556 SDNodeFlags Flags = SDNodeFlags());
557
558 SDValue visitShiftByConstant(SDNode *N);
559
560 SDValue foldSelectOfConstants(SDNode *N);
561 SDValue foldVSelectOfConstants(SDNode *N);
562 SDValue foldBinOpIntoSelect(SDNode *BO);
563 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
564 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
565 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
566 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
568 bool NotExtCompare = false);
569 SDValue convertSelectOfFPConstantsToLoadOffset(
570 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
572 SDValue foldSignChangeInBitcast(SDNode *N);
573 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
575 SDValue foldSelectOfBinops(SDNode *N);
576 SDValue foldSextSetcc(SDNode *N);
577 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
578 const SDLoc &DL);
579 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
580 SDValue foldABSToABD(SDNode *N);
581 SDValue unfoldMaskedMerge(SDNode *N);
582 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
583 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
584 const SDLoc &DL, bool foldBooleans);
585 SDValue rebuildSetCC(SDValue N);
586
587 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
588 SDValue &CC, bool MatchStrict = false) const;
589 bool isOneUseSetCC(SDValue N) const;
590
591 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
592 unsigned HiOp);
593 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
594 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
595 const TargetLowering &TLI);
596
597 SDValue CombineExtLoad(SDNode *N);
598 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
599 SDValue combineRepeatedFPDivisors(SDNode *N);
600 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
601 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
602 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
603 SDValue BuildSDIV(SDNode *N);
604 SDValue BuildSDIVPow2(SDNode *N);
605 SDValue BuildUDIV(SDNode *N);
606 SDValue BuildSREMPow2(SDNode *N);
607 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
608 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
609 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
610 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
611 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
612 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
613 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
614 SDNodeFlags Flags, bool Reciprocal);
615 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
616 SDNodeFlags Flags, bool Reciprocal);
617 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
618 bool DemandHighBits = true);
619 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
620 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
621 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
622 unsigned PosOpcode, unsigned NegOpcode,
623 const SDLoc &DL);
624 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
625 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
626 unsigned PosOpcode, unsigned NegOpcode,
627 const SDLoc &DL);
628 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
629 SDValue MatchLoadCombine(SDNode *N);
630 SDValue mergeTruncStores(StoreSDNode *N);
631 SDValue reduceLoadWidth(SDNode *N);
632 SDValue ReduceLoadOpStoreWidth(SDNode *N);
634 SDValue TransformFPLoadStorePair(SDNode *N);
635 SDValue convertBuildVecZextToZext(SDNode *N);
636 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
637 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
638 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
639 SDValue reduceBuildVecToShuffle(SDNode *N);
640 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
641 ArrayRef<int> VectorMask, SDValue VecIn1,
642 SDValue VecIn2, unsigned LeftIdx,
643 bool DidSplitVec);
644 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
645
646 /// Walk up chain skipping non-aliasing memory nodes,
647 /// looking for aliasing nodes and adding them to the Aliases vector.
648 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
649 SmallVectorImpl<SDValue> &Aliases);
650
651 /// Return true if there is any possibility that the two addresses overlap.
652 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
653
654 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
655 /// chain (aliasing node.)
656 SDValue FindBetterChain(SDNode *N, SDValue Chain);
657
658 /// Try to replace a store and any possibly adjacent stores on
659 /// consecutive chains with better chains. Return true only if St is
660 /// replaced.
661 ///
662 /// Notice that other chains may still be replaced even if the function
663 /// returns false.
664 bool findBetterNeighborChains(StoreSDNode *St);
665
666 // Helper for findBetterNeighborChains. Walk up store chain add additional
667 // chained stores that do not overlap and can be parallelized.
668 bool parallelizeChainedStores(StoreSDNode *St);
669
670 /// Holds a pointer to an LSBaseSDNode as well as information on where it
671 /// is located in a sequence of memory operations connected by a chain.
672 struct MemOpLink {
673 // Ptr to the mem node.
674 LSBaseSDNode *MemNode;
675
676 // Offset from the base ptr.
677 int64_t OffsetFromBase;
678
679 MemOpLink(LSBaseSDNode *N, int64_t Offset)
680 : MemNode(N), OffsetFromBase(Offset) {}
681 };
682
683 // Classify the origin of a stored value.
684 enum class StoreSource { Unknown, Constant, Extract, Load };
685 StoreSource getStoreSource(SDValue StoreVal) {
686 switch (StoreVal.getOpcode()) {
687 case ISD::Constant:
688 case ISD::ConstantFP:
689 return StoreSource::Constant;
692 return StoreSource::Extract;
693 case ISD::LOAD:
694 return StoreSource::Load;
695 default:
696 return StoreSource::Unknown;
697 }
698 }
699
700 /// This is a helper function for visitMUL to check the profitability
701 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
702 /// MulNode is the original multiply, AddNode is (add x, c1),
703 /// and ConstNode is c2.
704 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
705 SDValue ConstNode);
706
707 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
708 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
709 /// the type of the loaded value to be extended.
710 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
711 EVT LoadResultTy, EVT &ExtVT);
712
713 /// Helper function to calculate whether the given Load/Store can have its
714 /// width reduced to ExtVT.
715 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
716 EVT &MemVT, unsigned ShAmt = 0);
717
718 /// Used by BackwardsPropagateMask to find suitable loads.
719 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
720 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
721 ConstantSDNode *Mask, SDNode *&NodeToMask);
722 /// Attempt to propagate a given AND node back to load leaves so that they
723 /// can be combined into narrow loads.
724 bool BackwardsPropagateMask(SDNode *N);
725
726 /// Helper function for mergeConsecutiveStores which merges the component
727 /// store chains.
728 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
729 unsigned NumStores);
730
731 /// Helper function for mergeConsecutiveStores which checks if all the store
732 /// nodes have the same underlying object. We can still reuse the first
733 /// store's pointer info if all the stores are from the same object.
734 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
735
736 /// This is a helper function for mergeConsecutiveStores. When the source
737 /// elements of the consecutive stores are all constants or all extracted
738 /// vector elements, try to merge them into one larger store introducing
739 /// bitcasts if necessary. \return True if a merged store was created.
740 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
741 EVT MemVT, unsigned NumStores,
742 bool IsConstantSrc, bool UseVector,
743 bool UseTrunc);
744
745 /// This is a helper function for mergeConsecutiveStores. Stores that
746 /// potentially may be merged with St are placed in StoreNodes. RootNode is
747 /// a chain predecessor to all store candidates.
748 void getStoreMergeCandidates(StoreSDNode *St,
749 SmallVectorImpl<MemOpLink> &StoreNodes,
750 SDNode *&Root);
751
752 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
753 /// have indirect dependency through their operands. RootNode is the
754 /// predecessor to all stores calculated by getStoreMergeCandidates and is
755 /// used to prune the dependency check. \return True if safe to merge.
756 bool checkMergeStoreCandidatesForDependencies(
757 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
758 SDNode *RootNode);
759
760 /// This is a helper function for mergeConsecutiveStores. Given a list of
761 /// store candidates, find the first N that are consecutive in memory.
762 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
763 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
764 int64_t ElementSizeBytes) const;
765
766 /// This is a helper function for mergeConsecutiveStores. It is used for
767 /// store chains that are composed entirely of constant values.
768 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
769 unsigned NumConsecutiveStores,
770 EVT MemVT, SDNode *Root, bool AllowVectors);
771
772 /// This is a helper function for mergeConsecutiveStores. It is used for
773 /// store chains that are composed entirely of extracted vector elements.
774 /// When extracting multiple vector elements, try to store them in one
775 /// vector store rather than a sequence of scalar stores.
776 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
777 unsigned NumConsecutiveStores, EVT MemVT,
778 SDNode *Root);
779
780 /// This is a helper function for mergeConsecutiveStores. It is used for
781 /// store chains that are composed entirely of loaded values.
782 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
783 unsigned NumConsecutiveStores, EVT MemVT,
784 SDNode *Root, bool AllowVectors,
785 bool IsNonTemporalStore, bool IsNonTemporalLoad);
786
787 /// Merge consecutive store operations into a wide store.
788 /// This optimization uses wide integers or vectors when possible.
789 /// \return true if stores were merged.
790 bool mergeConsecutiveStores(StoreSDNode *St);
791
792 /// Try to transform a truncation where C is a constant:
793 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
794 ///
795 /// \p N needs to be a truncation and its first operand an AND. Other
796 /// requirements are checked by the function (e.g. that trunc is
797 /// single-use) and if missed an empty SDValue is returned.
798 SDValue distributeTruncateThroughAnd(SDNode *N);
799
800 /// Helper function to determine whether the target supports operation
801 /// given by \p Opcode for type \p VT, that is, whether the operation
802 /// is legal or custom before legalizing operations, and whether is
803 /// legal (but not custom) after legalization.
804 bool hasOperation(unsigned Opcode, EVT VT) {
805 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
806 }
807
808 public:
809 /// Runs the dag combiner on all nodes in the work list
810 void Run(CombineLevel AtLevel);
811
812 SelectionDAG &getDAG() const { return DAG; }
813
814 /// Returns a type large enough to hold any valid shift amount - before type
815 /// legalization these can be huge.
816 EVT getShiftAmountTy(EVT LHSTy) {
817 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
818 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
819 }
820
821 /// This method returns true if we are running before type legalization or
822 /// if the specified VT is legal.
823 bool isTypeLegal(const EVT &VT) {
824 if (!LegalTypes) return true;
825 return TLI.isTypeLegal(VT);
826 }
827
828 /// Convenience wrapper around TargetLowering::getSetCCResultType
829 EVT getSetCCResultType(EVT VT) const {
830 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
831 }
832
833 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
834 SDValue OrigLoad, SDValue ExtLoad,
835 ISD::NodeType ExtType);
836 };
837
838/// This class is a DAGUpdateListener that removes any deleted
839/// nodes from the worklist.
840class WorklistRemover : public SelectionDAG::DAGUpdateListener {
841 DAGCombiner &DC;
842
843public:
844 explicit WorklistRemover(DAGCombiner &dc)
845 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
846
847 void NodeDeleted(SDNode *N, SDNode *E) override {
848 DC.removeFromWorklist(N);
849 }
850};
851
852class WorklistInserter : public SelectionDAG::DAGUpdateListener {
853 DAGCombiner &DC;
854
855public:
856 explicit WorklistInserter(DAGCombiner &dc)
857 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
858
859 // FIXME: Ideally we could add N to the worklist, but this causes exponential
860 // compile time costs in large DAGs, e.g. Halide.
861 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
862};
863
864class EmptyMatchContext {
865 SelectionDAG &DAG;
866 const TargetLowering &TLI;
867
868public:
869 EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
870 : DAG(DAG), TLI(TLI) {}
871
872 bool match(SDValue OpN, unsigned Opcode) const {
873 return Opcode == OpN->getOpcode();
874 }
875
876 // Same as SelectionDAG::getNode().
877 template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
878 return DAG.getNode(std::forward<ArgT>(Args)...);
879 }
880
881 bool isOperationLegalOrCustom(unsigned Op, EVT VT,
882 bool LegalOnly = false) const {
883 return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
884 }
885};
886
/// Match context for Vector-Predicated (VP) nodes: a VP node matches a base
/// (non-VP) opcode only if its mask and explicit vector length (EVL) are
/// compatible with the root node's, and all nodes created through this
/// context are emitted as VP nodes carrying the root's mask/EVL.
class VPMatchContext {
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  // Mask and EVL operands of the root node; matched nodes must be compatible
  // with these and created nodes inherit them.
  SDValue RootMaskOp;
  SDValue RootVectorLenOp;

public:
  VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
      : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
    assert(Root->isVPOpcode());
    if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
      RootMaskOp = Root->getOperand(*RootMaskPos);

    // NOTE(review): the initializer expression of RootVLenPos appears to have
    // been dropped during extraction (presumably a call to
    // ISD::getVPExplicitVectorLengthIdx on the root's opcode) — restore from
    // upstream before building.
    if (auto RootVLenPos =
      RootVectorLenOp = Root->getOperand(*RootVLenPos);
  }

  /// whether \p OpVal is a node that is functionally compatible with the
  /// NodeType \p Opc
  bool match(SDValue OpVal, unsigned Opc) const {
    // Non-VP nodes must match the opcode exactly.
    if (!OpVal->isVPOpcode())
      return OpVal->getOpcode() == Opc;

    // Map the VP opcode back to its base opcode; FP-exception behavior
    // participates in the mapping.
    auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
                                           !OpVal->getFlags().hasNoFPExcept());
    if (BaseOpc != Opc)
      return false;

    // Make sure the mask of OpVal is true mask or is same as Root's.
    unsigned VPOpcode = OpVal->getOpcode();
    if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
      SDValue MaskOp = OpVal.getOperand(*MaskPos);
      // NOTE(review): the second operand of this '&&' appears to have been
      // dropped during extraction (upstream: an all-ones-splat check on
      // MaskOp) — restore from upstream before building.
      if (RootMaskOp != MaskOp &&
        return false;
    }

    // Make sure the EVL of OpVal is same as Root's.
    if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
      if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
        return false;
    return true;
  }

  // Specialize based on number of operands.
  // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
  // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
  // DAG.getNode(Opcode, DL, VT); }
  //
  // Each overload below converts the base opcode to its VP form, asserts the
  // expected mask/EVL operand positions, and appends the root's mask and EVL.
  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
    return DAG.getNode(VPOpcode, DL, VT,
                       {Operand, RootMaskOp, RootVectorLenOp});
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
    return DAG.getNode(VPOpcode, DL, VT,
                       {N1, N2, RootMaskOp, RootVectorLenOp});
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2, SDValue N3) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
    return DAG.getNode(VPOpcode, DL, VT,
                       {N1, N2, N3, RootMaskOp, RootVectorLenOp});
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
                  SDNodeFlags Flags) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
    return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
                       Flags);
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2, SDNodeFlags Flags) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
    return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
                       Flags);
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2, SDValue N3, SDNodeFlags Flags) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
    return DAG.getNode(VPOpcode, DL, VT,
                       {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
  }

  /// Query legality of the VP form of base opcode \p Op.
  bool isOperationLegalOrCustom(unsigned Op, EVT VT,
                                bool LegalOnly = false) const {
    unsigned VPOp = ISD::getVPForBaseOpcode(Op);
    return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
  }
};
995
996} // end anonymous namespace
997
998//===----------------------------------------------------------------------===//
999// TargetLowering::DAGCombinerInfo implementation
1000//===----------------------------------------------------------------------===//
1001
// NOTE(review): the signature lines of these TargetLowering::DAGCombinerInfo
// member definitions appear to have been dropped during extraction; each body
// below simply forwards to the corresponding DAGCombiner method through the
// type-erased 'DC' pointer. Restore the signatures from upstream before
// building.
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

// Forwards to DAGCombiner::CombineTo (multi-value form).
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

// Forwards to DAGCombiner::CombineTo (single replacement value).
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

// Forwards to DAGCombiner::CombineTo (two replacement values).
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

// Forwards to DAGCombiner::recursivelyDeleteUnusedNodes.
  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}

// Forwards to DAGCombiner::CommitTargetLoweringOpt.
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}
1030
1031//===----------------------------------------------------------------------===//
1032// Helper Functions
1033//===----------------------------------------------------------------------===//
1034
1035void DAGCombiner::deleteAndRecombine(SDNode *N) {
1036 removeFromWorklist(N);
1037
1038 // If the operands of this node are only used by the node, they will now be
1039 // dead. Make sure to re-visit them and recursively delete dead nodes.
1040 for (const SDValue &Op : N->ops())
1041 // For an operand generating multiple values, one of the values may
1042 // become dead allowing further simplification (e.g. split index
1043 // arithmetic from an indexed load).
1044 if (Op->hasOneUse() || Op->getNumValues() > 1)
1045 AddToWorklist(Op.getNode());
1046
1047 DAG.DeleteNode(N);
1048}
1049
1050// APInts must be the same size for most operations, this helper
1051// function zero extends the shorter of the pair so that they match.
1052// We provide an Offset so that we can create bitwidths that won't overflow.
1053static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
1054 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
1055 LHS = LHS.zext(Bits);
1056 RHS = RHS.zext(Bits);
1057}
1058
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC, bool MatchStrict) const {
  // Plain setcc: (setcc LHS, RHS, CC).
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC = N.getOperand(2);
    return true;
  }

  // Strict FP setcc carries a chain as operand 0, so the interesting
  // operands are shifted by one.
  if (MatchStrict &&
      (N.getOpcode() == ISD::STRICT_FSETCC ||
       N.getOpcode() == ISD::STRICT_FSETCCS)) {
    LHS = N.getOperand(1);
    RHS = N.getOperand(2);
    CC = N.getOperand(3);
    return true;
  }

  // Otherwise only (select_cc lhs, rhs, T, F, cc) where T/F are exactly the
  // target's boolean true/false constants is setcc-equivalent.
  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
      !TLI.isConstFalseVal(N.getOperand(3)))
    return false;

  // NOTE(review): the right-hand side of this comparison appears to have been
  // dropped during extraction (upstream compares against the target's
  // undefined-boolean-content enumerator) — restore before building.
  if (TLI.getBooleanContents(N.getValueType()) ==
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC = N.getOperand(4);
  return true;
}
1095
1096/// Return true if this is a SetCC-equivalent operation with only one use.
1097/// If this is true, it allows the users to invert the operation for free when
1098/// it is profitable to do so.
1099bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1100 SDValue N0, N1, N2;
1101 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1102 return true;
1103 return false;
1104}
1105
// NOTE(review): the signature line of this static helper appears to have been
// dropped during extraction (it takes the candidate node and a scalar EVT) —
// restore from upstream before building.
// Returns true when the node is a constant splat whose value is exactly the
// all-ones mask for the given scalar integer type (i8/i16/i32 only).
  if (!ScalarTy.isSimple())
    return false;

  uint64_t MaskForTy = 0ULL;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::i8:
    MaskForTy = 0xFFULL;
    break;
  case MVT::i16:
    MaskForTy = 0xFFFFULL;
    break;
  case MVT::i32:
    MaskForTy = 0xFFFFFFFFULL;
    break;
  default:
    // Other widths (including i64) are intentionally not handled.
    return false;
    break;
  }

  APInt Val;
  if (ISD::isConstantSplatVector(N, Val))
    return Val.getLimitedValue() == MaskForTy;

  return false;
}
1132
1133// Determines if it is a constant integer or a splat/build vector of constant
1134// integers (and undefs).
1135// Do not permit build vector implicit truncation.
1136static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1137 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1138 return !(Const->isOpaque() && NoOpaques);
1139 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1140 return false;
1141 unsigned BitWidth = N.getScalarValueSizeInBits();
1142 for (const SDValue &Op : N->op_values()) {
1143 if (Op.isUndef())
1144 continue;
1145 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1146 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1147 (Const->isOpaque() && NoOpaques))
1148 return false;
1149 }
1150 return true;
1151}
1152
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  // NOTE(review): the second operand of this '||' appears to have been
  // dropped during extraction (upstream also accepts build vectors of FP
  // constants) — restore before building.
  return isConstantOrConstantVector(V, NoOpaques) ||
}
1161
1162// Determine if this an indexed load with an opaque target constant index.
1163static bool canSplitIdx(LoadSDNode *LD) {
1164 return MaySplitLoadIndex &&
1165 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1166 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1167}
1168
/// Return true when reassociating (Opc N0, N1) — folding constant offsets
/// together or hoisting one past a variable term — could undo an
/// addressing-mode-friendly GEP split made by CodeGenPrepare, i.e. would
/// produce an address that no longer matches a legal addressing mode for the
/// memory users of \p N.
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDNode *N,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).

  // (load/store (add, (add, x, y), offset2)) ->
  // (load/store (add, (add, x, offset2), y)).

  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  // The outer addend must be a constant small enough to reason about.
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C2)
    return false;

  const APInt &C2APIntVal = C2->getAPIntValue();
  if (C2APIntVal.getSignificantBits() > 64)
    return false;

  if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    // (add (add x, c1), c2) case: only relevant when the inner add is shared.
    if (N0.hasOneUse())
      return false;

    const APInt &C1APIntVal = C1->getAPIntValue();
    const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
    if (CombinedValueIntVal.getSignificantBits() > 64)
      return false;
    const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

    for (SDNode *Node : N->uses()) {
      if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
        // Is x[offset2] already not a legal addressing mode? If so then
        // reassociating the constants breaks nothing (we test offset2 because
        // that's the one we hope to fold into the load or store).
        // NOTE(review): the declaration of the local 'AM' (upstream a
        // TargetLowering::AddrMode) appears to have been dropped here during
        // extraction — restore before building.
        AM.HasBaseReg = true;
        AM.BaseOffs = C2APIntVal.getSExtValue();
        EVT VT = LoadStore->getMemoryVT();
        unsigned AS = LoadStore->getAddressSpace();
        Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
          continue;

        // Would x[offset1+offset2] still be a legal addressing mode?
        AM.BaseOffs = CombinedValue;
        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
          return true;
      }
    }
  } else {
    // (add (add x, y), c2) case: a foldable global-address term is fine.
    if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
      if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
        return false;

    // All users must be memory operations whose x[offset2] form is legal;
    // otherwise reassociation is considered harmless.
    for (SDNode *Node : N->uses()) {
      auto *LoadStore = dyn_cast<MemSDNode>(Node);
      if (!LoadStore)
        return false;

      // Is x[offset2] a legal addressing mode? If so then
      // reassociating the constants breaks address pattern
      // NOTE(review): the declaration of the local 'AM' appears to have been
      // dropped here during extraction as well — restore before building.
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return false;
    }
    return true;
  }

  return false;
}
1250
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);

  // NOTE(review): two nested 'if' lines appear to have been dropped here
  // during extraction (upstream first checks that N01 — and then N1 — is a
  // constant int or constant-int build vector); the dangling closing braces
  // below belong to them. Restore before building.
    // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
    if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
      return DAG.getNode(Opc, DL, VT, N00, OpNode);
    return SDValue();
  }
  if (TLI.isReassocProfitable(DAG, N0, N1)) {
    // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
    // iff (op x, c1) has one use
    SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
    return DAG.getNode(Opc, DL, VT, OpNode, N01);
  }
  }

  // Check for repeated operand logic simplifications.
  if (Opc == ISD::AND || Opc == ISD::OR) {
    // (N00 & N01) & N00 --> N00 & N01
    // (N00 & N01) & N01 --> N00 & N01
    // (N00 | N01) | N00 --> N00 | N01
    // (N00 | N01) | N01 --> N00 | N01
    if (N1 == N00 || N1 == N01)
      return N0;
  }
  if (Opc == ISD::XOR) {
    // (N00 ^ N01) ^ N00 --> N01
    if (N1 == N00)
      return N01;
    // (N00 ^ N01) ^ N01 --> N00
    if (N1 == N01)
      return N00;
  }

  if (TLI.isReassocProfitable(DAG, N0, N1)) {
    if (N1 != N01) {
      // Reassociate if (op N00, N1) already exist
      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
        // if Op (Op N00, N1), N01 already exist
        // we need to stop reassociating to avoid a dead loop
        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
      }
    }

    if (N1 != N00) {
      // Reassociate if (op N01, N1) already exist
      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
        // if Op (Op N01, N1), N00 already exist
        // we need to stop reassociating to avoid a dead loop
        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
      }
    }
  }

  return SDValue();
}
1320
// Try to reassociate commutative binops.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");

  // Floating-point reassociation is not allowed without loose FP math.
  // NOTE(review): the second operand of this '||' appears to have been
  // dropped during extraction (upstream applies the same floating-point
  // check to N1's type) — restore before building.
  if (N0.getValueType().isFloatingPoint() ||
    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
      return SDValue();

  // The op is commutative, so try reassociating with either operand order.
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
    return Combined;
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
    return Combined;
  return SDValue();
}
1338
// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
// Note that we only expect Flags to be passed from FP operations. For integer
// operations they need to be dropped.
SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
                                          const SDLoc &DL, EVT VT, SDValue N0,
                                          SDValue N1, SDNodeFlags Flags) {
  // Both operands must be the same kind of reduction over same-typed vectors,
  // each with a single use, and the target must consider the transform
  // profitable.
  // NOTE(review): one conjunct of this condition appears to have been dropped
  // during extraction between the one-use checks and the TLI query — restore
  // from upstream before building.
  if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
      N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
      N0->hasOneUse() && N1->hasOneUse() &&
      TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
    // Propagate the caller's flags onto the nodes created below.
    SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
    return DAG.getNode(RedOpc, DL, VT,
                       DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
                                   N0.getOperand(0), N1.getOperand(0)));
  }
  return SDValue();
}
1357
1358SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1359 bool AddTo) {
1360 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1361 ++NodesCombined;
1362 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1363 To[0].dump(&DAG);
1364 dbgs() << " and " << NumTo - 1 << " other values\n");
1365 for (unsigned i = 0, e = NumTo; i != e; ++i)
1366 assert((!To[i].getNode() ||
1367 N->getValueType(i) == To[i].getValueType()) &&
1368 "Cannot combine value to value of different type!");
1369
1370 WorklistRemover DeadNodes(*this);
1371 DAG.ReplaceAllUsesWith(N, To);
1372 if (AddTo) {
1373 // Push the new nodes and any users onto the worklist
1374 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1375 if (To[i].getNode())
1376 AddToWorklistWithUsers(To[i].getNode());
1377 }
1378 }
1379
1380 // Finally, if the node is now dead, remove it from the graph. The node
1381 // may not be dead if the replacement process recursively simplified to
1382 // something else needing this node.
1383 if (N->use_empty())
1384 deleteAndRecombine(N);
1385 return SDValue(N, 0);
1386}
1387
1388void DAGCombiner::
1389CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1390 // Replace the old value with the new one.
1391 ++NodesCombined;
1392 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1393 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1394
1395 // Replace all uses.
1396 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1397
1398 // Push the new node and any (possibly new) users onto the worklist.
1399 AddToWorklistWithUsers(TLO.New.getNode());
1400
1401 // Finally, if the node is now dead, remove it from the graph.
1402 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1403}
1404
1405/// Check the specified integer node value to see if it can be simplified or if
1406/// things it uses can be simplified by bit propagation. If so, return true.
1407bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1408 const APInt &DemandedElts,
1409 bool AssumeSingleUse) {
1410 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1411 KnownBits Known;
1412 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1413 AssumeSingleUse))
1414 return false;
1415
1416 // Revisit the node.
1417 AddToWorklist(Op.getNode());
1418
1419 CommitTargetLoweringOpt(TLO);
1420 return true;
1421}
1422
1423/// Check the specified vector node value to see if it can be simplified or
1424/// if things it uses can be simplified as it only uses some of the elements.
1425/// If so, return true.
1426bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1427 const APInt &DemandedElts,
1428 bool AssumeSingleUse) {
1429 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1430 APInt KnownUndef, KnownZero;
1431 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1432 TLO, 0, AssumeSingleUse))
1433 return false;
1434
1435 // Revisit the node.
1436 AddToWorklist(Op.getNode());
1437
1438 CommitTargetLoweringOpt(TLO);
1439 return true;
1440}
1441
1442void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1443 SDLoc DL(Load);
1444 EVT VT = Load->getValueType(0);
1445 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1446
1447 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1448 Trunc.dump(&DAG); dbgs() << '\n');
1449
1450 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1451 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1452
1453 AddToWorklist(Trunc.getNode());
1454 recursivelyDeleteUnusedNodes(Load);
1455}
1456
/// Rebuild \p Op at the wider type \p PVT. Unindexed loads become extending
/// loads (setting \p Replace so the caller can redirect the old load's other
/// users); AssertSext/AssertZext recurse into their operand; constants are
/// extended directly; anything else is wrapped in ANY_EXTEND when legal.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // NOTE(review): the first line of the ExtType initialization appears to
    // have been dropped during extraction (upstream selects EXTLOAD for
    // non-extending loads and otherwise keeps the load's extension type) —
    // restore before building.
        : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Promote the asserted operand with a sign-preserving extension.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    // Promote the asserted operand with a zero-preserving extension.
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    // Byte-sized constants are sign-extended, others zero-extended.
    unsigned ExtOpc =
        Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1493
/// Promote \p Op to \p PVT and sign-extend-in-register from Op's original
/// width, so the bits above that width are copies of the sign bit.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  // NOTE(review): the condition guarding this early return appears to have
  // been dropped during extraction (upstream bails out when
  // SIGN_EXTEND_INREG is not legal for PVT) — restore before building.
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // If the operand was a load rewritten as an extending load, redirect the
  // old load's remaining users too.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1510
1511SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1512 EVT OldVT = Op.getValueType();
1513 SDLoc DL(Op);
1514 bool Replace = false;
1515 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1516 if (!NewOp.getNode())
1517 return SDValue();
1518 AddToWorklist(NewOp.getNode());
1519
1520 if (Replace)
1521 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1522 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1523}
1524
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
/// The replacement order below (CombineTo before load replacements, and
/// predecessor-first when both operands are replaced) is deliberate; do not
/// reorder.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));

    // Promote each operand independently; ReplaceN records whether that
    // operand was a load rewritten as an extending load.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the operation at the promoted width, then truncate back.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need additional
    // replacements if there are additional uses.
    // Note: We are checking uses of the *nodes* (SDNode) rather than values
    // (SDValue) here because the node may reference multiple values
    // (for example, the chain value of a load node).
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}
1592
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));

    // Only the shifted value is promoted; the shift amount is left alone.
    // SRA must preserve sign bits (sext), SRL must clear the high bits
    // (zext); other shifts can accept any extension.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue N1 = Op.getOperand(1);
    // Shift at the promoted width, then truncate back to the original type.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    // If the operand was a load rewritten as an extending load, redirect the
    // old load's remaining users.
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1644
1645SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1646 if (!LegalOperations)
1647 return SDValue();
1648
1649 EVT VT = Op.getValueType();
1650 if (VT.isVector() || !VT.isInteger())
1651 return SDValue();
1652
1653 // If operation type is 'undesirable', e.g. i16 on x86, consider
1654 // promoting it.
1655 unsigned Opc = Op.getOpcode();
1656 if (TLI.isTypeDesirableForOp(Opc, VT))
1657 return SDValue();
1658
1659 EVT PVT = VT;
1660 // Consult target whether it is a good idea to promote this operation and
1661 // what's the right type to promote it to.
1662 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1663 assert(PVT != VT && "Don't know what type to promote to!");
1664 // fold (aext (aext x)) -> (aext x)
1665 // fold (aext (zext x)) -> (zext x)
1666 // fold (aext (sext x)) -> (sext x)
1667 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1668 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1669 }
1670 return SDValue();
1671}
1672
/// Promote an unindexed integer load whose type the target finds undesirable:
/// emit an extending load at the promoted type and replace the original
/// load's value with a truncate of it. Returns true on success.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // NOTE(review): the first line of the ExtType initialization appears to
    // have been dropped during extraction (upstream selects EXTLOAD for
    // non-extending loads and otherwise keeps the load's extension type) —
    // restore before building.
        : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.dump(&DAG); dbgs() << '\n');

    // NOTE(review): the replacement of the load's value result — upstream a
    // ReplaceAllUsesOfValueWith of SDValue(N, 0) with Result — appears to
    // have been dropped here during extraction; only the chain forwarding
    // below survived. Restore before building.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));

    AddToWorklist(Result.getNode());
    recursivelyDeleteUnusedNodes(N);
    return true;
  }

  return false;
}
1720
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  // NOTE(review): the declaration of the 'Nodes' worklist (upstream a
  // SmallSetVector<SDNode *, 16>) appears to have been dropped here during
  // extraction — restore before building.
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // N is dead: queue its operands (they may become dead next), then
      // remove it from both the worklist and the DAG.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still used — but one of its users was just deleted, so revisit it.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}
1750
1751//===----------------------------------------------------------------------===//
1752// Main DAG Combiner implementation
1753//===----------------------------------------------------------------------===//
1754
1755void DAGCombiner::Run(CombineLevel AtLevel) {
1756 // set the instance variables, so that the various visit routines may use it.
1757 Level = AtLevel;
1758 LegalDAG = Level >= AfterLegalizeDAG;
1759 LegalOperations = Level >= AfterLegalizeVectorOps;
1760 LegalTypes = Level >= AfterLegalizeTypes;
1761
1762 WorklistInserter AddNodes(*this);
1763
1764 // Add all the dag nodes to the worklist.
1765 for (SDNode &Node : DAG.allnodes())
1766 AddToWorklist(&Node);
1767
1768 // Create a dummy node (which is not added to allnodes), that adds a reference
1769 // to the root node, preventing it from being deleted, and tracking any
1770 // changes of the root.
1771 HandleSDNode Dummy(DAG.getRoot());
1772
1773 // While we have a valid worklist entry node, try to combine it.
1774 while (SDNode *N = getNextWorklistEntry()) {
1775 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1776 // N is deleted from the DAG, since they too may now be dead or may have a
1777 // reduced number of uses, allowing other xforms.
1778 if (recursivelyDeleteUnusedNodes(N))
1779 continue;
1780
1781 WorklistRemover DeadNodes(*this);
1782
1783 // If this combine is running after legalizing the DAG, re-legalize any
1784 // nodes pulled off the worklist.
1785 if (LegalDAG) {
1786 SmallSetVector<SDNode *, 16> UpdatedNodes;
1787 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1788
1789 for (SDNode *LN : UpdatedNodes)
1790 AddToWorklistWithUsers(LN);
1791
1792 if (!NIsValid)
1793 continue;
1794 }
1795
1796 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1797
1798 // Add any operands of the new node which have not yet been combined to the
1799 // worklist as well. Because the worklist uniques things already, this
1800 // won't repeatedly process the same operand.
1801 CombinedNodes.insert(N);
1802 for (const SDValue &ChildN : N->op_values())
1803 if (!CombinedNodes.count(ChildN.getNode()))
1804 AddToWorklist(ChildN.getNode());
1805
1806 SDValue RV = combine(N);
1807
1808 if (!RV.getNode())
1809 continue;
1810
1811 ++NodesCombined;
1812
1813 // If we get back the same node we passed in, rather than a new node or
1814 // zero, we know that the node must have defined multiple values and
1815 // CombineTo was used. Since CombineTo takes care of the worklist
1816 // mechanics for us, we have no work to do in this case.
1817 if (RV.getNode() == N)
1818 continue;
1819
1820 assert(N->getOpcode() != ISD::DELETED_NODE &&
1821 RV.getOpcode() != ISD::DELETED_NODE &&
1822 "Node was deleted but visit returned new node!");
1823
1824 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1825
1826 if (N->getNumValues() == RV->getNumValues())
1827 DAG.ReplaceAllUsesWith(N, RV.getNode());
1828 else {
1829 assert(N->getValueType(0) == RV.getValueType() &&
1830 N->getNumValues() == 1 && "Type mismatch");
1831 DAG.ReplaceAllUsesWith(N, &RV);
1832 }
1833
1834 // Push the new node and any users onto the worklist. Omit this if the
1835 // new node is the EntryToken (e.g. if a store managed to get optimized
1836 // out), because re-visiting the EntryToken and its users will not uncover
1837 // any additional opportunities, but there may be a large number of such
1838 // users, potentially causing compile time explosion.
1839 if (RV.getOpcode() != ISD::EntryToken) {
1840 AddToWorklist(RV.getNode());
1841 AddUsersToWorklist(RV.getNode());
1842 }
1843
1844 // Finally, if the node is now dead, remove it from the graph. The node
1845 // may not be dead if the replacement process recursively simplified to
1846 // something else needing this node. This will also take care of adding any
1847 // operands which have lost a user to the worklist.
1848 recursivelyDeleteUnusedNodes(N);
1849 }
1850
1851 // If the root changed (e.g. it was a dead load, update the root).
1852 DAG.setRoot(Dummy.getValue());
1853 DAG.RemoveDeadNodes();
1854}
1855
/// Dispatch a node to the opcode-specific visit routine. Returns the
/// replacement value on success, or a null SDValue when no generic combine
/// applies to this opcode.
1856SDValue DAGCombiner::visit(SDNode *N) {
1857 switch (N->getOpcode()) {
// Opcodes with no dedicated visitor fall through and return a null SDValue.
1858 default: break;
1859 case ISD::TokenFactor: return visitTokenFactor(N);
1860 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1861 case ISD::ADD: return visitADD(N);
1862 case ISD::SUB: return visitSUB(N);
1863 case ISD::SADDSAT:
1864 case ISD::UADDSAT: return visitADDSAT(N);
1865 case ISD::SSUBSAT:
1866 case ISD::USUBSAT: return visitSUBSAT(N);
1867 case ISD::ADDC: return visitADDC(N);
1868 case ISD::SADDO:
1869 case ISD::UADDO: return visitADDO(N);
1870 case ISD::SUBC: return visitSUBC(N);
1871 case ISD::SSUBO:
1872 case ISD::USUBO: return visitSUBO(N);
1873 case ISD::ADDE: return visitADDE(N);
1874 case ISD::ADDCARRY: return visitADDCARRY(N);
1875 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1876 case ISD::SUBE: return visitSUBE(N);
1877 case ISD::SUBCARRY: return visitSUBCARRY(N);
1878 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1879 case ISD::SMULFIX:
1880 case ISD::SMULFIXSAT:
1881 case ISD::UMULFIX:
1882 case ISD::UMULFIXSAT: return visitMULFIX(N);
1883 case ISD::MUL: return visitMUL(N);
1884 case ISD::SDIV: return visitSDIV(N);
1885 case ISD::UDIV: return visitUDIV(N);
1886 case ISD::SREM:
1887 case ISD::UREM: return visitREM(N);
1888 case ISD::MULHU: return visitMULHU(N);
1889 case ISD::MULHS: return visitMULHS(N);
1890 case ISD::AVGFLOORS:
1891 case ISD::AVGFLOORU:
1892 case ISD::AVGCEILS:
1893 case ISD::AVGCEILU: return visitAVG(N);
1894 case ISD::ABDS:
1895 case ISD::ABDU: return visitABD(N);
1896 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1897 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1898 case ISD::SMULO:
1899 case ISD::UMULO: return visitMULO(N);
1900 case ISD::SMIN:
1901 case ISD::SMAX:
1902 case ISD::UMIN:
1903 case ISD::UMAX: return visitIMINMAX(N);
1904 case ISD::AND: return visitAND(N);
1905 case ISD::OR: return visitOR(N);
1906 case ISD::XOR: return visitXOR(N);
1907 case ISD::SHL: return visitSHL(N);
1908 case ISD::SRA: return visitSRA(N);
1909 case ISD::SRL: return visitSRL(N);
1910 case ISD::ROTR:
1911 case ISD::ROTL: return visitRotate(N);
1912 case ISD::FSHL:
1913 case ISD::FSHR: return visitFunnelShift(N);
1914 case ISD::SSHLSAT:
1915 case ISD::USHLSAT: return visitSHLSAT(N);
1916 case ISD::ABS: return visitABS(N);
1917 case ISD::BSWAP: return visitBSWAP(N);
1918 case ISD::BITREVERSE: return visitBITREVERSE(N);
1919 case ISD::CTLZ: return visitCTLZ(N);
1920 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1921 case ISD::CTTZ: return visitCTTZ(N);
1922 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1923 case ISD::CTPOP: return visitCTPOP(N);
1924 case ISD::SELECT: return visitSELECT(N);
1925 case ISD::VSELECT: return visitVSELECT(N);
1926 case ISD::SELECT_CC: return visitSELECT_CC(N);
1927 case ISD::SETCC: return visitSETCC(N);
1928 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1929 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1930 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1931 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1932 case ISD::AssertSext:
1933 case ISD::AssertZext: return visitAssertExt(N);
1934 case ISD::AssertAlign: return visitAssertAlign(N);
1935 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
// NOTE(review): additional *_EXTEND_VECTOR_INREG case labels appear to be
// missing from this excerpt -- confirm against the full file.
1938 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1939 case ISD::TRUNCATE: return visitTRUNCATE(N);
1940 case ISD::BITCAST: return visitBITCAST(N);
1941 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1942 case ISD::FADD: return visitFADD(N);
1943 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1944 case ISD::FSUB: return visitFSUB(N);
1945 case ISD::FMUL: return visitFMUL(N);
1946 case ISD::FMA: return visitFMA(N);
1947 case ISD::FDIV: return visitFDIV(N);
1948 case ISD::FREM: return visitFREM(N);
1949 case ISD::FSQRT: return visitFSQRT(N);
1950 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1951 case ISD::FPOW: return visitFPOW(N);
1952 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1953 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1954 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1955 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1956 case ISD::FP_ROUND: return visitFP_ROUND(N);
1957 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1958 case ISD::FNEG: return visitFNEG(N);
1959 case ISD::FABS: return visitFABS(N);
1960 case ISD::FFLOOR: return visitFFLOOR(N);
1961 case ISD::FMINNUM:
1962 case ISD::FMAXNUM:
1963 case ISD::FMINIMUM:
1964 case ISD::FMAXIMUM: return visitFMinMax(N);
1965 case ISD::FCEIL: return visitFCEIL(N);
1966 case ISD::FTRUNC: return visitFTRUNC(N);
1967 case ISD::BRCOND: return visitBRCOND(N);
1968 case ISD::BR_CC: return visitBR_CC(N);
1969 case ISD::LOAD: return visitLOAD(N);
1970 case ISD::STORE: return visitSTORE(N);
1971 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1972 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1973 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1974 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1975 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1976 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1977 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1978 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1979 case ISD::MGATHER: return visitMGATHER(N);
1980 case ISD::MLOAD: return visitMLOAD(N);
1981 case ISD::MSCATTER: return visitMSCATTER(N);
1982 case ISD::MSTORE: return visitMSTORE(N);
1983 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1984 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1985 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1986 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1987 case ISD::FREEZE: return visitFREEZE(N);
// NOTE(review): further case labels between FREEZE and the VECREDUCE group,
// and between VECREDUCE_XOR and VECREDUCE_FMIN, appear to be missing from
// this excerpt -- confirm against the full file.
1990 case ISD::VECREDUCE_ADD:
1991 case ISD::VECREDUCE_MUL:
1992 case ISD::VECREDUCE_AND:
1993 case ISD::VECREDUCE_OR:
1994 case ISD::VECREDUCE_XOR:
2000 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
// Every registered VP (vector-predicated) opcode dispatches to visitVPOp.
2001#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2002#include "llvm/IR/VPIntrinsics.def"
2003 return visitVPOp(N);
2004 }
// Unhandled opcode: signal "no combine performed".
2005 return SDValue();
2006}
2007
/// Try all combine strategies on N in priority order: generic visitors,
/// target-specific combines, integer-operation promotion, and finally CSE of
/// a commuted operand order. Returns the replacement value, or null.
2008SDValue DAGCombiner::combine(SDNode *N) {
2009 SDValue RV;
2010 if (!DisableGenericCombines)
2011 RV = visit(N);
2012
2013 // If nothing happened, try a target-specific DAG combine.
2014 if (!RV.getNode()) {
2015 assert(N->getOpcode() != ISD::DELETED_NODE &&
2016 "Node was deleted but visit returned NULL!");
2017
// Target-defined opcodes are always offered to the target; standard opcodes
// only if the target registered a combine for them.
2018 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2019 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2020
2021 // Expose the DAG combiner to the target combiner impls.
// NOTE(review): the declaration line for 'DagCombineInfo' (presumably
// TargetLowering::DAGCombinerInfo) is not visible in this excerpt.
2023 DagCombineInfo(DAG, Level, false, this);
2024
2025 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2026 }
2027 }
2028
2029 // If nothing happened still, try promoting the operation.
2030 if (!RV.getNode()) {
2031 switch (N->getOpcode()) {
2032 default: break;
2033 case ISD::ADD:
2034 case ISD::SUB:
2035 case ISD::MUL:
2036 case ISD::AND:
2037 case ISD::OR:
2038 case ISD::XOR:
2039 RV = PromoteIntBinOp(SDValue(N, 0));
2040 break;
2041 case ISD::SHL:
2042 case ISD::SRA:
2043 case ISD::SRL:
2044 RV = PromoteIntShiftOp(SDValue(N, 0));
2045 break;
2046 case ISD::SIGN_EXTEND:
2047 case ISD::ZERO_EXTEND:
2048 case ISD::ANY_EXTEND:
2049 RV = PromoteExtend(SDValue(N, 0));
2050 break;
2051 case ISD::LOAD:
// PromoteLoad rewrites uses in place; report N itself as the result.
2052 if (PromoteLoad(SDValue(N, 0)))
2053 RV = SDValue(N, 0);
2054 break;
2055 }
2056 }
2057
2058 // If N is a commutative binary node, try to eliminate it if the commuted
2059 // version is already present in the DAG.
2060 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2061 SDValue N0 = N->getOperand(0);
2062 SDValue N1 = N->getOperand(1);
2063
2064 // Constant operands are canonicalized to RHS.
// Only look for the swapped form when the swap would not break that
// canonicalization (i.e. N0 is constant, or N1 is not).
2065 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2066 SDValue Ops[] = {N1, N0};
2067 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2068 N->getFlags());
2069 if (CSENode)
2070 return SDValue(CSENode, 0);
2071 }
2072 }
2073
2074 return RV;
2075}
2076
2077/// Given a node, return its input chain if it has one, otherwise return a null
2078/// sd operand.
// NOTE(review): the function signature line is not visible in this excerpt
// (presumably `static SDValue getInputChainForNode(SDNode *N)`).
2080 if (unsigned NumOps = N->getNumOperands()) {
// Chains most commonly sit in the first or last operand slot; check those
// before scanning the interior operands.
2081 if (N->getOperand(0).getValueType() == MVT::Other)
2082 return N->getOperand(0);
2083 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2084 return N->getOperand(NumOps-1);
// First and last were already checked above, so scan only 1..NumOps-2.
2085 for (unsigned i = 1; i < NumOps-1; ++i)
2086 if (N->getOperand(i).getValueType() == MVT::Other)
2087 return N->getOperand(i);
2088 }
// No operand of chain type (MVT::Other) was found.
2089 return SDValue();
2090}
2091
/// Simplify a TokenFactor: drop redundant/duplicate chain operands, inline
/// single-use child TokenFactors, and prune operands that are transitively
/// reached through another operand's chain.
2092SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2093 // If N has two operands, where one has an input chain equal to the other,
2094 // the 'other' chain is redundant.
2095 if (N->getNumOperands() == 2) {
2096 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2097 return N->getOperand(0);
2098 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2099 return N->getOperand(1);
2100 }
2101
2102 // Don't simplify token factors if optnone.
2103 if (OptLevel == CodeGenOpt::None)
2104 return SDValue();
2105
2106 // Don't simplify the token factor if the node itself has too many operands.
2107 if (N->getNumOperands() > TokenFactorInlineLimit)
2108 return SDValue();
2109
2110 // If the sole user is a token factor, we should make sure we have a
2111 // chance to merge them together. This prevents TF chains from inhibiting
2112 // optimizations.
2113 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2114 AddToWorklist(*(N->use_begin()));
2115
2116 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2117 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
// NOTE(review): the declaration of 'SeenOps' (a set used below for operand
// de-duplication) is not visible in this excerpt.
2119 bool Changed = false; // If we should replace this token factor.
2120
2121 // Start out with this token factor.
2122 TFs.push_back(N);
2123
2124 // Iterate through token factors. The TFs grows when new token factors are
2125 // encountered.
2126 for (unsigned i = 0; i < TFs.size(); ++i) {
2127 // Limit number of nodes to inline, to avoid quadratic compile times.
2128 // We have to add the outstanding Token Factors to Ops, otherwise we might
2129 // drop Ops from the resulting Token Factors.
2130 if (Ops.size() > TokenFactorInlineLimit) {
2131 for (unsigned j = i; j < TFs.size(); j++)
2132 Ops.emplace_back(TFs[j], 0);
2133 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2134 // combiner worklist later.
2135 TFs.resize(i);
2136 break;
2137 }
2138
2139 SDNode *TF = TFs[i];
2140 // Check each of the operands.
2141 for (const SDValue &Op : TF->op_values()) {
2142 switch (Op.getOpcode()) {
2143 case ISD::EntryToken:
2144 // Entry tokens don't need to be added to the list. They are
2145 // redundant.
2146 Changed = true;
2147 break;
2148
2149 case ISD::TokenFactor:
2150 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2151 // Queue up for processing.
2152 TFs.push_back(Op.getNode());
2153 Changed = true;
2154 break;
2155 }
2156 [[fallthrough]];
2157
2158 default:
2159 // Only add if it isn't already in the list.
2160 if (SeenOps.insert(Op.getNode()).second)
2161 Ops.push_back(Op);
2162 else
2163 Changed = true;
2164 break;
2165 }
2166 }
2167 }
2168
2169 // Re-visit inlined Token Factors, to clean them up in case they have been
2170 // removed. Skip the first Token Factor, as this is the current node.
2171 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2172 AddToWorklist(TFs[i]);
2173
2174 // Remove Nodes that are chained to another node in the list. Do so
2175 // by walking up chains breath-first stopping when we've seen
2176 // another operand. In general we must climb to the EntryNode, but we can exit
2177 // early if we find all remaining work is associated with just one operand as
2178 // no further pruning is possible.
2179
2180 // List of nodes to search through and original Ops from which they originate.
// NOTE(review): the declaration of 'Worklist' (pairs of node and originating
// operand index, judging by its uses below) is not visible in this excerpt.
2182 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2183 SmallPtrSet<SDNode *, 16> SeenChains;
2184 bool DidPruneOps = false;
2185
2186 unsigned NumLeftToConsider = 0;
2187 for (const SDValue &Op : Ops) {
2188 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2189 OpWorkCount.push_back(1);
2190 }
2191
2192 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2193 // If this is an Op, we can remove the op from the list. Remark any
2194 // search associated with it as from the current OpNumber.
2195 if (SeenOps.contains(Op)) {
2196 Changed = true;
2197 DidPruneOps = true;
2198 unsigned OrigOpNumber = 0;
2199 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2200 OrigOpNumber++;
2201 assert((OrigOpNumber != Ops.size()) &&
2202 "expected to find TokenFactor Operand");
2203 // Re-mark worklist from OrigOpNumber to OpNumber
2204 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2205 if (Worklist[i].second == OrigOpNumber) {
2206 Worklist[i].second = OpNumber;
2207 }
2208 }
2209 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2210 OpWorkCount[OrigOpNumber] = 0;
2211 NumLeftToConsider--;
2212 }
2213 // Add if it's a new chain
2214 if (SeenChains.insert(Op).second) {
2215 OpWorkCount[OpNumber]++;
2216 Worklist.push_back(std::make_pair(Op, OpNumber));
2217 }
2218 };
2219
// The 1024 cap bounds the chain walk for pathological DAGs.
2220 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2221 // We need at least be consider at least 2 Ops to prune.
2222 if (NumLeftToConsider <= 1)
2223 break;
2224 auto CurNode = Worklist[i].first;
2225 auto CurOpNumber = Worklist[i].second;
2226 assert((OpWorkCount[CurOpNumber] > 0) &&
2227 "Node should not appear in worklist");
2228 switch (CurNode->getOpcode()) {
2229 case ISD::EntryToken:
2230 // Hitting EntryToken is the only way for the search to terminate without
2231 // hitting
2232 // another operand's search. Prevent us from marking this operand
2233 // considered.
2234 NumLeftToConsider++;
2235 break;
2236 case ISD::TokenFactor:
2237 for (const SDValue &Op : CurNode->op_values())
2238 AddToWorklist(i, Op.getNode(), CurOpNumber);
2239 break;
// NOTE(review): a LIFETIME_START case label appears to be missing from this
// excerpt -- confirm against the full file.
2241 case ISD::LIFETIME_END:
2242 case ISD::CopyFromReg:
2243 case ISD::CopyToReg:
2244 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2245 break;
2246 default:
2247 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2248 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2249 break;
2250 }
2251 OpWorkCount[CurOpNumber]--;
2252 if (OpWorkCount[CurOpNumber] == 0)
2253 NumLeftToConsider--;
2254 }
2255
2256 // If we've changed things around then replace token factor.
2257 if (Changed) {
// NOTE(review): the declaration of 'Result' (an SDValue) is not visible in
// this excerpt.
2259 if (Ops.empty()) {
2260 // The entry token is the only possible outcome.
2261 Result = DAG.getEntryNode();
2262 } else {
2263 if (DidPruneOps) {
2264 SmallVector<SDValue, 8> PrunedOps;
2265 //
// Keep only operands whose chains were not reached through another operand.
2266 for (const SDValue &Op : Ops) {
2267 if (SeenChains.count(Op.getNode()) == 0)
2268 PrunedOps.push_back(Op);
2269 }
2270 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2271 } else {
2272 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2273 }
2274 }
2275 return Result;
2276 }
2277 return SDValue();
2278}
2279
2280/// MERGE_VALUES can always be eliminated.
2281SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2282 WorklistRemover DeadNodes(*this);
2283 // Replacing results may cause a different MERGE_VALUES to suddenly
2284 // be CSE'd with N, and carry its uses with it. Iterate until no
2285 // uses remain, to ensure that the node can be safely deleted.
2286 // First add the users of this node to the work list so that they
2287 // can be tried again once they have new operands.
2288 AddUsersToWorklist(N);
2289 do {
2290 // Do as a single replacement to avoid rewalking use lists.
// NOTE(review): the declaration of 'Ops' (presumably SmallVector<SDValue>)
// is not visible in this excerpt.
2292 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2293 Ops.push_back(N->getOperand(i));
// Replace every result of N with the corresponding operand in one pass.
2294 DAG.ReplaceAllUsesWith(N, Ops.data());
2295 } while (!N->use_empty());
2296 deleteAndRecombine(N);
2297 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2298}
2299
2300/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2301/// ConstantSDNode pointer else nullptr.
// NOTE(review): the function signature line is not visible in this excerpt
// (presumably `static ConstantSDNode *getAsNonOpaqueConstant(SDValue N)`).
2303 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
// Opaque constants must not be folded, so treat them the same as non-constants.
2304 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2305}
2306
2307/// Return true if 'Use' is a load or a store that uses N as its base pointer
2308/// and that N may be folded in the load / store addressing mode.
// NOTE(review): the function signature line is not visible in this excerpt;
// from the body it takes at least N, Use, a SelectionDAG and the
// TargetLowering reference below -- confirm against the full file.
2310 const TargetLowering &TLI) {
2311 EVT VT;
2312 unsigned AS;
2313
// Determine the accessed memory type and address space from whichever kind
// of memory node 'Use' is; bail out if it is indexed or N is not its base.
2314 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2315 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2316 return false;
2317 VT = LD->getMemoryVT();
2318 AS = LD->getAddressSpace();
2319 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2320 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2321 return false;
2322 VT = ST->getMemoryVT();
2323 AS = ST->getAddressSpace();
2324 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2325 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2326 return false;
2327 VT = LD->getMemoryVT();
2328 AS = LD->getAddressSpace();
2329 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2330 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2331 return false;
2332 VT = ST->getMemoryVT();
2333 AS = ST->getAddressSpace();
2334 } else {
2335 return false;
2336 }
2337
// Build the candidate addressing mode from N's add/sub structure.
// NOTE(review): the declaration of 'AM' (presumably TargetLowering::AddrMode)
// is not visible in this excerpt.
2339 if (N->getOpcode() == ISD::ADD) {
2340 AM.HasBaseReg = true;
2341 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2342 if (Offset)
2343 // [reg +/- imm]
2344 AM.BaseOffs = Offset->getSExtValue();
2345 else
2346 // [reg +/- reg]
2347 AM.Scale = 1;
2348 } else if (N->getOpcode() == ISD::SUB) {
2349 AM.HasBaseReg = true;
2350 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2351 if (Offset)
2352 // [reg +/- imm]
2353 AM.BaseOffs = -Offset->getSExtValue();
2354 else
2355 // [reg +/- reg]
2356 AM.Scale = 1;
2357 } else {
2358 return false;
2359 }
2360
// Ask the target whether this addressing mode is legal for the access.
2361 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2362 VT.getTypeForEVT(*DAG.getContext()), AS);
2363}
2364
2365/// This inverts a canonicalization in IR that replaces a variable select arm
2366/// with an identity constant. Codegen improves if we re-use the variable
2367/// operand rather than load a constant. This can also be converted into a
2368/// masked vector operation if the target supports it.
// NOTE(review): the function signature line is not visible in this excerpt;
// from the body it takes the binop node N, a SelectionDAG and the flag below.
2370 bool ShouldCommuteOperands) {
2371 // Match a select as operand 1. The identity constant that we are looking for
2372 // is only valid as operand 1 of a non-commutative binop.
2373 SDValue N0 = N->getOperand(0);
2374 SDValue N1 = N->getOperand(1);
2375 if (ShouldCommuteOperands)
2376 std::swap(N0, N1);
2377
2378 // TODO: Should this apply to scalar select too?
2379 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2380 return SDValue();
2381
2382 // We can't hoist div/rem because of immediate UB (not speculatable).
2383 unsigned Opcode = N->getOpcode();
2384 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
2385 return SDValue();
2386
2387 EVT VT = N->getValueType(0);
2388 SDValue Cond = N1.getOperand(0);
2389 SDValue TVal = N1.getOperand(1);
2390 SDValue FVal = N1.getOperand(2);
2391
2392 // This transform increases uses of N0, so freeze it to be safe.
2393 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2394 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2395 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2396 SDValue F0 = DAG.getFreeze(N0);
2397 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2398 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2399 }
2400 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2401 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2402 SDValue F0 = DAG.getFreeze(N0);
2403 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2404 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2405 }
2406
// Neither select arm is the identity constant for this binop.
2407 return SDValue();
2408}
2409
/// Fold a binary operator into a select of constants, eliminating the binop:
/// binop (select Cond, CT, CF), CBO --> select Cond, (binop CT, CBO),
/// (binop CF, CBO). Also tries the identity-constant inversion first.
2410SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2411 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2412 "Unexpected binary operator");
2413
2414 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2415 auto BinOpcode = BO->getOpcode();
2416 EVT VT = BO->getValueType(0);
2417 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2418 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2419 return Sel;
2420
// For commutative ops the identity constant may sit on either side.
2421 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2422 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2423 return Sel;
2424 }
2425
2426 // Don't do this unless the old select is going away. We want to eliminate the
2427 // binary operator, not replace a binop with a select.
2428 // TODO: Handle ISD::SELECT_CC.
2429 unsigned SelOpNo = 0;
2430 SDValue Sel = BO->getOperand(0);
2431 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2432 SelOpNo = 1;
2433 Sel = BO->getOperand(1);
2434 }
2435
2436 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2437 return SDValue();
2438
2439 SDValue CT = Sel.getOperand(1);
// NOTE(review): the second half of this condition (orig line 2441) is not
// visible in this excerpt -- presumably an FP-constant check; confirm.
2440 if (!isConstantOrConstantVector(CT, true) &&
2442 return SDValue();
2443
2444 SDValue CF = Sel.getOperand(2);
// NOTE(review): second half of this condition (orig line 2446) also missing.
2445 if (!isConstantOrConstantVector(CF, true) &&
2447 return SDValue();
2448
2449 // Bail out if any constants are opaque because we can't constant fold those.
2450 // The exception is "and" and "or" with either 0 or -1 in which case we can
2451 // propagate non constant operands into select. I.e.:
2452 // and (select Cond, 0, -1), X --> select Cond, 0, X
2453 // or X, (select Cond, -1, 0) --> select Cond, -1, X
// NOTE(review): the remaining conjuncts of this condition (orig 2456-2457)
// are not visible in this excerpt.
2454 bool CanFoldNonConst =
2455 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2458
2459 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
// NOTE(review): the final conjunct of this condition (orig 2462) is missing.
2460 if (!CanFoldNonConst &&
2461 !isConstantOrConstantVector(CBO, true) &&
2463 return SDValue();
2464
2465 SDLoc DL(Sel);
2466 SDValue NewCT, NewCF;
2467
2468 if (CanFoldNonConst) {
2469 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2470 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2471 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2472 NewCT = CT;
2473 else
2474 NewCT = CBO;
2475
2476 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2477 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2478 NewCF = CF;
2479 else
2480 NewCF = CBO;
2481 } else {
2482 // We have a select-of-constants followed by a binary operator with a
2483 // constant. Eliminate the binop by pulling the constant math into the
2484 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2485 // CBO, CF + CBO
// SelOpNo records which binop operand held the select, preserving operand
// order for non-commutative opcodes.
2486 NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2487 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
// NOTE(review): the final conjunct of this condition (orig 2490) is missing.
2488 if (!CanFoldNonConst && !NewCT.isUndef() &&
2489 !isConstantOrConstantVector(NewCT, true) &&
2491 return SDValue();
2492
2493 NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2494 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
// NOTE(review): the final conjunct of this condition (orig 2497) is missing.
2495 if (!CanFoldNonConst && !NewCF.isUndef() &&
2496 !isConstantOrConstantVector(NewCF, true) &&
2498 return SDValue();
2499 }
2500
2501 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
// Preserve the original binop's fast-math/overflow flags on the new select.
2502 SelectOp->setFlags(BO->getFlags());
2503 return SelectOp;
2504}
2505
/// Fold add/sub of a constant with a zext'd inverted low-bit test:
/// add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
/// sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
// NOTE(review): the function signature line is not visible in this excerpt
// (takes N and a SelectionDAG, judging by the body).
2507 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2508 "Expecting add or sub");
2509
2510 // Match a constant operand and a zext operand for the math instruction:
2511 // add Z, C
2512 // sub C, Z
2513 bool IsAdd = N->getOpcode() == ISD::ADD;
2514 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2515 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2516 auto *CN = dyn_cast<ConstantSDNode>(C);
2517 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2518 return SDValue();
2519
2520 // Match the zext operand as a setcc of a boolean.
2521 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2522 Z.getOperand(0).getValueType() != MVT::i1)
2523 return SDValue();
2524
2525 // Match the compare as: setcc (X & 1), 0, eq.
2526 SDValue SetCC = Z.getOperand(0);
2527 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2528 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2529 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2530 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2531 return SDValue();
2532
2533 // We are adding/subtracting a constant and an inverted low bit. Turn that
2534 // into a subtract/add of the low bit with incremented/decremented constant:
2535 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2536 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2537 EVT VT = C.getValueType();
2538 SDLoc DL(N);
// (X & 1), widened/truncated to the result type.
2539 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2540 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2541 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2542 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2543}
2544
2545/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2546/// a shift and add with a different constant.
// NOTE(review): the function signature line is not visible in this excerpt
// (takes N and a SelectionDAG, judging by the body).
2548 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2549 "Expecting add or sub");
2550
2551 // We need a constant operand for the add/sub, and the other operand is a
2552 // logical shift right: add (srl), C or sub C, (srl).
2553 bool IsAdd = N->getOpcode() == ISD::ADD;
2554 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2555 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2556 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2557 ShiftOp.getOpcode() != ISD::SRL)
2558 return SDValue();
2559
2560 // The shift must be of a 'not' value.
2561 SDValue Not = ShiftOp.getOperand(0);
2562 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2563 return SDValue();
2564
2565 // The shift must be moving the sign bit to the least-significant-bit.
2566 EVT VT = ShiftOp.getValueType();
2567 SDValue ShAmt = ShiftOp.getOperand(1);
2568 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2569 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2570 return SDValue();
2571
2572 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2573 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2574 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2575 SDLoc DL(N);
// NOTE(review): both the add and sub forms fold with a "+1" here; presumably
// FoldConstantArithmetic with ISD::SUB computes C - 1 -- confirm semantics.
2576 if (SDValue NewC = DAG.FoldConstantArithmetic(
2577 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2578 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2579 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2580 Not.getOperand(0), ShAmt);
2581 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2582 }
2583
2584 return SDValue();
2585}
2586
2587static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
2588 unsigned Opcode = V.getOpcode();
2589 if (Opcode == ISD::OR)
2590 return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
2591 if (Opcode == ISD::XOR)
2592 return isMinSignedConstant(V.getOperand(1));
2593 return false;
2594}
2595
/// Return true if one operand is the bitwise-not of the other, in either
/// direction.
// NOTE(review): the continuation line with the function name and parameters
// (presumably `areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {`) is not
// visible in this excerpt.
2596static bool
2598 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2599 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2600}
2601
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
SDValue DAGCombiner::visitADDLike(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // fold (add c1, c2) -> c1+c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
    return DAG.getNode(ISD::ADD, DL, VT, N1, N0);

  // fold (add X, ~X) -> constant: X + ~X is -1 for every bit pattern.
  if (areBitwiseNotOfEachother(N0, N1))
                          SDLoc(N), VT);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
      return N0;
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (N0.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // fold ((A-c1)+c2) -> (A+(c2-c1))
    if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);

    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
  }

  // add (sext i1 X), 1 -> zext (not i1 X)
  // We don't transform this pattern:
  //   add (zext i1 X), -1 -> sext (not i1 X)
  // because most (?) targets generate better code for the zext form.
  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
      isOneOrOneSplat(N1)) {
    SDValue X = N0.getOperand(0);
    if ((!LegalOperations ||
         (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
        X.getScalarValueSizeInBits() == 1) {
      SDValue Not = DAG.getNOT(DL, X, X.getValueType());
      return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
    }
  }

  // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
  // iff (or x, c0) is equivalent to (add x, c0).
  // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
  // iff (xor x, c0) is equivalent to (add x, c0).
  if (isADDLike(N0, DAG)) {
    SDValue N01 = N0.getOperand(1);
    if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
      return RADD;

    // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
    // equivalent to (add x, c).
    // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
    // equivalent to (add x, c).
    // Do this optimization only when adding c does not introduce instructions
    // for adding carries.
    auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
      if (isADDLike(N0, DAG) && N0.hasOneUse() &&
          isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
        // If N0's type does not split or is a sign mask, it does not introduce
        // add carry.
        auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
        bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
        if (NoAddCarry)
          return DAG.getNode(
              ISD::ADD, DL, VT,
              DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
              N0.getOperand(1));
      }
      return SDValue();
    };
    // The helper is order-sensitive, so try both operand orders.
    if (SDValue Add = ReassociateAddOr(N0, N1))
      return Add;
    if (SDValue Add = ReassociateAddOr(N1, N0))
      return Add;

    // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
    if (SDValue SD =
            reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
      return SD;
  }
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold ((A-B)+(C-A)) -> (C-B)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(0) == N1.getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N0.getOperand(1));

  // fold ((A-B)+(B-C)) -> (A-C)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(1) == N1.getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N1.getOperand(1));

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0->hasOneUse() && N1->hasOneUse()) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // fold (add (umax X, C), -C) --> (usubsat X, C)
  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
    // Match constants where Max == -Op; matching undef lanes are allowed.
    auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
      return (!Max && !Op) ||
             (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
    };
    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
                                  /*AllowUndefs*/ true))
      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
                         N0.getOperand(1));
  }

    return SDValue(N, 0);

  if (isOneOrOneSplat(N1)) {
    // fold (add (xor a, -1), 1) -> (sub 0, a)
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                         N0.getOperand(0));

    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
    if (N0.getOpcode() == ISD::ADD) {
      SDValue A, Xor;

      if (isBitwiseNot(N0.getOperand(0))) {
        A = N0.getOperand(1);
        Xor = N0.getOperand(0);
      } else if (isBitwiseNot(N0.getOperand(1))) {
        A = N0.getOperand(0);
        Xor = N0.getOperand(1);
      }

      if (Xor)
        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
    }

    // Look for:
    //   add (add x, y), 1
    // And if the target does not like this form then turn into:
    //   sub y, (xor x, -1)
    if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
        N0.hasOneUse()) {
      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
    }
  }

  // (x - y) + -1 -> add (xor y, -1), x
  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

  // Try the commuted add-like folds with the operands in both orders.
  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
    return Combined;

  return SDValue();
}
2849
/// Combine an ISD::ADD node: delegates the generic add-like folds to
/// visitADDLike, then applies ADD-specific patterns (add->or when bits are
/// disjoint, and VSCALE/STEP_VECTOR constant merging).
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  if (SDValue Combined = visitADDLike(N))
    return Combined;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
    const APInt &C0 = N0->getConstantOperandAPInt(0);
    const APInt &C1 = N1->getConstantOperandAPInt(0);
    return DAG.getVScale(DL, VT, C0 + C1);
  }

  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::VSCALE &&
      N1.getOpcode() == ISD::VSCALE) {
    const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
    const APInt &VS1 = N1->getConstantOperandAPInt(0);
    SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
  }

  // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
  if (N0.getOpcode() == ISD::STEP_VECTOR &&
      N1.getOpcode() == ISD::STEP_VECTOR) {
    const APInt &C0 = N0->getConstantOperandAPInt(0);
    const APInt &C1 = N1->getConstantOperandAPInt(0);
    APInt NewStep = C0 + C1;
    return DAG.getStepVector(DL, VT, NewStep);
  }

  // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
  if (N0.getOpcode() == ISD::ADD &&
      N1.getOpcode() == ISD::STEP_VECTOR) {
    const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
    const APInt &SV1 = N1->getConstantOperandAPInt(0);
    APInt NewStep = SV0 + SV1;
    SDValue SV = DAG.getStepVector(DL, VT, NewStep);
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
  }

  return SDValue();
}
2909
/// Combine saturating add nodes (e.g. ISD::UADDSAT); Opcode is taken from N
/// so the same folds serve every add_sat flavor.
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold (add_sat x, undef) -> -1
  if (N0.isUndef() || N1.isUndef())
    return DAG.getAllOnesConstant(DL, VT);

  // fold (add_sat c1, c2) -> c3
  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
    return DAG.getNode(Opcode, DL, VT, N1, N0);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

    // fold (add_sat x, 0) -> x, vector edition
      return N0;
  }

  // fold (add_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // If it cannot overflow, transform into an add.
  if (Opcode == ISD::UADDSAT)
      return DAG.getNode(ISD::ADD, DL, VT, N0, N1);

  return SDValue();
}
2951
/// Peel off TRUNCATE/ZERO_EXTEND/AND(x,1) wrappers (introduced by
/// legalization) and return the underlying value if it is the carry result
/// (result #1) of an ADDCARRY/SUBCARRY/UADDO/USUBO whose boolean is a valid
/// 0/1 carry; otherwise return an empty SDValue.
  bool Masked = false;

  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
  while (true) {
    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
      continue;
    }

    // AND with 1 masks the value down to a single bit; remember that we saw
    // it so looser boolean contents are acceptable below.
    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
      Masked = true;
      V = V.getOperand(0);
      continue;
    }

    break;
  }

  // If this is not a carry, return.
  if (V.getResNo() != 1)
    return SDValue();

  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    return SDValue();

  EVT VT = V->getValueType(0);
  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
    return SDValue();

  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool type is either 0 or
  // 1 and not other values.
  if (Masked ||
      TLI.getBooleanContents(V.getValueType()) ==
    return V;

  return SDValue();
}
2993
2994/// Given the operands of an add/sub operation, see if the 2nd operand is a
2995/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2996/// the opcode and bypass the mask operation.
2997static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2998 SelectionDAG &DAG, const SDLoc &DL) {
2999 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3000 N1 = N1.getOperand(0);
3001
3002 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3003 return SDValue();
3004
3005 EVT VT = N0.getValueType();
3006 SDValue N10 = N1.getOperand(0);
3007 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3008 N10 = N10.getOperand(0);
3009
3010 if (N10.getValueType() != VT)
3011 return SDValue();
3012
3013 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3014 return SDValue();
3015
3016 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3017 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3018 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3019}
3020
/// Helper for doing combines based on N0 and N1 being added to each other.
/// The caller invokes this with the operands in both orders, so each fold
/// here only needs to match one orientation.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                             SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
    return V;

  // Look for:
  //   add (add x, 1), y
  // And if the target does not like this form then turn into:
  //   sub y, (xor x, -1)
  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
      N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
  }

  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
    // Hoist one-use subtraction by non-opaque constant:
    //   (x - C) + y -> (x + y) - C
    // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
    if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
    }
    // Hoist one-use subtraction from non-opaque constant:
    //   (C - x) + y -> (y - x) + C
    if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
    }
  }

  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
  // rather than 'add 0/-1' (the zext should get folded).
  // add (sext i1 Y), X --> sub X, (zext i1 Y)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
3100
/// Combine an ISD::ADDC node (add producing an implicit glue carry-out).
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),

  return SDValue();
}
3130
3131/**
3132 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
3133 * then the flip also occurs if computing the inverse is the same cost.
3134 * This function returns an empty SDValue in case it cannot flip the boolean
3135 * without increasing the cost of the computation. If you want to flip a boolean
3136 * no matter what, use DAG.getLogicalNOT.
3137 */
3139 const TargetLowering &TLI,
3140 bool Force) {
3141 if (Force && isa<ConstantSDNode>(V))
3142 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3143
3144 if (V.getOpcode() != ISD::XOR)
3145 return SDValue();
3146
3147 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3148 if (!Const)
3149 return SDValue();
3150
3151 EVT VT = V.getValueType();
3152
3153 bool IsFlip = false;
3154 switch(TLI.getBooleanContents(VT)) {
3156 IsFlip = Const->isOne();
3157 break;
3159 IsFlip = Const->isAllOnes();
3160 break;
3162 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3163 break;
3164 }
3165
3166 if (IsFlip)
3167 return V.getOperand(0);
3168 if (Force)
3169 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3170 return SDValue();
3171}
3172
/// Combine UADDO/SADDO (add producing an overflow flag in result #1).
SDValue DAGCombiner::visitADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SADDO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (addo x, 0) -> x + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // The remaining folds only apply to the unsigned flavor.
  if (!IsSigned) {
    // If it cannot overflow, transform into an add.
      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                       DAG.getConstant(0, DL, CarryVT));

    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
      return CombineTo(
          N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
    }

    if (SDValue Combined = visitUADDOLike(N0, N1, N))
      return Combined;

    if (SDValue Combined = visitUADDOLike(N1, N0, N))
      return Combined;
  }

  return SDValue();
}
3219
/// UADDO-specific folds; the caller tries both operand orders.
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  // These carry folds are scalar-only.
  if (VT.isVector())
    return SDValue();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}
3243
3244SDValue DAGCombiner::visitADDE(SDNode *N) {
3245 SDValue N0 = N->getOperand(0);
3246 SDValue N1 = N->getOperand(1);
3247 SDValue CarryIn = N->getOperand(2);
3248
3249 // canonicalize constant to RHS
3250 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3251 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3252 if (N0C && !N1C)
3253 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3254 N1, N0, CarryIn);
3255
3256 // fold (adde x, y, false) -> (addc x, y)
3257 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3258 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3259
3260 return SDValue();
3261}
3262
3263SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
3264 SDValue N0 = N->getOperand(0);
3265 SDValue N1 = N->getOperand(1);
3266 SDValue CarryIn = N->getOperand(2);
3267 SDLoc DL(N);
3268
3269 // canonicalize constant to RHS
3270 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3271 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3272 if (N0C && !N1C)
3273 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
3274
3275 // fold (addcarry x, y, false) -> (uaddo x, y)
3276 if (isNullConstant(CarryIn)) {
3277 if (!LegalOperations ||
3278 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3279 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3280 }
3281
3282 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3283 if (isNullConstant(N0) && isNullConstant(N1)) {
3284 EVT VT = N0.getValueType();
3285 EVT CarryVT = CarryIn.getValueType();
3286 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3287 AddToWorklist(CarryExt.getNode());
3288 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3289 DAG.getConstant(1, DL, VT)),
3290 DAG.getConstant(0, DL, CarryVT));
3291 }
3292
3293 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
3294 return Combined;
3295
3296 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
3297 return Combined;
3298
3299 // We want to avoid useless duplication.
3300 // TODO: This is done automatically for binary operations. As ADDCARRY is
3301 // not a binary operation, this is not really possible to leverage this
3302 // existing mechanism for it. However, if more operations require the same
3303 // deduplication logic, then it may be worth generalize.
3304 SDValue Ops[] = {N1, N0, CarryIn};
3305 SDNode *CSENode =
3306 DAG.getNodeIfExists(ISD::ADDCARRY, N->getVTList(), Ops, N->getFlags());
3307 if (CSENode)
3308 return SDValue(CSENode, 0);
3309
3310 return SDValue();
3311}
3312
3313SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3314 SDValue N0 = N->getOperand(0);
3315 SDValue N1 = N->getOperand(1);
3316 SDValue CarryIn = N->getOperand(2);
3317 SDLoc DL(N);
3318
3319 // canonicalize constant to RHS
3320 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3321 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3322 if (N0C && !N1C)
3323 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3324
3325 // fold (saddo_carry x, y, false) -> (saddo x, y)
3326 if (isNullConstant(CarryIn)) {
3327 if (!LegalOperations ||
3328 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3329 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3330 }
3331
3332 return SDValue();
3333}
3334
3335/**
3336 * If we are facing some sort of diamond carry propapagtion pattern try to
3337 * break it up to generate something like:
3338 * (addcarry X, 0, (addcarry A, B, Z):Carry)
3339 *
3340 * The end result is usually an increase in operation required, but because the
3341 * carry is now linearized, other transforms can kick in and optimize the DAG.
3342 *
3343 * Patterns typically look something like
3344 * (uaddo A, B)
3345 * / \
3346 * Carry Sum
3347 * | \
3348 * | (addcarry *, 0, Z)
3349 * | /
3350 * \ Carry
3351 * | /
3352 * (addcarry X, *, *)
3353 *
3354 * But numerous variation exist. Our goal is to identify A, B, X and Z and
3355 * produce a combine with a single path for carry propagation.
3356 */
3358 SDValue X, SDValue Carry0, SDValue Carry1,
3359 SDNode *N) {
3360 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3361 return SDValue();
3362 if (Carry1.getOpcode() != ISD::UADDO)
3363 return SDValue();
3364
3365 SDValue Z;
3366
3367 /**
3368 * First look for a suitable Z. It will present itself in the form of
3369 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3370 */
3371 if (Carry0.getOpcode() == ISD::ADDCARRY &&
3372 isNullConstant(Carry0.getOperand(1))) {
3373 Z = Carry0.getOperand(2);
3374 } else if (Carry0.getOpcode() == ISD::UADDO &&
3375 isOneConstant(Carry0.getOperand(1))) {
3376 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3377 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3378 } else {
3379 // We couldn't find a suitable Z.
3380 return SDValue();
3381 }
3382
3383
3384 auto cancelDiamond = [&](SDValue A,SDValue B) {
3385 SDLoc DL(N);
3386 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3387 Combiner.AddToWorklist(NewY.getNode());
3388 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3389 DAG.getConstant(0, DL, X.getValueType()),
3390 NewY.getValue(1));
3391 };
3392
3393 /**
3394 * (uaddo A, B)
3395 * |
3396 * Sum
3397 * |
3398 * (addcarry *, 0, Z)
3399 */
3400 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3401 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3402 }
3403
3404 /**
3405 * (addcarry A, 0, Z)
3406 * |
3407 * Sum
3408 * |
3409 * (uaddo *, B)
3410 */
3411 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3412 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3413 }
3414
3415 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3416 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3417 }
3418
3419 return SDValue();
3420}
3421
// If we are facing some sort of diamond carry/borrow in/out pattern try to
// match patterns like:
//
//          (uaddo A, B)            CarryIn
//            |  \                     |
//            |   \                    |
//    PartialSum   PartialCarryOutX   /
//            |        |             /
//            |    ____|____________/
//            |   /    |
//     (uaddo *, *)    \________
//       |  \                   \
//       |   \                   |
//       |    PartialCarryOutY   |
//       |         \             |
//       |          \           /
//   AddCarrySum    |  ______/
//                  | /
//     CarryOut = (or *, *)
//
// And generate ADDCARRY (or SUBCARRY) with two result values:
//
//    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
//
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
                                   SDValue N0, SDValue N1, SDNode *N) {
  // Both operands must be recognizable carry/borrow values.
  SDValue Carry0 = getAsCarry(TLI, N0);
  if (!Carry0)
    return SDValue();
  SDValue Carry1 = getAsCarry(TLI, N1);
  if (!Carry1)
    return SDValue();

  // Both partial operations must be the same flavor (both adds or both subs).
  unsigned Opcode = Carry0.getOpcode();
  if (Opcode != Carry1.getOpcode())
    return SDValue();
  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
    return SDValue();

  // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
  // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
  if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
    std::swap(Carry0, Carry1);

  // Check if nodes are connected in expected way.
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    return SDValue();

  // The carry in value must be on the righthand side for subtraction.
  unsigned CarryInOperandNum =
      Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
    return SDValue();
  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);

  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
    return SDValue();

  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
  // TODO: make getAsCarry() aware of how partial carries are merged.
  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();
  CarryIn = CarryIn.getOperand(0);
  if (CarryIn.getValueType() != MVT::i1)
    return SDValue();

  SDLoc DL(N);
  SDValue Merged =
      DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
                  Carry0.getOperand(1), CarryIn);

  // Please note that because we have proven that the result of the UADDO/USUBO
  // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
  // therefore prove that if the first UADDO/USUBO overflows, the second
  // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
  // maximum value.
  //
  //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
  //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
  //
  // This is important because it means that OR and XOR can be used to merge
  // carry flags; and that AND can return a constant zero.
  //
  // TODO: match other operations that can merge flags (ADD, etc)
  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
  if (N->getOpcode() == ISD::AND)
    return DAG.getConstant(0, DL, MVT::i1);
  return Merged.getValue(1);
}
3515
/// ADDCARRY folds that are sensitive to operand order; visitADDCARRY invokes
/// this twice with N0/N1 swapped so each pattern is matched both ways.
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
  if (isBitwiseNot(N0))
    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
      SDLoc DL(N);
      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
                                N0.getOperand(0), NotC);
      // The borrow out of the SUBCARRY is the inverse of the original carry.
      return CombineTo(
          N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
    }

  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
  // or the dependency between the instructions.
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
        N0.getValue(1) != CarryIn)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    // Because both are carries, Y and Z can be swapped.
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
      return R;
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
      return R;
  }

  return SDValue();
}
3554
// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
// clamp/truncation if necessary.
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
                                   SDValue RHS, SelectionDAG &DAG,
                                   const SDLoc &DL) {
  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
         "Illegal truncation");

  // Same type: no truncation needed.
  if (DstVT == SrcVT)
    return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);

  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
  // clamping RHS.
                                      DstVT.getScalarSizeInBits());
  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
    return SDValue();

  // Clamp RHS to DstVT's unsigned max, then do the subtraction narrowly.
  SDValue SatLimit =
                                         DstVT.getScalarSizeInBits()),
                      DL, SrcVT);
  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
}
3582
3583// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3584// usubsat(a,b), optionally as a truncated type.
3585SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3586 if (N->getOpcode() != ISD::SUB ||
3587 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3588 return SDValue();
3589
3590 EVT SubVT = N->getValueType(0);
3591 SDValue Op0 = N->getOperand(0);
3592 SDValue Op1 = N->getOperand(1);
3593
3594 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3595 // they may be converted to usubsat(a,b).
3596 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3597 SDValue MaxLHS = Op0.getOperand(0);
3598 SDValue MaxRHS = Op0.getOperand(1);
3599 if (MaxLHS == Op1)
3600 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3601 if (MaxRHS == Op1)
3602 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3603 }
3604
3605 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3606 SDValue MinLHS = Op1.getOperand(0);
3607 SDValue MinRHS = Op1.getOperand(1);
3608 if (MinLHS == Op0)
3609 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3610 if (MinRHS == Op0)
3611 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3612 }
3613
3614 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3615 if (Op1.getOpcode() == ISD::TRUNCATE &&
3616 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3617 Op1.getOperand(0).hasOneUse()) {
3618 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3619 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3620 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3621 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3622 DAG, SDLoc(N));
3623 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3624 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3625 DAG, SDLoc(N));
3626 }
3627
3628 return SDValue();
3629}
3630
3631// Since it may not be valid to emit a fold to zero for vector initializers
3632// check if we can before folding.
3633static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3634 SelectionDAG &DAG, bool LegalOperations) {
3635 if (!VT.isVector())
3636 return DAG.getConstant(0, DL, VT);
3637 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3638 return DAG.getConstant(0, DL, VT);
3639 return SDValue();
3640}
3641
3642SDValue DAGCombiner::visitSUB(SDNode *N) {
3643 SDValue N0 = N->getOperand(0);
3644 SDValue N1 = N->getOperand(1);
3645 EVT VT = N0.getValueType();
3646 SDLoc DL(N);
3647
3648 auto PeekThroughFreeze = [](SDValue N) {
3649 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3650 return N->getOperand(0);
3651 return N;
3652 };
3653
3654 // fold (sub x, x) -> 0
3655 // FIXME: Refactor this and xor and other similar operations together.
3656 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3657 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3658
3659 // fold (sub c1, c2) -> c3
3660 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3661 return C;
3662
3663 // fold vector ops
3664 if (VT.isVector()) {
3665 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3666 return FoldedVOp;
3667
3668 // fold (sub x, 0) -> x, vector edition
3670 return N0;
3671 }
3672
3673 if (SDValue NewSel = foldBinOpIntoSelect(N))
3674 return NewSel;
3675
3677
3678 // fold (sub x, c) -> (add x, -c)
3679 if (N1C) {
3680 return DAG.getNode(ISD::ADD, DL, VT, N0,
3681 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3682 }
3683
3684 if (isNullOrNullSplat(N0)) {
3685 unsigned BitWidth = VT.getScalarSizeInBits();
3686 // Right-shifting everything out but the sign bit followed by negation is
3687 // the same as flipping arithmetic/logical shift type without the negation:
3688 // -(X >>u 31) -> (X >>s 31)
3689 // -(X >>s 31) -> (X >>u 31)
3690 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3692 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3693 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3694 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3695 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3696 }
3697 }
3698
3699 // 0 - X --> 0 if the sub is NUW.
3700 if (N->getFlags().hasNoUnsignedWrap())
3701 return N0;
3702
3704 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3705 // N1 must be 0 because negating the minimum signed value is undefined.
3706 if (N->getFlags().hasNoSignedWrap())
3707 return N0;
3708
3709 // 0 - X --> X if X is 0 or the minimum signed value.
3710 return N1;
3711 }
3712
3713 // Convert 0 - abs(x).
3714 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3716 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3717 return Result;
3718
3719 // Fold neg(splat(neg(x)) -> splat(x)
3720 if (VT.isVector()) {
3721 SDValue N1S = DAG.getSplatValue(N1, true);
3722 if (N1S && N1S.getOpcode() == ISD::SUB &&
3723 isNullConstant(N1S.getOperand(0)))
3724 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3725 }
3726 }
3727
3728 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3730 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3731
3732 // fold (A - (0-B)) -> A+B
3733 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3734 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3735
3736 // fold A-(A-B) -> B
3737 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3738 return N1.getOperand(1);
3739
3740 // fold (A+B)-A -> B
3741 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3742 return N0.getOperand(1);
3743
3744 // fold (A+B)-B -> A
3745 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3746 return N0.getOperand(0);
3747
3748 // fold (A+C1)-C2 -> A+(C1-C2)
3749 if (N0.getOpcode() == ISD::ADD) {
3750 SDValue N01 = N0.getOperand(1);
3751 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3752 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3753 }
3754
3755 // fold C2-(A+C1) -> (C2-C1)-A
3756 if (N1.getOpcode() == ISD::ADD) {
3757 SDValue N11 = N1.getOperand(1);
3758 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3759 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3760 }
3761
3762 // fold (A-C1)-C2 -> A-(C1+C2)
3763 if (N0.getOpcode() == ISD::SUB) {
3764 SDValue N01 = N0.getOperand(1);
3765 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3766 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3767 }
3768
3769 // fold (c1-A)-c2 -> (c1-c2)-A
3770 if (N0.getOpcode() == ISD::SUB) {
3771 SDValue N00 = N0.getOperand(0);
3772 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3773 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3774 }
3775
3776 // fold ((A+(B+or-C))-B) -> A+or-C
3777 if (N0.getOpcode() == ISD::ADD &&
3778 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3779 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3780 N0.getOperand(1).getOperand(0) == N1)
3781 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3782 N0.getOperand(1).getOperand(1));
3783
3784 // fold ((A+(C+B))-B) -> A+C
3785 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3786 N0.getOperand(1).getOperand(1) == N1)
3787 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3788 N0.getOperand(1).getOperand(0));
3789
3790 // fold ((A-(B-C))-C) -> A-B
3791 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3792 N0.getOperand(1).getOperand(1) == N1)
3793 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3794 N0.getOperand(1).getOperand(0));
3795
3796 // fold (A-(B-C)) -> A+(C-B)
3797 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3798 return DAG.getNode(ISD::ADD, DL, VT, N0,
3799 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3800 N1.getOperand(0)));
3801
3802 // A - (A & B) -> A & (~B)
3803 if (N1.getOpcode() == ISD::AND) {
3804 SDValue A = N1.getOperand(0);
3805 SDValue B = N1.getOperand(1);
3806 if (A != N0)
3807 std::swap(A, B);
3808 if (A == N0 &&
3809 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3810 SDValue InvB =
3811 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3812 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3813 }
3814 }
3815
3816 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3817 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3818 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3820 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3821 N1.getOperand(0).getOperand(1),
3822 N1.getOperand(1));
3823 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3824 }
3825 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3827 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3828 N1.getOperand(0),
3829 N1.getOperand(1).getOperand(1));
3830 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3831 }
3832 }
3833
3834 // If either operand of a sub is undef, the result is undef
3835 if (N0.isUndef())
3836 return N0;
3837 if (N1.isUndef())
3838 return N1;
3839
3840 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3841 return V;
3842
3843 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3844 return V;
3845
3846 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3847 return V;
3848
3849 if (SDValue V = foldSubToUSubSat(VT, N))
3850 return V;
3851
3852 // (x - y) - 1 -> add (xor y, -1), x
3853 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
3854 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3855 DAG.getAllOnesConstant(DL, VT));
3856 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3857 }
3858
3859 // Look for:
3860 // sub y, (xor x, -1)
3861 // And if the target does not like this form then turn into:
3862 // add (add x, y), 1
3863 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3864 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3865 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3866 }
3867
3868 // Hoist one-use addition by non-opaque constant:
3869 // (x + C) - y -> (x - y) + C
3870 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3871 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3872 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3873 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3874 }
3875 // y - (x + C) -> (y - x) - C
3876 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3877 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3878 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3879 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3880 }
3881 // (x - C) - y -> (x - y) - C
3882 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3883 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3884 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3885 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3886 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3887 }
3888 // (C - x) - y -> C - (x + y)
3889 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3890 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3891 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3892 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3893 }
3894
3895 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3896 // rather than 'sub 0/1' (the sext should get folded).
3897 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3898 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3899 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3900 TLI.getBooleanContents(VT) ==
3902 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3903 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3904 }
3905
3906 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3907 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3908 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3909 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3910 SDValue S0 = N1.getOperand(0);
3911 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3913 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3914 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3915 }
3916 }
3917
3918 // If the relocation model supports it, consider symbol offsets.
3919 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3920 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3921 // fold (sub Sym, c) -> Sym-c
3922 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3923 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3924 GA->getOffset() -
3925 (uint64_t)N1C->getSExtValue());
3926 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3927 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3928 if (GA->getGlobal() == GB->getGlobal())
3929 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3930 DL, VT);
3931 }
3932
3933 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3934 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3935 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3936 if (TN->getVT() == MVT::i1) {
3937 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3938 DAG.getConstant(1, DL, VT));
3939 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3940 }
3941 }
3942
3943 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3944 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
3945 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3946 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3947 }
3948
3949 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3950 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3951 APInt NewStep = -N1.getConstantOperandAPInt(0);
3952 return DAG.getNode(ISD::ADD, DL, VT, N0,
3953 DAG.getStepVector(DL, VT, NewStep));
3954 }
3955
3956 // Prefer an add for more folding potential and possibly better codegen:
3957 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3958 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3959 SDValue ShAmt = N1.getOperand(1);
3960 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3961 if (ShAmtC &&
3962 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3963 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3964 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3965 }
3966 }
3967
3968 // As with the previous fold, prefer add for more folding potential.
3969 // Subtracting SMIN/0 is the same as adding SMIN/0:
3970 // N0 - (X << BW-1) --> N0 + (X << BW-1)
3971 if (N1.getOpcode() == ISD::SHL) {
3973 if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
3974 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
3975 }
3976
3977 // (sub (subcarry X, 0, Carry), Y) -> (subcarry X, Y, Carry)
3978 if (N0.getOpcode() == ISD::SUBCARRY && isNullConstant(N0.getOperand(1)) &&
3979 N0.getResNo() == 0 && N0.hasOneUse())
3980 return DAG.getNode(ISD::SUBCARRY, DL, N0->getVTList(),
3981 N0.getOperand(0), N1, N0.getOperand(2));
3982
3984 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3985 if (SDValue Carry = getAsCarry(TLI, N0)) {
3986 SDValue X = N1;
3987 SDValue Zero = DAG.getConstant(0, DL, VT);
3988 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3989 return DAG.getNode(ISD::ADDCARRY, DL,
3990 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3991 Carry);
3992 }
3993 }
3994
3995 // If there's no chance of borrowing from adjacent bits, then sub is xor:
3996 // sub C0, X --> xor X, C0
3997 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
3998 if (!C0->isOpaque()) {
3999 const APInt &C0Val = C0->getAPIntValue();
4000 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4001 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4002 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4003 }
4004 }
4005
4006 // max(a,b) - min(a,b) --> abd(a,b)
4007 auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) {
4008 if (N0.getOpcode() != Max || N1.getOpcode() != Min)
4009 return SDValue();
4010 if ((N0.getOperand(0) != N1.getOperand(0) ||
4011 N0.getOperand(1) != N1.getOperand(1)) &&
4012 (N0.getOperand(0) != N1.getOperand(1) ||
4013 N0.getOperand(1) != N1.getOperand(0)))
4014 return SDValue();
4015 if (!hasOperation(Abd, VT))
4016 return SDValue();
4017 return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
4018 };
4019 if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS))
4020 return R;
4021 if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU))
4022 return R;
4023
4024 return SDValue();
4025}
4026
4027SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4028 SDValue N0 = N->getOperand(0);
4029 SDValue N1 = N->getOperand(1);
4030 EVT VT = N0.getValueType();
4031 SDLoc DL(N);
4032
4033 // fold (sub_sat x, undef) -> 0
4034 if (N0.isUndef() || N1.isUndef())
4035 return DAG.getConstant(0, DL, VT);
4036
4037 // fold (sub_sat x, x) -> 0
4038 if (N0 == N1)
4039 return DAG.getConstant(0, DL, VT);
4040
4041 // fold (sub_sat c1, c2) -> c3
4042 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
4043 return C;
4044
4045 // fold vector ops
4046 if (VT.isVector()) {
4047 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4048 return FoldedVOp;
4049
4050 // fold (sub_sat x, 0) -> x, vector edition
4052 return N0;
4053 }
4054
4055 // fold (sub_sat x, 0) -> x
4056 if (isNullConstant(N1))
4057 return N0;
4058
4059 return SDValue();
4060}
4061
4062SDValue DAGCombiner::visitSUBC(SDNode *N) {
4063 SDValue N0 = N->getOperand(0);
4064 SDValue N1 = N->getOperand(1);
4065 EVT VT = N0.getValueType();
4066 SDLoc DL(N);
4067
4068 // If the flag result is dead, turn this into an SUB.
4069 if (!N->hasAnyUseOfValue(1))
4070 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4072
4073 // fold (subc x, x) -> 0 + no borrow
4074 if (N0 == N1)
4075 return CombineTo(N, DAG.getConstant(0, DL, VT),
4077
4078 // fold (subc x, 0) -> x + no borrow
4079 if (isNullConstant(N1))
4080 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4081
4082 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4083 if (isAllOnesConstant(N0))
4084 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4086
4087 return SDValue();
4088}
4089
4090SDValue DAGCombiner::visitSUBO(SDNode *N) {
4091 SDValue N0 = N->getOperand(0);
4092 SDValue N1 = N->getOperand(1);
4093 EVT VT = N0.getValueType();
4094 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4095
4096 EVT CarryVT = N->getValueType(1);
4097 SDLoc DL(N);
4098
4099 // If the flag result is dead, turn this into an SUB.
4100 if (!N->hasAnyUseOfValue(1))
4101 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4102 DAG.getUNDEF(CarryVT));
4103
4104 // fold (subo x, x) -> 0 + no borrow
4105 if (N0 == N1)
4106 return CombineTo(N, DAG.getConstant(0, DL, VT),
4107 DAG.getConstant(0, DL, CarryVT));
4108
4110
4111 // fold (subox, c) -> (addo x, -c)
4112 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
4113 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4114 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4115 }
4116
4117 // fold (subo x, 0) -> x + no borrow
4118 if (isNullOrNullSplat(N1))
4119 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4120
4121 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4122 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4123 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4124 DAG.getConstant(0, DL, CarryVT));
4125
4126 return SDValue();
4127}
4128
4129SDValue DAGCombiner::visitSUBE(SDNode *N) {
4130 SDValue N0 = N->getOperand(0);
4131 SDValue N1 = N->getOperand(1);
4132 SDValue CarryIn = N->getOperand(2);
4133
4134 // fold (sube x, y, false) -> (subc x, y)
4135 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4136 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4137
4138 return SDValue();
4139}
4140
4141SDValue DAGCombiner