LLVM 19.0.0git
DAGCombiner.cpp
Go to the documentation of this file.
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
144 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
149 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG.
175
176 /// Mapping from an SDNode to its position on the worklist.
177 ///
178 /// This is used to find and remove nodes from the worklist (by nulling
179 /// them) when they are deleted from the underlying DAG. It relies on
180 /// stable indices of nodes within the worklist.
182
183 /// This records all nodes attempted to be added to the worklist since we
184 /// considered a new worklist entry. As we keep do not add duplicate nodes
185 /// in the worklist, this is different from the tail of the worklist.
187
188 /// Set of nodes which have been combined (at least once).
189 ///
190 /// This is used to allow us to reliably add any operands of a DAG node
191 /// which have not yet been combined to the worklist.
192 SmallPtrSet<SDNode *, 32> CombinedNodes;
193
194 /// Map from candidate StoreNode to the pair of RootNode and count.
195 /// The count is used to track how many times we have seen the StoreNode
196 /// with the same RootNode bail out in dependence check. If we have seen
197 /// the bail out for the same pair many times over a limit, we won't
198 /// consider the StoreNode with the same RootNode as store merging
199 /// candidate again.
201
202 // AA - Used for DAG load/store alias analysis.
203 AliasAnalysis *AA;
204
205 /// When an instruction is simplified, add all users of the instruction to
206 /// the work lists because they might get more simplified now.
207 void AddUsersToWorklist(SDNode *N) {
208 for (SDNode *Node : N->uses())
209 AddToWorklist(Node);
210 }
211
212 /// Convenient shorthand to add a node and all of its users to the worklist.
213 void AddToWorklistWithUsers(SDNode *N) {
// Users go in first so that N itself (if newly queued) sits nearest the
// back of the worklist, which is popped from the back.
214 AddUsersToWorklist(N);
215 AddToWorklist(N);
216 }
217
218 // Prune potentially dangling nodes. This is called after
219 // any visit to a node, but should also be called during a visit after any
220 // failed combine which may have created a DAG node.
221 void clearAddedDanglingWorklistEntries() {
222 // Check any nodes added to the worklist to see if they are prunable.
223 while (!PruningList.empty()) {
224 auto *N = PruningList.pop_back_val();
225 if (N->use_empty())
226 recursivelyDeleteUnusedNodes(N);
227 }
228 }
229
230 SDNode *getNextWorklistEntry() {
231 // Before we do any work, remove nodes that are not in use.
232 clearAddedDanglingWorklistEntries();
233 SDNode *N = nullptr;
234 // The Worklist holds the SDNodes in order, but it may contain null
235 // entries.
236 while (!N && !Worklist.empty()) {
237 N = Worklist.pop_back_val();
238 }
239
240 if (N) {
241 bool GoodWorklistEntry = WorklistMap.erase(N);
242 (void)GoodWorklistEntry;
243 assert(GoodWorklistEntry &&
244 "Found a worklist entry without a corresponding map entry!");
245 }
246 return N;
247 }
248
249 /// Call the node-specific routine that folds each particular type of node.
250 SDValue visit(SDNode *N);
251
252 public:
253 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
254 : DAG(D), TLI(D.getTargetLoweringInfo()),
255 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
256 ForCodeSize = DAG.shouldOptForSize();
257 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
/// Record \p N as a pruning candidate; nodes on PruningList that become
/// use_empty() are deleted by clearAddedDanglingWorklistEntries().
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist making sure its instance is at the back (next to be
275 /// processed.)
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
277 assert(N->getOpcode() != ISD::DELETED_NODE &&
278 "Deleted Node added to Worklist");
279
280 // Skip handle nodes as they can't usefully be combined and confuse the
281 // zero-use deletion strategy.
282 if (N->getOpcode() == ISD::HANDLENODE)
283 return;
284
285 if (IsCandidateForPruning)
286 ConsiderForPruning(N);
287
288 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
289 Worklist.push_back(N);
290 }
291
292 /// Remove all instances of N from the worklist.
293 void removeFromWorklist(SDNode *N) {
294 CombinedNodes.erase(N);
295 PruningList.remove(N);
296 StoreRootCountMap.erase(N);
297
298 auto It = WorklistMap.find(N);
299 if (It == WorklistMap.end())
300 return; // Not in the worklist.
301
302 // Null out the entry rather than erasing it to avoid a linear operation.
303 Worklist[It->second] = nullptr;
304 WorklistMap.erase(It);
305 }
306
307 void deleteAndRecombine(SDNode *N);
308 bool recursivelyDeleteUnusedNodes(SDNode *N);
309
310 /// Replaces all uses of the results of one DAG node with new values.
311 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
312 bool AddTo = true);
313
314 /// Replaces all uses of the results of one DAG node with new values.
/// Single-result convenience overload; forwards to the array form.
315 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
316 return CombineTo(N, &Res, 1, AddTo);
317 }
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
321 bool AddTo = true) {
322 SDValue To[] = { Res0, Res1 };
323 return CombineTo(N, To, 2, AddTo);
324 }
325
326 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
327
328 private:
329 unsigned MaximumLegalStoreInBits;
330
331 /// Check the specified integer node value to see if it can be simplified or
332 /// if things it uses can be simplified by bit propagation.
333 /// If so, return true.
// NOTE(review): original line 336 is missing from this listing --
// `DemandedBits` is used below but never declared here (presumably an
// all-ones APInt of width BitWidth); restore it from the upstream source.
334 bool SimplifyDemandedBits(SDValue Op) {
335 unsigned BitWidth = Op.getScalarValueSizeInBits();
337 return SimplifyDemandedBits(Op, DemandedBits);
338 }
339
/// Demanded-bits query over all lanes of \p Op.
// NOTE(review): original line 343 is missing from this listing -- the
// true branch of the conditional (presumably an all-ones APInt over the
// fixed vector's element count) was dropped; restore it from upstream.
340 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
341 EVT VT = Op.getValueType();
342 APInt DemandedElts = VT.isFixedLengthVector()
344 : APInt(1, 1);
345 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
346 }
347
348 /// Check the specified vector node value to see if it can be simplified or
349 /// if things it uses can be simplified as it only uses some of the
350 /// elements. If so, return true.
351 bool SimplifyDemandedVectorElts(SDValue Op) {
352 // TODO: For now just pretend it cannot be simplified.
353 if (Op.getValueType().isScalableVector())
354 return false;
355
356 unsigned NumElts = Op.getValueType().getVectorNumElements();
357 APInt DemandedElts = APInt::getAllOnes(NumElts);
358 return SimplifyDemandedVectorElts(Op, DemandedElts);
359 }
360
361 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
362 const APInt &DemandedElts,
363 bool AssumeSingleUse = false);
364 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
365 bool AssumeSingleUse = false);
366
367 bool CombineToPreIndexedLoadStore(SDNode *N);
368 bool CombineToPostIndexedLoadStore(SDNode *N);
369 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
370 bool SliceUpLoad(SDNode *N);
371
372 // Looks up the chain to find a unique (unaliased) store feeding the passed
373 // load. If no such store is found, returns a nullptr.
374 // Note: This will look past a CALLSEQ_START if the load is chained to it so
375 // so that it can find stack stores for byval params.
376 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
377 // Scalars have size 0 to distinguish from singleton vectors.
378 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
379 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
380 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
381
382 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
383 /// load.
384 ///
385 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
386 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
387 /// \param EltNo index of the vector element to load.
388 /// \param OriginalLoad load that EVE came from to be replaced.
389 /// \returns EVE on success SDValue() on failure.
390 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
391 SDValue EltNo,
392 LoadSDNode *OriginalLoad);
393 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
394 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
395 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
396 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
397 SDValue PromoteIntBinOp(SDValue Op);
398 SDValue PromoteIntShiftOp(SDValue Op);
399 SDValue PromoteExtend(SDValue Op);
400 bool PromoteLoad(SDValue Op);
401
402 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
403 SDValue RHS, SDValue True, SDValue False,
405
406 /// Call the node-specific routine that knows how to fold each
407 /// particular type of node. If that doesn't do anything, try the
408 /// target-specific DAG combines.
409 SDValue combine(SDNode *N);
410
411 // Visitation implementation - Implement dag node combining for different
412 // node types. The semantics are as follows:
413 // Return Value:
414 // SDValue.getNode() == 0 - No change was made
415 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
416 // otherwise - N should be replaced by the returned Operand.
417 //
418 SDValue visitTokenFactor(SDNode *N);
419 SDValue visitMERGE_VALUES(SDNode *N);
420 SDValue visitADD(SDNode *N);
421 SDValue visitADDLike(SDNode *N);
422 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
423 SDValue visitSUB(SDNode *N);
424 SDValue visitADDSAT(SDNode *N);
425 SDValue visitSUBSAT(SDNode *N);
426 SDValue visitADDC(SDNode *N);
427 SDValue visitADDO(SDNode *N);
428 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
429 SDValue visitSUBC(SDNode *N);
430 SDValue visitSUBO(SDNode *N);
431 SDValue visitADDE(SDNode *N);
432 SDValue visitUADDO_CARRY(SDNode *N);
433 SDValue visitSADDO_CARRY(SDNode *N);
434 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
435 SDNode *N);
436 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
437 SDNode *N);
438 SDValue visitSUBE(SDNode *N);
439 SDValue visitUSUBO_CARRY(SDNode *N);
440 SDValue visitSSUBO_CARRY(SDNode *N);
441 SDValue visitMUL(SDNode *N);
442 SDValue visitMULFIX(SDNode *N);
443 SDValue useDivRem(SDNode *N);
444 SDValue visitSDIV(SDNode *N);
445 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
446 SDValue visitUDIV(SDNode *N);
447 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
448 SDValue visitREM(SDNode *N);
449 SDValue visitMULHU(SDNode *N);
450 SDValue visitMULHS(SDNode *N);
451 SDValue visitAVG(SDNode *N);
452 SDValue visitABD(SDNode *N);
453 SDValue visitSMUL_LOHI(SDNode *N);
454 SDValue visitUMUL_LOHI(SDNode *N);
455 SDValue visitMULO(SDNode *N);
456 SDValue visitIMINMAX(SDNode *N);
457 SDValue visitAND(SDNode *N);
458 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
459 SDValue visitOR(SDNode *N);
460 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
461 SDValue visitXOR(SDNode *N);
462 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
463 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
464 SDValue visitSHL(SDNode *N);
465 SDValue visitSRA(SDNode *N);
466 SDValue visitSRL(SDNode *N);
467 SDValue visitFunnelShift(SDNode *N);
468 SDValue visitSHLSAT(SDNode *N);
469 SDValue visitRotate(SDNode *N);
470 SDValue visitABS(SDNode *N);
471 SDValue visitBSWAP(SDNode *N);
472 SDValue visitBITREVERSE(SDNode *N);
473 SDValue visitCTLZ(SDNode *N);
474 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
475 SDValue visitCTTZ(SDNode *N);
476 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
477 SDValue visitCTPOP(SDNode *N);
478 SDValue visitSELECT(SDNode *N);
479 SDValue visitVSELECT(SDNode *N);
480 SDValue visitVP_SELECT(SDNode *N);
481 SDValue visitSELECT_CC(SDNode *N);
482 SDValue visitSETCC(SDNode *N);
483 SDValue visitSETCCCARRY(SDNode *N);
484 SDValue visitSIGN_EXTEND(SDNode *N);
485 SDValue visitZERO_EXTEND(SDNode *N);
486 SDValue visitANY_EXTEND(SDNode *N);
487 SDValue visitAssertExt(SDNode *N);
488 SDValue visitAssertAlign(SDNode *N);
489 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
490 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
491 SDValue visitTRUNCATE(SDNode *N);
492 SDValue visitBITCAST(SDNode *N);
493 SDValue visitFREEZE(SDNode *N);
494 SDValue visitBUILD_PAIR(SDNode *N);
495 SDValue visitFADD(SDNode *N);
496 SDValue visitVP_FADD(SDNode *N);
497 SDValue visitVP_FSUB(SDNode *N);
498 SDValue visitSTRICT_FADD(SDNode *N);
499 SDValue visitFSUB(SDNode *N);
500 SDValue visitFMUL(SDNode *N);
501 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
502 SDValue visitFMAD(SDNode *N);
503 SDValue visitFDIV(SDNode *N);
504 SDValue visitFREM(SDNode *N);
505 SDValue visitFSQRT(SDNode *N);
506 SDValue visitFCOPYSIGN(SDNode *N);
507 SDValue visitFPOW(SDNode *N);
508 SDValue visitSINT_TO_FP(SDNode *N);
509 SDValue visitUINT_TO_FP(SDNode *N);
510 SDValue visitFP_TO_SINT(SDNode *N);
511 SDValue visitFP_TO_UINT(SDNode *N);
512 SDValue visitXRINT(SDNode *N);
513 SDValue visitFP_ROUND(SDNode *N);
514 SDValue visitFP_EXTEND(SDNode *N);
515 SDValue visitFNEG(SDNode *N);
516 SDValue visitFABS(SDNode *N);
517 SDValue visitFCEIL(SDNode *N);
518 SDValue visitFTRUNC(SDNode *N);
519 SDValue visitFFREXP(SDNode *N);
520 SDValue visitFFLOOR(SDNode *N);
521 SDValue visitFMinMax(SDNode *N);
522 SDValue visitBRCOND(SDNode *N);
523 SDValue visitBR_CC(SDNode *N);
524 SDValue visitLOAD(SDNode *N);
525
526 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
527 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
528 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
529
530 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
531
532 SDValue visitSTORE(SDNode *N);
533 SDValue visitATOMIC_STORE(SDNode *N);
534 SDValue visitLIFETIME_END(SDNode *N);
535 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
536 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
537 SDValue visitBUILD_VECTOR(SDNode *N);
538 SDValue visitCONCAT_VECTORS(SDNode *N);
539 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
540 SDValue visitVECTOR_SHUFFLE(SDNode *N);
541 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
542 SDValue visitINSERT_SUBVECTOR(SDNode *N);
543 SDValue visitMLOAD(SDNode *N);
544 SDValue visitMSTORE(SDNode *N);
545 SDValue visitMGATHER(SDNode *N);
546 SDValue visitMSCATTER(SDNode *N);
547 SDValue visitVPGATHER(SDNode *N);
548 SDValue visitVPSCATTER(SDNode *N);
549 SDValue visitVP_STRIDED_LOAD(SDNode *N);
550 SDValue visitVP_STRIDED_STORE(SDNode *N);
551 SDValue visitFP_TO_FP16(SDNode *N);
552 SDValue visitFP16_TO_FP(SDNode *N);
553 SDValue visitFP_TO_BF16(SDNode *N);
554 SDValue visitBF16_TO_FP(SDNode *N);
555 SDValue visitVECREDUCE(SDNode *N);
556 SDValue visitVPOp(SDNode *N);
557 SDValue visitGET_FPENV_MEM(SDNode *N);
558 SDValue visitSET_FPENV_MEM(SDNode *N);
559
560 template <class MatchContextClass>
561 SDValue visitFADDForFMACombine(SDNode *N);
562 template <class MatchContextClass>
563 SDValue visitFSUBForFMACombine(SDNode *N);
564 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
565
566 SDValue XformToShuffleWithZero(SDNode *N);
567 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
568 const SDLoc &DL,
569 SDNode *N,
570 SDValue N0,
571 SDValue N1);
572 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
573 SDValue N1, SDNodeFlags Flags);
574 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
575 SDValue N1, SDNodeFlags Flags);
576 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
577 EVT VT, SDValue N0, SDValue N1,
578 SDNodeFlags Flags = SDNodeFlags());
579
580 SDValue visitShiftByConstant(SDNode *N);
581
582 SDValue foldSelectOfConstants(SDNode *N);
583 SDValue foldVSelectOfConstants(SDNode *N);
584 SDValue foldBinOpIntoSelect(SDNode *BO);
585 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
586 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
587 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
588 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
590 bool NotExtCompare = false);
591 SDValue convertSelectOfFPConstantsToLoadOffset(
592 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
594 SDValue foldSignChangeInBitcast(SDNode *N);
595 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
597 SDValue foldSelectOfBinops(SDNode *N);
598 SDValue foldSextSetcc(SDNode *N);
599 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
600 const SDLoc &DL);
601 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
602 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
603 SDValue unfoldMaskedMerge(SDNode *N);
604 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
605 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
606 const SDLoc &DL, bool foldBooleans);
607 SDValue rebuildSetCC(SDValue N);
608
609 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
610 SDValue &CC, bool MatchStrict = false) const;
611 bool isOneUseSetCC(SDValue N) const;
612
613 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
614 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
615
616 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
617 unsigned HiOp);
618 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
619 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
620 const TargetLowering &TLI);
621
622 SDValue CombineExtLoad(SDNode *N);
623 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
624 SDValue combineRepeatedFPDivisors(SDNode *N);
625 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
626 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
627 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
628 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
629 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
630 SDValue BuildSDIV(SDNode *N);
631 SDValue BuildSDIVPow2(SDNode *N);
632 SDValue BuildUDIV(SDNode *N);
633 SDValue BuildSREMPow2(SDNode *N);
634 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
635 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
636 bool KnownNeverZero = false,
637 bool InexpensiveOnly = false,
638 std::optional<EVT> OutVT = std::nullopt);
639 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
640 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
641 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
642 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
643 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
644 SDNodeFlags Flags, bool Reciprocal);
645 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
646 SDNodeFlags Flags, bool Reciprocal);
647 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
648 bool DemandHighBits = true);
649 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
650 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
651 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
652 unsigned PosOpcode, unsigned NegOpcode,
653 const SDLoc &DL);
654 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
655 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
656 unsigned PosOpcode, unsigned NegOpcode,
657 const SDLoc &DL);
658 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
659 SDValue MatchLoadCombine(SDNode *N);
660 SDValue mergeTruncStores(StoreSDNode *N);
661 SDValue reduceLoadWidth(SDNode *N);
662 SDValue ReduceLoadOpStoreWidth(SDNode *N);
664 SDValue TransformFPLoadStorePair(SDNode *N);
665 SDValue convertBuildVecZextToZext(SDNode *N);
666 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
667 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
668 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
669 SDValue reduceBuildVecToShuffle(SDNode *N);
670 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
671 ArrayRef<int> VectorMask, SDValue VecIn1,
672 SDValue VecIn2, unsigned LeftIdx,
673 bool DidSplitVec);
674 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
675
676 /// Walk up chain skipping non-aliasing memory nodes,
677 /// looking for aliasing nodes and adding them to the Aliases vector.
678 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
679 SmallVectorImpl<SDValue> &Aliases);
680
681 /// Return true if there is any possibility that the two addresses overlap.
682 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
683
684 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
685 /// chain (aliasing node.)
686 SDValue FindBetterChain(SDNode *N, SDValue Chain);
687
688 /// Try to replace a store and any possibly adjacent stores on
689 /// consecutive chains with better chains. Return true only if St is
690 /// replaced.
691 ///
692 /// Notice that other chains may still be replaced even if the function
693 /// returns false.
694 bool findBetterNeighborChains(StoreSDNode *St);
695
696 // Helper for findBetterNeighborChains. Walk up store chain add additional
697 // chained stores that do not overlap and can be parallelized.
698 bool parallelizeChainedStores(StoreSDNode *St);
699
700 /// Holds a pointer to an LSBaseSDNode as well as information on where it
701 /// is located in a sequence of memory operations connected by a chain.
/// Instances are collected into the StoreNodes lists consumed by the
/// mergeConsecutiveStores helper family declared below.
702 struct MemOpLink {
703 // Ptr to the mem node.
704 LSBaseSDNode *MemNode;
705
706 // Offset from the base ptr.
707 int64_t OffsetFromBase;
708
709 MemOpLink(LSBaseSDNode *N, int64_t Offset)
710 : MemNode(N), OffsetFromBase(Offset) {}
711 };
712
713 // Classify the origin of a stored value.
// NOTE(review): original lines 720-722 and 725-726 are missing from this
// listing. The bare `return` statements below are orphaned fragments of
// the dropped case labels (upstream these handle constant BUILD_VECTOR
// and EXTRACT_VECTOR_ELT/EXTRACT_SUBVECTOR sources -- confirm against the
// upstream source before relying on this switch).
714 enum class StoreSource { Unknown, Constant, Extract, Load };
715 StoreSource getStoreSource(SDValue StoreVal) {
716 switch (StoreVal.getOpcode()) {
717 case ISD::Constant:
718 case ISD::ConstantFP:
719 return StoreSource::Constant;
723 return StoreSource::Constant;
724 return StoreSource::Unknown;
727 return StoreSource::Extract;
728 case ISD::LOAD:
729 return StoreSource::Load;
730 default:
731 return StoreSource::Unknown;
732 }
733 }
734
735 /// This is a helper function for visitMUL to check the profitability
736 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
737 /// MulNode is the original multiply, AddNode is (add x, c1),
738 /// and ConstNode is c2.
739 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
740 SDValue ConstNode);
741
742 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
743 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
744 /// the type of the loaded value to be extended.
745 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
746 EVT LoadResultTy, EVT &ExtVT);
747
748 /// Helper function to calculate whether the given Load/Store can have its
749 /// width reduced to ExtVT.
750 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
751 EVT &MemVT, unsigned ShAmt = 0);
752
753 /// Used by BackwardsPropagateMask to find suitable loads.
754 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
755 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
756 ConstantSDNode *Mask, SDNode *&NodeToMask);
757 /// Attempt to propagate a given AND node back to load leaves so that they
758 /// can be combined into narrow loads.
759 bool BackwardsPropagateMask(SDNode *N);
760
761 /// Helper function for mergeConsecutiveStores which merges the component
762 /// store chains.
763 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
764 unsigned NumStores);
765
766 /// Helper function for mergeConsecutiveStores which checks if all the store
767 /// nodes have the same underlying object. We can still reuse the first
768 /// store's pointer info if all the stores are from the same object.
769 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
770
771 /// This is a helper function for mergeConsecutiveStores. When the source
772 /// elements of the consecutive stores are all constants or all extracted
773 /// vector elements, try to merge them into one larger store introducing
774 /// bitcasts if necessary. \return True if a merged store was created.
775 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
776 EVT MemVT, unsigned NumStores,
777 bool IsConstantSrc, bool UseVector,
778 bool UseTrunc);
779
780 /// This is a helper function for mergeConsecutiveStores. Stores that
781 /// potentially may be merged with St are placed in StoreNodes. RootNode is
782 /// a chain predecessor to all store candidates.
783 void getStoreMergeCandidates(StoreSDNode *St,
784 SmallVectorImpl<MemOpLink> &StoreNodes,
785 SDNode *&Root);
786
787 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
788 /// have indirect dependency through their operands. RootNode is the
789 /// predecessor to all stores calculated by getStoreMergeCandidates and is
790 /// used to prune the dependency check. \return True if safe to merge.
791 bool checkMergeStoreCandidatesForDependencies(
792 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
793 SDNode *RootNode);
794
795 /// This is a helper function for mergeConsecutiveStores. Given a list of
796 /// store candidates, find the first N that are consecutive in memory.
797 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
798 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
799 int64_t ElementSizeBytes) const;
800
801 /// This is a helper function for mergeConsecutiveStores. It is used for
802 /// store chains that are composed entirely of constant values.
803 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
804 unsigned NumConsecutiveStores,
805 EVT MemVT, SDNode *Root, bool AllowVectors);
806
807 /// This is a helper function for mergeConsecutiveStores. It is used for
808 /// store chains that are composed entirely of extracted vector elements.
809 /// When extracting multiple vector elements, try to store them in one
810 /// vector store rather than a sequence of scalar stores.
811 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
812 unsigned NumConsecutiveStores, EVT MemVT,
813 SDNode *Root);
814
815 /// This is a helper function for mergeConsecutiveStores. It is used for
816 /// store chains that are composed entirely of loaded values.
817 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
818 unsigned NumConsecutiveStores, EVT MemVT,
819 SDNode *Root, bool AllowVectors,
820 bool IsNonTemporalStore, bool IsNonTemporalLoad);
821
822 /// Merge consecutive store operations into a wide store.
823 /// This optimization uses wide integers or vectors when possible.
824 /// \return true if stores were merged.
825 bool mergeConsecutiveStores(StoreSDNode *St);
826
827 /// Try to transform a truncation where C is a constant:
828 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
829 ///
830 /// \p N needs to be a truncation and its first operand an AND. Other
831 /// requirements are checked by the function (e.g. that trunc is
832 /// single-use) and if missed an empty SDValue is returned.
833 SDValue distributeTruncateThroughAnd(SDNode *N);
834
835 /// Helper function to determine whether the target supports operation
836 /// given by \p Opcode for type \p VT, that is, whether the operation
837 /// is legal or custom before legalizing operations, and whether is
838 /// legal (but not custom) after legalization.
839 bool hasOperation(unsigned Opcode, EVT VT) {
840 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
841 }
842
843 public:
844 /// Runs the dag combiner on all nodes in the work list
845 void Run(CombineLevel AtLevel);
846
847 SelectionDAG &getDAG() const { return DAG; }
848
849 /// Returns a type large enough to hold any valid shift amount - before type
850 /// legalization these can be huge.
851 EVT getShiftAmountTy(EVT LHSTy) {
852 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
853 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
854 }
855
856 /// This method returns true if we are running before type legalization or
857 /// if the specified VT is legal.
858 bool isTypeLegal(const EVT &VT) {
859 if (!LegalTypes) return true;
860 return TLI.isTypeLegal(VT);
861 }
862
863 /// Convenience wrapper around TargetLowering::getSetCCResultType
864 EVT getSetCCResultType(EVT VT) const {
865 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
866 }
867
868 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
869 SDValue OrigLoad, SDValue ExtLoad,
870 ISD::NodeType ExtType);
871 };
872
873/// This class is a DAGUpdateListener that removes any deleted
874/// nodes from the worklist.
875class WorklistRemover : public SelectionDAG::DAGUpdateListener {
876 DAGCombiner &DC;
877
878public:
879 explicit WorklistRemover(DAGCombiner &dc)
880 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
881
882 void NodeDeleted(SDNode *N, SDNode *E) override {
883 DC.removeFromWorklist(N);
884 }
885};
886
887class WorklistInserter : public SelectionDAG::DAGUpdateListener {
888 DAGCombiner &DC;
889
890public:
891 explicit WorklistInserter(DAGCombiner &dc)
892 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
893
894 // FIXME: Ideally we could add N to the worklist, but this causes exponential
895 // compile time costs in large DAGs, e.g. Halide.
896 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
897};
898
899} // end anonymous namespace
900
901//===----------------------------------------------------------------------===//
902// TargetLowering::DAGCombinerInfo implementation
903//===----------------------------------------------------------------------===//
// NOTE(review): this region is damaged — the extraction dropped the function
// signature lines (doxygen 905, 909, 914, 919, 924-925, 929-930). The bodies
// below are the TargetLowering::DAGCombinerInfo forwarding wrappers, which
// cast the opaque DC pointer back to DAGCombiner and delegate. Restore the
// signatures from upstream before editing.
 904
 906 ((DAGCombiner*)DC)->AddToWorklist(N);
 907}
 908
 910CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 911 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 912}
 913
 915CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 916 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 917}
 918
 920CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 921 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 922}
 923
// NOTE(review): missing signature — presumably recursivelyDeleteUnusedNodes.
 926 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
 927}
 928
// NOTE(review): missing signature — presumably CommitTargetLoweringOpt.
 931 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 932}
933
934//===----------------------------------------------------------------------===//
935// Helper Functions
936//===----------------------------------------------------------------------===//
937
938void DAGCombiner::deleteAndRecombine(SDNode *N) {
939 removeFromWorklist(N);
940
941 // If the operands of this node are only used by the node, they will now be
942 // dead. Make sure to re-visit them and recursively delete dead nodes.
943 for (const SDValue &Op : N->ops())
944 // For an operand generating multiple values, one of the values may
945 // become dead allowing further simplification (e.g. split index
946 // arithmetic from an indexed load).
947 if (Op->hasOneUse() || Op->getNumValues() > 1)
948 AddToWorklist(Op.getNode());
949
950 DAG.DeleteNode(N);
951}
952
953// APInts must be the same size for most operations, this helper
954// function zero extends the shorter of the pair so that they match.
955// We provide an Offset so that we can create bitwidths that won't overflow.
956static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
957 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
958 LHS = LHS.zext(Bits);
959 RHS = RHS.zext(Bits);
960}
961
962// Return true if this node is a setcc, or is a select_cc
963// that selects between the target values used for true and false, making it
964// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
965// the appropriate nodes based on the type of node we are checking. This
966// simplifies life a bit for the callers.
 967bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 968 SDValue &CC, bool MatchStrict) const {
// Plain SETCC: operands are (lhs, rhs, cc).
 969 if (N.getOpcode() == ISD::SETCC) {
 970 LHS = N.getOperand(0);
 971 RHS = N.getOperand(1);
 972 CC = N.getOperand(2);
 973 return true;
 974 }
 975
// Strict FP compares carry a chain at operand 0, so lhs/rhs/cc shift up one.
 976 if (MatchStrict &&
 977 (N.getOpcode() == ISD::STRICT_FSETCC ||
 978 N.getOpcode() == ISD::STRICT_FSETCCS)) {
 979 LHS = N.getOperand(1);
 980 RHS = N.getOperand(2);
 981 CC = N.getOperand(3);
 982 return true;
 983 }
 984
// Otherwise only a select_cc choosing between the target's canonical
// true/false values is setcc-equivalent.
 985 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
 986 !TLI.isConstFalseVal(N.getOperand(3)))
 987 return false;
 988
// NOTE(review): the doxygen dump dropped line 990 here — the RHS of this
// comparison (presumably TargetLowering::UndefinedBooleanContent). Verify
// against upstream before editing.
 989 if (TLI.getBooleanContents(N.getValueType()) ==
 991 return false;
 992
 993 LHS = N.getOperand(0);
 994 RHS = N.getOperand(1);
 995 CC = N.getOperand(4);
 996 return true;
 997}
998
999/// Return true if this is a SetCC-equivalent operation with only one use.
1000/// If this is true, it allows the users to invert the operation for free when
1001/// it is profitable to do so.
1002bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1003 SDValue N0, N1, N2;
1004 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1005 return true;
1006 return false;
1007}
1008
// NOTE(review): the signature line (doxygen 1009) was lost in extraction;
// from the body this is a static predicate taking (SDNode *N, EVT ScalarTy)
// that answers whether N is a constant splat of the all-ones mask for the
// given scalar type (i8/i16/i32). Restore the signature from upstream.
 1010 if (!ScalarTy.isSimple())
 1011 return false;
 1012
 1013 uint64_t MaskForTy = 0ULL;
 1014 switch (ScalarTy.getSimpleVT().SimpleTy) {
 1015 case MVT::i8:
 1016 MaskForTy = 0xFFULL;
 1017 break;
 1018 case MVT::i16:
 1019 MaskForTy = 0xFFFFULL;
 1020 break;
 1021 case MVT::i32:
 1022 MaskForTy = 0xFFFFFFFFULL;
 1023 break;
 1024 default:
 1025 return false;
// NOTE(review): this `break` is unreachable after the `return` above.
 1026 break;
 1027 }
 1028
// Accept only a uniform constant splat whose value equals the mask.
 1029 APInt Val;
 1030 if (ISD::isConstantSplatVector(N, Val))
 1031 return Val.getLimitedValue() == MaskForTy;
 1032
 1033 return false;
 1034}
1035
1036// Determines if it is a constant integer or a splat/build vector of constant
1037// integers (and undefs).
1038// Do not permit build vector implicit truncation.
1039static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1040 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1041 return !(Const->isOpaque() && NoOpaques);
1042 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1043 return false;
1044 unsigned BitWidth = N.getScalarValueSizeInBits();
1045 for (const SDValue &Op : N->op_values()) {
1046 if (Op.isUndef())
1047 continue;
1048 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1049 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1050 (Const->isOpaque() && NoOpaques))
1051 return false;
1052 }
1053 return true;
1054}
1055
1056// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1057// undef's.
 1058static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
 1059 if (V.getOpcode() != ISD::BUILD_VECTOR)
 1060 return false;
// NOTE(review): the second operand of this `||` (doxygen 1062) was dropped by
// extraction — presumably an FP-constant build-vector check such as
// ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()). Verify upstream.
 1061 return isConstantOrConstantVector(V, NoOpaques) ||
 1063}
1064
1065// Determine if this an indexed load with an opaque target constant index.
1066static bool canSplitIdx(LoadSDNode *LD) {
1067 return MaySplitLoadIndex &&
1068 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1069 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1070}
1071
 1072bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
 1073 const SDLoc &DL,
 1074 SDNode *N,
 1075 SDValue N0,
 1076 SDValue N1) {
 1077 // Currently this only tries to ensure we don't undo the GEP splits done by
 1078 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
 1079 // we check if the following transformation would be problematic:
 1080 // (load/store (add, (add, x, offset1), offset2)) ->
 1081 // (load/store (add, x, offset1+offset2)).
 1082
 1083 // (load/store (add, (add, x, y), offset2)) ->
 1084 // (load/store (add, (add, x, offset2), y)).
 1085
 1086 if (N0.getOpcode() != ISD::ADD)
 1087 return false;
 1088
 1089 // Check for vscale addressing modes.
 1090 // (load/store (add/sub (add x, y), vscale))
 1091 // (load/store (add/sub (add x, y), (lsl vscale, C)))
 1092 // (load/store (add/sub (add x, y), (mul vscale, C)))
 1093 if ((N1.getOpcode() == ISD::VSCALE ||
 1094 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
 1095 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
 1096 isa<ConstantSDNode>(N1.getOperand(1)))) &&
 1097 N1.getValueType().getFixedSizeInBits() <= 64) {
// Fold (vscale * C) / (vscale << C) into a single scalable byte offset,
// negated when the outer op is a subtract.
 1098 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
 1099 ? N1.getConstantOperandVal(0)
 1100 : (N1.getOperand(0).getConstantOperandVal(0) *
 1101 (N1.getOpcode() == ISD::SHL
 1102 ? (1LL << N1.getConstantOperandVal(1))
 1103 : N1.getConstantOperandVal(1)));
 1104 if (Opc == ISD::SUB)
 1105 ScalableOffset = -ScalableOffset;
 1106 if (all_of(N->uses(), [&](SDNode *Node) {
 1107 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
 1108 LoadStore && LoadStore->getBasePtr().getNode() == N) {
// NOTE(review): extraction dropped doxygen line 1109 here — presumably the
// declaration `TargetLowering::AddrMode AM;` (AM is used below). Verify
// against upstream.
 1110 AM.HasBaseReg = true;
 1111 AM.ScalableOffset = ScalableOffset;
 1112 EVT VT = LoadStore->getMemoryVT();
 1113 unsigned AS = LoadStore->getAddressSpace();
 1114 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
 1115 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
 1116 AS);
 1117 }
 1118 return false;
 1119 }))
 1120 return true;
 1121 }
 1122
// Beyond the vscale case only constant-offset ADD reassociation is checked.
 1123 if (Opc != ISD::ADD)
 1124 return false;
 1125
 1126 auto *C2 = dyn_cast<ConstantSDNode>(N1);
 1127 if (!C2)
 1128 return false;
 1129
 1130 const APInt &C2APIntVal = C2->getAPIntValue();
 1131 if (C2APIntVal.getSignificantBits() > 64)
 1132 return false;
 1133
 1134 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
 1135 if (N0.hasOneUse())
 1136 return false;
 1137
 1138 const APInt &C1APIntVal = C1->getAPIntValue();
 1139 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
 1140 if (CombinedValueIntVal.getSignificantBits() > 64)
 1141 return false;
 1142 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
 1143
 1144 for (SDNode *Node : N->uses()) {
 1145 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
 1146 // Is x[offset2] already not a legal addressing mode? If so then
 1147 // reassociating the constants breaks nothing (we test offset2 because
 1148 // that's the one we hope to fold into the load or store).
// NOTE(review): extraction dropped doxygen line 1149 — presumably
// `TargetLowering::AddrMode AM;`. Verify against upstream.
 1150 AM.HasBaseReg = true;
 1151 AM.BaseOffs = C2APIntVal.getSExtValue();
 1152 EVT VT = LoadStore->getMemoryVT();
 1153 unsigned AS = LoadStore->getAddressSpace();
 1154 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
 1155 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
 1156 continue;
 1157
 1158 // Would x[offset1+offset2] still be a legal addressing mode?
 1159 AM.BaseOffs = CombinedValue;
 1160 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
 1161 return true;
 1162 }
 1163 }
 1164 } else {
 1165 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
 1166 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
 1167 return false;
 1168
 1169 for (SDNode *Node : N->uses()) {
 1170 auto *LoadStore = dyn_cast<MemSDNode>(Node);
 1171 if (!LoadStore)
 1172 return false;
 1173
 1174 // Is x[offset2] a legal addressing mode? If so then
 1175 // reassociating the constants breaks address pattern
// NOTE(review): extraction dropped doxygen line 1176 — presumably
// `TargetLowering::AddrMode AM;`. Verify against upstream.
 1177 AM.HasBaseReg = true;
 1178 AM.BaseOffs = C2APIntVal.getSExtValue();
 1179 EVT VT = LoadStore->getMemoryVT();
 1180 unsigned AS = LoadStore->getAddressSpace();
 1181 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
 1182 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
 1183 return false;
 1184 }
 1185 return true;
 1186 }
 1187
 1188 return false;
 1189}
1190
1191/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1192/// \p N0 is the same kind of operation as \p Opc.
 1193SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
 1194 SDValue N0, SDValue N1,
 1195 SDNodeFlags Flags) {
 1196 EVT VT = N0.getValueType();
 1197
 1198 if (N0.getOpcode() != Opc)
 1199 return SDValue();
 1200
 1201 SDValue N00 = N0.getOperand(0);
 1202 SDValue N01 = N0.getOperand(1);
 1203
// NOTE(review): extraction dropped doxygen line 1204 — an `if (...) {` that
// opens the scope closed at line 1222 below; verify upstream (it appears to
// gate the constant-reassociation path on N01 being a constant int).
// Propagate NUW only when both the inner and outer adds carry it.
 1205 SDNodeFlags NewFlags;
 1206 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
 1207 Flags.hasNoUnsignedWrap())
 1208 NewFlags.setNoUnsignedWrap(true);
 1209
// NOTE(review): extraction dropped doxygen line 1210 — another `if (...) {`
// (closed at line 1215) that appears to gate on N1 being a constant int.
 1211 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
 1212 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
 1213 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
 1214 return SDValue();
 1215 }
 1216 if (TLI.isReassocProfitable(DAG, N0, N1)) {
 1217 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
 1218 // iff (op x, c1) has one use
 1219 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
 1220 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
 1221 }
 1222 }
 1223
 1224 // Check for repeated operand logic simplifications.
 1225 if (Opc == ISD::AND || Opc == ISD::OR) {
 1226 // (N00 & N01) & N00 --> N00 & N01
 1227 // (N00 & N01) & N01 --> N00 & N01
 1228 // (N00 | N01) | N00 --> N00 | N01
 1229 // (N00 | N01) | N01 --> N00 | N01
 1230 if (N1 == N00 || N1 == N01)
 1231 return N0;
 1232 }
 1233 if (Opc == ISD::XOR) {
 1234 // (N00 ^ N01) ^ N00 --> N01
 1235 if (N1 == N00)
 1236 return N01;
 1237 // (N00 ^ N01) ^ N01 --> N00
 1238 if (N1 == N01)
 1239 return N00;
 1240 }
 1241
 1242 if (TLI.isReassocProfitable(DAG, N0, N1)) {
 1243 if (N1 != N01) {
 1244 // Reassociate if (op N00, N1) already exist
 1245 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
 1246 // if Op (Op N00, N1), N01 already exist
 1247 // we need to stop reassociating to avoid an infinite loop
 1248 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
 1249 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
 1250 }
 1251 }
 1252
 1253 if (N1 != N00) {
 1254 // Reassociate if (op N01, N1) already exist
 1255 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
 1256 // if Op (Op N01, N1), N00 already exist
 1257 // we need to stop reassociating to avoid an infinite loop
 1258 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
 1259 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
 1260 }
 1261 }
 1262
 1263 // Reassociate the operands from (OR/AND (OR/AND(N00, N001)), N1) to (OR/AND
 1264 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
 1265 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
 1266 // comparisons with the same predicate. This enables optimizations as the
 1267 // following one:
 1268 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
 1269 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
 1270 if (Opc == ISD::AND || Opc == ISD::OR) {
 1271 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
 1272 N01->getOpcode() == ISD::SETCC) {
 1273 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
 1274 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
 1275 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
 1276 if (CC1 == CC00 && CC1 != CC01) {
 1277 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
 1278 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
 1279 }
 1280 if (CC1 == CC01 && CC1 != CC00) {
 1281 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
 1282 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
 1283 }
 1284 }
 1285 }
 1286 }
 1287
 1288 return SDValue();
 1289}
1290
1291/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1292/// same kind of operation as \p Opc.
 1293SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 1294 SDValue N1, SDNodeFlags Flags) {
 1295 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
 1296
 1297 // Floating-point reassociation is not allowed without loose FP math.
// NOTE(review): extraction dropped doxygen line 1299 — the second half of
// this condition, presumably `N1.getValueType().isFloatingPoint())`. Verify
// against upstream.
 1298 if (N0.getValueType().isFloatingPoint() ||
 1300 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
 1301 return SDValue();
 1302
// Try both operand orders, since either side may be the matching inner op.
 1303 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
 1304 return Combined;
 1305 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
 1306 return Combined;
 1307 return SDValue();
 1308}
1309
1310// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1311// Note that we only expect Flags to be passed from FP operations. For integer
1312// operations they need to be dropped.
 1313SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
 1314 const SDLoc &DL, EVT VT, SDValue N0,
 1315 SDValue N1, SDNodeFlags Flags) {
// Both operands must be the same reduction over equally-typed vectors, each
// with a single use, and the target must want the reassociation.
// NOTE(review): extraction dropped doxygen line 1319 — one more conjunct of
// this condition (possibly a hasOperation(Opc, ...) check). Verify upstream.
 1316 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
 1317 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
 1318 N0->hasOneUse() && N1->hasOneUse() &&
 1320 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
// Flags are only expected from FP ops; the inserter applies them to the
// newly created nodes.
 1321 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
 1322 return DAG.getNode(RedOpc, DL, VT,
 1323 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
 1324 N0.getOperand(0), N1.getOperand(0)));
 1325 }
 1326 return SDValue();
 1327}
1328
1329SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1330 bool AddTo) {
1331 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1332 ++NodesCombined;
1333 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1334 To[0].dump(&DAG);
1335 dbgs() << " and " << NumTo - 1 << " other values\n");
1336 for (unsigned i = 0, e = NumTo; i != e; ++i)
1337 assert((!To[i].getNode() ||
1338 N->getValueType(i) == To[i].getValueType()) &&
1339 "Cannot combine value to value of different type!");
1340
1341 WorklistRemover DeadNodes(*this);
1342 DAG.ReplaceAllUsesWith(N, To);
1343 if (AddTo) {
1344 // Push the new nodes and any users onto the worklist
1345 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1346 if (To[i].getNode())
1347 AddToWorklistWithUsers(To[i].getNode());
1348 }
1349 }
1350
1351 // Finally, if the node is now dead, remove it from the graph. The node
1352 // may not be dead if the replacement process recursively simplified to
1353 // something else needing this node.
1354 if (N->use_empty())
1355 deleteAndRecombine(N);
1356 return SDValue(N, 0);
1357}
1358
1359void DAGCombiner::
1360CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1361 // Replace the old value with the new one.
1362 ++NodesCombined;
1363 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1364 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1365
1366 // Replace all uses.
1367 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1368
1369 // Push the new node and any (possibly new) users onto the worklist.
1370 AddToWorklistWithUsers(TLO.New.getNode());
1371
1372 // Finally, if the node is now dead, remove it from the graph.
1373 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1374}
1375
1376/// Check the specified integer node value to see if it can be simplified or if
1377/// things it uses can be simplified by bit propagation. If so, return true.
1378bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1379 const APInt &DemandedElts,
1380 bool AssumeSingleUse) {
1381 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1382 KnownBits Known;
1383 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1384 AssumeSingleUse))
1385 return false;
1386
1387 // Revisit the node.
1388 AddToWorklist(Op.getNode());
1389
1390 CommitTargetLoweringOpt(TLO);
1391 return true;
1392}
1393
1394/// Check the specified vector node value to see if it can be simplified or
1395/// if things it uses can be simplified as it only uses some of the elements.
1396/// If so, return true.
1397bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1398 const APInt &DemandedElts,
1399 bool AssumeSingleUse) {
1400 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1401 APInt KnownUndef, KnownZero;
1402 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1403 TLO, 0, AssumeSingleUse))
1404 return false;
1405
1406 // Revisit the node.
1407 AddToWorklist(Op.getNode());
1408
1409 CommitTargetLoweringOpt(TLO);
1410 return true;
1411}
1412
1413void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1414 SDLoc DL(Load);
1415 EVT VT = Load->getValueType(0);
1416 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1417
1418 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1419 Trunc.dump(&DAG); dbgs() << '\n');
1420
1421 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1422 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1423
1424 AddToWorklist(Trunc.getNode());
1425 recursivelyDeleteUnusedNodes(Load);
1426}
1427
 1428SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
 1429 Replace = false;
 1430 SDLoc DL(Op);
// Loads are promoted by creating an extending load of the wider type; the
// caller must splice it in (signalled via Replace = true).
 1431 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
 1432 LoadSDNode *LD = cast<LoadSDNode>(Op);
 1433 EVT MemVT = LD->getMemoryVT();
// NOTE(review): extraction dropped doxygen line 1434 — the first half of the
// ExtType initialization feeding the `:` continuation below (presumably
// `ISD::LoadExtType ExtType = MemVT.isRound() ? ISD::NON_EXTLOAD`). Verify
// against upstream.
 1435 : LD->getExtensionType();
 1436 Replace = true;
 1437 return DAG.getExtLoad(ExtType, DL, PVT,
 1438 LD->getChain(), LD->getBasePtr(),
 1439 MemVT, LD->getMemOperand());
 1440 }
 1441
 1442 unsigned Opc = Op.getOpcode();
 1443 switch (Opc) {
 1444 default: break;
// Assertion nodes are rebuilt on top of a recursively promoted operand so
// the known-bits annotation survives the promotion.
 1445 case ISD::AssertSext:
 1446 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
 1447 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
 1448 break;
 1449 case ISD::AssertZext:
 1450 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
 1451 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
 1452 break;
 1453 case ISD::Constant: {
 1454 unsigned ExtOpc =
 1455 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
 1456 return DAG.getNode(ExtOpc, DL, PVT, Op);
 1457 }
 1458 }
 1459
// Fallback: any-extend the value, if the target supports it at this type.
 1460 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
 1461 return SDValue();
 1462 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
 1463}
1464
 1465SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
// NOTE(review): extraction dropped doxygen line 1466 — the guard condition
// for this early return (presumably a legality check on
// ISD::SIGN_EXTEND_INREG at PVT, given the node built below). Verify against
// upstream.
 1467 return SDValue();
 1468 EVT OldVT = Op.getValueType();
 1469 SDLoc DL(Op);
 1470 bool Replace = false;
 1471 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
 1472 if (!NewOp.getNode())
 1473 return SDValue();
 1474 AddToWorklist(NewOp.getNode());
 1475
// If the operand was promoted via an extending load, splice the new load in
// for the old one before using the promoted value.
 1476 if (Replace)
 1477 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
// Re-establish the sign bits of the original width in the wider value.
 1478 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
 1479 DAG.getValueType(OldVT));
 1480}
1481
1482SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1483 EVT OldVT = Op.getValueType();
1484 SDLoc DL(Op);
1485 bool Replace = false;
1486 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1487 if (!NewOp.getNode())
1488 return SDValue();
1489 AddToWorklist(NewOp.getNode());
1490
1491 if (Replace)
1492 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1493 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1494}
1495
1496/// Promote the specified integer binary operation if the target indicates it is
1497/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1498/// i32 since i16 instructions are longer.
1499SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1500 if (!LegalOperations)
1501 return SDValue();
1502
1503 EVT VT = Op.getValueType();
1504 if (VT.isVector() || !VT.isInteger())
1505 return SDValue();
1506
1507 // If operation type is 'undesirable', e.g. i16 on x86, consider
1508 // promoting it.
1509 unsigned Opc = Op.getOpcode();
1510 if (TLI.isTypeDesirableForOp(Opc, VT))
1511 return SDValue();
1512
1513 EVT PVT = VT;
1514 // Consult target whether it is a good idea to promote this operation and
1515 // what's the right type to promote it to.
1516 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1517 assert(PVT != VT && "Don't know what type to promote to!");
1518
1519 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1520
1521 bool Replace0 = false;
1522 SDValue N0 = Op.getOperand(0);
1523 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1524
1525 bool Replace1 = false;
1526 SDValue N1 = Op.getOperand(1);
1527 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1528 SDLoc DL(Op);
1529
1530 SDValue RV =
1531 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1532
1533 // We are always replacing N0/N1's use in N and only need additional
1534 // replacements if there are additional uses.
1535 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1536 // (SDValue) here because the node may reference multiple values
1537 // (for example, the chain value of a load node).
1538 Replace0 &= !N0->hasOneUse();
1539 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1540
1541 // Combine Op here so it is preserved past replacements.
1542 CombineTo(Op.getNode(), RV);
1543
1544 // If operands have a use ordering, make sure we deal with
1545 // predecessor first.
1546 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1547 std::swap(N0, N1);
1548 std::swap(NN0, NN1);
1549 }
1550
1551 if (Replace0) {
1552 AddToWorklist(NN0.getNode());
1553 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1554 }
1555 if (Replace1) {
1556 AddToWorklist(NN1.getNode());
1557 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1558 }
1559 return Op;
1560 }
1561 return SDValue();
1562}
1563
1564/// Promote the specified integer shift operation if the target indicates it is
1565/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1566/// i32 since i16 instructions are longer.
1567SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1568 if (!LegalOperations)
1569 return SDValue();
1570
1571 EVT VT = Op.getValueType();
1572 if (VT.isVector() || !VT.isInteger())
1573 return SDValue();
1574
1575 // If operation type is 'undesirable', e.g. i16 on x86, consider
1576 // promoting it.
1577 unsigned Opc = Op.getOpcode();
1578 if (TLI.isTypeDesirableForOp(Opc, VT))
1579 return SDValue();
1580
1581 EVT PVT = VT;
1582 // Consult target whether it is a good idea to promote this operation and
1583 // what's the right type to promote it to.
1584 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1585 assert(PVT != VT && "Don't know what type to promote to!");
1586
1587 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1588
1589 bool Replace = false;
1590 SDValue N0 = Op.getOperand(0);
1591 if (Opc == ISD::SRA)
1592 N0 = SExtPromoteOperand(N0, PVT);
1593 else if (Opc == ISD::SRL)
1594 N0 = ZExtPromoteOperand(N0, PVT);
1595 else
1596 N0 = PromoteOperand(N0, PVT, Replace);
1597
1598 if (!N0.getNode())
1599 return SDValue();
1600
1601 SDLoc DL(Op);
1602 SDValue N1 = Op.getOperand(1);
1603 SDValue RV =
1604 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1605
1606 if (Replace)
1607 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1608
1609 // Deal with Op being deleted.
1610 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1611 return RV;
1612 }
1613 return SDValue();
1614}
1615
1616SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1617 if (!LegalOperations)
1618 return SDValue();
1619
1620 EVT VT = Op.getValueType();
1621 if (VT.isVector() || !VT.isInteger())
1622 return SDValue();
1623
1624 // If operation type is 'undesirable', e.g. i16 on x86, consider
1625 // promoting it.
1626 unsigned Opc = Op.getOpcode();
1627 if (TLI.isTypeDesirableForOp(Opc, VT))
1628 return SDValue();
1629
1630 EVT PVT = VT;
1631 // Consult target whether it is a good idea to promote this operation and
1632 // what's the right type to promote it to.
1633 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1634 assert(PVT != VT && "Don't know what type to promote to!");
1635 // fold (aext (aext x)) -> (aext x)
1636 // fold (aext (zext x)) -> (zext x)
1637 // fold (aext (sext x)) -> (sext x)
1638 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1639 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1640 }
1641 return SDValue();
1642}
1643
 1644bool DAGCombiner::PromoteLoad(SDValue Op) {
 1645 if (!LegalOperations)
 1646 return false;
 1647
 1648 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
 1649 return false;
 1650
 1651 EVT VT = Op.getValueType();
 1652 if (VT.isVector() || !VT.isInteger())
 1653 return false;
 1654
 1655 // If operation type is 'undesirable', e.g. i16 on x86, consider
 1656 // promoting it.
 1657 unsigned Opc = Op.getOpcode();
 1658 if (TLI.isTypeDesirableForOp(Opc, VT))
 1659 return false;
 1660
 1661 EVT PVT = VT;
 1662 // Consult target whether it is a good idea to promote this operation and
 1663 // what's the right type to promote it to.
 1664 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
 1665 assert(PVT != VT && "Don't know what type to promote to!");
 1666
 1667 SDLoc DL(Op);
 1668 SDNode *N = Op.getNode();
 1669 LoadSDNode *LD = cast<LoadSDNode>(N);
 1670 EVT MemVT = LD->getMemoryVT();
// NOTE(review): extraction dropped doxygen line 1671 — the first half of the
// ExtType initialization feeding the `:` continuation below (presumably
// `ISD::LoadExtType ExtType = MemVT.isRound() ? ISD::NON_EXTLOAD`). Verify
// against upstream.
 1672 : LD->getExtensionType();
 1673 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
 1674 LD->getChain(), LD->getBasePtr(),
 1675 MemVT, LD->getMemOperand());
 1676 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
 1677
 1678 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
 1679 Result.dump(&DAG); dbgs() << '\n');
 1680
// NOTE(review): extraction dropped doxygen line 1681 — presumably the
// replacement of the load's value result,
// `DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);`, matching the
// chain replacement below. Verify against upstream.
 1682 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
 1683
 1684 AddToWorklist(Result.getNode());
 1685 recursivelyDeleteUnusedNodes(N);
 1686 return true;
 1687 }
 1688
 1689 return false;
 1690}
1691
1692/// Recursively delete a node which has no uses and any operands for
1693/// which it is the only use.
1694///
1695/// Note that this both deletes the nodes and removes them from the worklist.
1696/// It also adds any nodes who have had a user deleted to the worklist as they
1697/// may now have only one use and subject to other combines.
 1698bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
 1699 if (!N->use_empty())
 1700 return false;
 1701
// NOTE(review): extraction dropped doxygen line 1702 — the declaration of
// the `Nodes` worklist used below (presumably a SmallSetVector of SDNode*,
// since insert/pop_back_val/empty are called on it). Verify upstream.
 1703 Nodes.insert(N);
 1704 do {
 1705 N = Nodes.pop_back_val();
 1706 if (!N)
 1707 continue;
 1708
// Dead node: queue its operands (they may become dead next), then delete it.
// Still-live node: revisit it, since it lost a user and may now combine.
 1709 if (N->use_empty()) {
 1710 for (const SDValue &ChildN : N->op_values())
 1711 Nodes.insert(ChildN.getNode());
 1712
 1713 removeFromWorklist(N);
 1714 DAG.DeleteNode(N);
 1715 } else {
 1716 AddToWorklist(N);
 1717 }
 1718 } while (!Nodes.empty());
 1719 return true;
 1720}
1721
1722//===----------------------------------------------------------------------===//
1723// Main DAG Combiner implementation
1724//===----------------------------------------------------------------------===//
1725
1726void DAGCombiner::Run(CombineLevel AtLevel) {
 // Cache the combine level and the derived legality flags so the visit
 // routines can query them without recomputing.
1727 // set the instance variables, so that the various visit routines may use it.
1728 Level = AtLevel;
1729 LegalDAG = Level >= AfterLegalizeDAG;
1730 LegalOperations = Level >= AfterLegalizeVectorOps;
1731 LegalTypes = Level >= AfterLegalizeTypes;
1732
 // RAII helper that routes nodes created during this run into the worklist.
1733 WorklistInserter AddNodes(*this);
1734
1735 // Add all the dag nodes to the worklist.
1736 //
1737 // Note: All nodes are not added to PruningList here, this is because the only
1738 // nodes which can be deleted are those which have no uses and all other nodes
1739 // which would otherwise be added to the worklist by the first call to
1740 // getNextWorklistEntry are already present in it.
1741 for (SDNode &Node : DAG.allnodes())
1742 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1743
1744 // Create a dummy node (which is not added to allnodes), that adds a reference
1745 // to the root node, preventing it from being deleted, and tracking any
1746 // changes of the root.
1747 HandleSDNode Dummy(DAG.getRoot());
1748
1749 // While we have a valid worklist entry node, try to combine it.
1750 while (SDNode *N = getNextWorklistEntry()) {
1751 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1752 // N is deleted from the DAG, since they too may now be dead or may have a
1753 // reduced number of uses, allowing other xforms.
1754 if (recursivelyDeleteUnusedNodes(N))
1755 continue;
1756
1757 WorklistRemover DeadNodes(*this);
1758
1759 // If this combine is running after legalizing the DAG, re-legalize any
1760 // nodes pulled off the worklist.
1761 if (LegalDAG) {
1762 SmallSetVector<SDNode *, 16> UpdatedNodes;
1763 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1764
1765 for (SDNode *LN : UpdatedNodes)
1766 AddToWorklistWithUsers(LN);
1767
 // Legalization replaced N entirely; nothing left to combine here.
1768 if (!NIsValid)
1769 continue;
1770 }
1771
1772 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1773
1774 // Add any operands of the new node which have not yet been combined to the
1775 // worklist as well. Because the worklist uniques things already, this
1776 // won't repeatedly process the same operand.
1777 for (const SDValue &ChildN : N->op_values())
1778 if (!CombinedNodes.count(ChildN.getNode()))
1779 AddToWorklist(ChildN.getNode());
1780
1781 CombinedNodes.insert(N);
1782 SDValue RV = combine(N);
1783
1784 if (!RV.getNode())
1785 continue;
1786
1787 ++NodesCombined;
1788
1789 // If we get back the same node we passed in, rather than a new node or
1790 // zero, we know that the node must have defined multiple values and
1791 // CombineTo was used. Since CombineTo takes care of the worklist
1792 // mechanics for us, we have no work to do in this case.
1793 if (RV.getNode() == N)
1794 continue;
1795
1796 assert(N->getOpcode() != ISD::DELETED_NODE &&
1797 RV.getOpcode() != ISD::DELETED_NODE &&
1798 "Node was deleted but visit returned new node!");
1799
1800 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1801
 // Replace all uses of N with the combined value. A multi-result node can
 // only be replaced wholesale when the replacement has the same number of
 // results; otherwise N must be single-result (asserted below).
1802 if (N->getNumValues() == RV->getNumValues())
1803 DAG.ReplaceAllUsesWith(N, RV.getNode());
1804 else {
1805 assert(N->getValueType(0) == RV.getValueType() &&
1806 N->getNumValues() == 1 && "Type mismatch");
1807 DAG.ReplaceAllUsesWith(N, &RV);
1808 }
1809
1810 // Push the new node and any users onto the worklist. Omit this if the
1811 // new node is the EntryToken (e.g. if a store managed to get optimized
1812 // out), because re-visiting the EntryToken and its users will not uncover
1813 // any additional opportunities, but there may be a large number of such
1814 // users, potentially causing compile time explosion.
1815 if (RV.getOpcode() != ISD::EntryToken)
1816 AddToWorklistWithUsers(RV.getNode());
1817
1818 // Finally, if the node is now dead, remove it from the graph. The node
1819 // may not be dead if the replacement process recursively simplified to
1820 // something else needing this node. This will also take care of adding any
1821 // operands which have lost a user to the worklist.
1822 recursivelyDeleteUnusedNodes(N);
1823 }
1824
1825 // If the root changed (e.g. it was a dead load, update the root).
1826 DAG.setRoot(Dummy.getValue());
1827 DAG.RemoveDeadNodes();
1828}
1829
1830SDValue DAGCombiner::visit(SDNode *N) {
 // Dispatch N to its opcode-specific visit routine. Returns the replacement
 // value for N, or an empty SDValue when no combine applies.
1831 // clang-format off
1832 switch (N->getOpcode()) {
1833 default: break;
1834 case ISD::TokenFactor: return visitTokenFactor(N);
1835 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1836 case ISD::ADD: return visitADD(N);
1837 case ISD::SUB: return visitSUB(N);
1838 case ISD::SADDSAT:
1839 case ISD::UADDSAT: return visitADDSAT(N);
1840 case ISD::SSUBSAT:
1841 case ISD::USUBSAT: return visitSUBSAT(N);
1842 case ISD::ADDC: return visitADDC(N);
1843 case ISD::SADDO:
1844 case ISD::UADDO: return visitADDO(N);
1845 case ISD::SUBC: return visitSUBC(N);
1846 case ISD::SSUBO:
1847 case ISD::USUBO: return visitSUBO(N);
1848 case ISD::ADDE: return visitADDE(N);
1849 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1850 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1851 case ISD::SUBE: return visitSUBE(N);
1852 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1853 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1854 case ISD::SMULFIX:
1855 case ISD::SMULFIXSAT:
1856 case ISD::UMULFIX:
1857 case ISD::UMULFIXSAT: return visitMULFIX(N);
1858 case ISD::MUL: return visitMUL(N);
1859 case ISD::SDIV: return visitSDIV(N);
1860 case ISD::UDIV: return visitUDIV(N);
1861 case ISD::SREM:
1862 case ISD::UREM: return visitREM(N);
1863 case ISD::MULHU: return visitMULHU(N);
1864 case ISD::MULHS: return visitMULHS(N);
1865 case ISD::AVGFLOORS:
1866 case ISD::AVGFLOORU:
1867 case ISD::AVGCEILS:
1868 case ISD::AVGCEILU: return visitAVG(N);
1869 case ISD::ABDS:
1870 case ISD::ABDU: return visitABD(N);
1871 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1872 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1873 case ISD::SMULO:
1874 case ISD::UMULO: return visitMULO(N);
1875 case ISD::SMIN:
1876 case ISD::SMAX:
1877 case ISD::UMIN:
1878 case ISD::UMAX: return visitIMINMAX(N);
1879 case ISD::AND: return visitAND(N);
1880 case ISD::OR: return visitOR(N);
1881 case ISD::XOR: return visitXOR(N);
1882 case ISD::SHL: return visitSHL(N);
1883 case ISD::SRA: return visitSRA(N);
1884 case ISD::SRL: return visitSRL(N);
1885 case ISD::ROTR:
1886 case ISD::ROTL: return visitRotate(N);
1887 case ISD::FSHL:
1888 case ISD::FSHR: return visitFunnelShift(N);
1889 case ISD::SSHLSAT:
1890 case ISD::USHLSAT: return visitSHLSAT(N);
1891 case ISD::ABS: return visitABS(N);
1892 case ISD::BSWAP: return visitBSWAP(N);
1893 case ISD::BITREVERSE: return visitBITREVERSE(N);
1894 case ISD::CTLZ: return visitCTLZ(N);
1895 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1896 case ISD::CTTZ: return visitCTTZ(N);
1897 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1898 case ISD::CTPOP: return visitCTPOP(N);
1899 case ISD::SELECT: return visitSELECT(N);
1900 case ISD::VSELECT: return visitVSELECT(N);
1901 case ISD::SELECT_CC: return visitSELECT_CC(N);
1902 case ISD::SETCC: return visitSETCC(N);
1903 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1904 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1905 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1906 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1907 case ISD::AssertSext:
1908 case ISD::AssertZext: return visitAssertExt(N);
1909 case ISD::AssertAlign: return visitAssertAlign(N);
1910 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
 // NOTE(review): extraction dropped original lines 1911-1912 here —
 // presumably the SIGN/ZERO_EXTEND_VECTOR_INREG cases; confirm upstream.
1913 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1914 case ISD::TRUNCATE: return visitTRUNCATE(N);
1915 case ISD::BITCAST: return visitBITCAST(N);
1916 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1917 case ISD::FADD: return visitFADD(N);
1918 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1919 case ISD::FSUB: return visitFSUB(N);
1920 case ISD::FMUL: return visitFMUL(N);
1921 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1922 case ISD::FMAD: return visitFMAD(N);
1923 case ISD::FDIV: return visitFDIV(N);
1924 case ISD::FREM: return visitFREM(N);
1925 case ISD::FSQRT: return visitFSQRT(N);
1926 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1927 case ISD::FPOW: return visitFPOW(N);
1928 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1929 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1930 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1931 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1932 case ISD::LRINT:
1933 case ISD::LLRINT: return visitXRINT(N);
1934 case ISD::FP_ROUND: return visitFP_ROUND(N);
1935 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1936 case ISD::FNEG: return visitFNEG(N);
1937 case ISD::FABS: return visitFABS(N);
1938 case ISD::FFLOOR: return visitFFLOOR(N);
1939 case ISD::FMINNUM:
1940 case ISD::FMAXNUM:
1941 case ISD::FMINIMUM:
1942 case ISD::FMAXIMUM: return visitFMinMax(N);
1943 case ISD::FCEIL: return visitFCEIL(N);
1944 case ISD::FTRUNC: return visitFTRUNC(N);
1945 case ISD::FFREXP: return visitFFREXP(N);
1946 case ISD::BRCOND: return visitBRCOND(N);
1947 case ISD::BR_CC: return visitBR_CC(N);
1948 case ISD::LOAD: return visitLOAD(N);
1949 case ISD::STORE: return visitSTORE(N);
1950 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1951 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1952 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1953 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1954 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1955 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1956 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1957 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1958 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1959 case ISD::MGATHER: return visitMGATHER(N);
1960 case ISD::MLOAD: return visitMLOAD(N);
1961 case ISD::MSCATTER: return visitMSCATTER(N);
1962 case ISD::MSTORE: return visitMSTORE(N);
1963 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1964 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1965 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1966 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1967 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1968 case ISD::FREEZE: return visitFREEZE(N);
1969 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1970 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
 // NOTE(review): extraction dropped original lines 1971-1972 here; confirm
 // upstream what cases are missing.
1973 case ISD::VECREDUCE_ADD:
1974 case ISD::VECREDUCE_MUL:
1975 case ISD::VECREDUCE_AND:
1976 case ISD::VECREDUCE_OR:
1977 case ISD::VECREDUCE_XOR:
 // NOTE(review): extraction dropped original lines 1978-1984 here — the
 // remaining VECREDUCE_* cases funneled into visitVECREDUCE.
1985 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
 // All VP (vector-predicated) opcodes, enumerated via the .def file, share
 // a single handler.
1986#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1987#include "llvm/IR/VPIntrinsics.def"
1988 return visitVPOp(N);
1989 }
1990 // clang-format on
1991 return SDValue();
1992}
1993
1994SDValue DAGCombiner::combine(SDNode *N) {
 // Combine strategy, in order: generic combines, target-specific combines,
 // integer-operation promotion, and finally commuted-operand CSE.
1995 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1996 return SDValue();
1997
1998 SDValue RV;
1999 if (!DisableGenericCombines)
2000 RV = visit(N);
2001
2002 // If nothing happened, try a target-specific DAG combine.
2003 if (!RV.getNode()) {
2004 assert(N->getOpcode() != ISD::DELETED_NODE &&
2005 "Node was deleted but visit returned NULL!");
2006
 // Target-specific opcodes, or generic opcodes the target registered for.
2007 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2008 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2009
2010 // Expose the DAG combiner to the target combiner impls.
 // NOTE(review): extraction dropped original line 2011 here — the type of
 // 'DagCombineInfo' (a TargetLowering combiner-info struct); confirm
 // against the upstream source.
2012 DagCombineInfo(DAG, Level, false, this);
2013
2014 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2015 }
2016 }
2017
2018 // If nothing happened still, try promoting the operation.
2019 if (!RV.getNode()) {
2020 switch (N->getOpcode()) {
2021 default: break;
2022 case ISD::ADD:
2023 case ISD::SUB:
2024 case ISD::MUL:
2025 case ISD::AND:
2026 case ISD::OR:
2027 case ISD::XOR:
2028 RV = PromoteIntBinOp(SDValue(N, 0));
2029 break;
2030 case ISD::SHL:
2031 case ISD::SRA:
2032 case ISD::SRL:
2033 RV = PromoteIntShiftOp(SDValue(N, 0));
2034 break;
2035 case ISD::SIGN_EXTEND:
2036 case ISD::ZERO_EXTEND:
2037 case ISD::ANY_EXTEND:
2038 RV = PromoteExtend(SDValue(N, 0));
2039 break;
2040 case ISD::LOAD:
 // PromoteLoad rewrites in place, so signal "changed" by returning N.
2041 if (PromoteLoad(SDValue(N, 0)))
2042 RV = SDValue(N, 0);
2043 break;
2044 }
2045 }
2046
2047 // If N is a commutative binary node, try to eliminate it if the commuted
2048 // version is already present in the DAG.
2049 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2050 SDValue N0 = N->getOperand(0);
2051 SDValue N1 = N->getOperand(1);
2052
2053 // Constant operands are canonicalized to RHS.
2054 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2055 SDValue Ops[] = {N1, N0};
2056 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2057 N->getFlags());
2058 if (CSENode)
2059 return SDValue(CSENode, 0);
2060 }
2061 }
2062
2063 return RV;
2064}
2065
2066/// Given a node, return its input chain if it has one, otherwise return a null
2067/// sd operand.
 // NOTE(review): extraction dropped original line 2068 — the signature of
 // this static helper (it takes the SDNode *N used below).
2069 if (unsigned NumOps = N->getNumOperands()) {
 // A chain operand has type MVT::Other. It is usually operand 0 or the
 // last operand, so check those first, then scan the interior operands.
2070 if (N->getOperand(0).getValueType() == MVT::Other)
2071 return N->getOperand(0);
2072 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2073 return N->getOperand(NumOps-1);
2074 for (unsigned i = 1; i < NumOps-1; ++i)
2075 if (N->getOperand(i).getValueType() == MVT::Other)
2076 return N->getOperand(i);
2077 }
2078 return SDValue();
2079}
2080
2081SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
 // A TokenFactor merges multiple chains into one. This routine flattens
 // nested token factors and prunes operands that are reachable through other
 // operands' chains (and are therefore redundant).
2082 // If N has two operands, where one has an input chain equal to the other,
2083 // the 'other' chain is redundant.
2084 if (N->getNumOperands() == 2) {
2085 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2086 return N->getOperand(0);
2087 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2088 return N->getOperand(1);
2089 }
2090
2091 // Don't simplify token factors if optnone.
2092 if (OptLevel == CodeGenOptLevel::None)
2093 return SDValue();
2094
2095 // Don't simplify the token factor if the node itself has too many operands.
2096 if (N->getNumOperands() > TokenFactorInlineLimit)
2097 return SDValue();
2098
2099 // If the sole user is a token factor, we should make sure we have a
2100 // chance to merge them together. This prevents TF chains from inhibiting
2101 // optimizations.
2102 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2103 AddToWorklist(*(N->use_begin()));
2104
2105 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2106 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
 // NOTE(review): extraction dropped original line 2107 here — the
 // declaration of the 'SeenOps' set used below; confirm against upstream.
2108 bool Changed = false; // If we should replace this token factor.
2109
2110 // Start out with this token factor.
2111 TFs.push_back(N);
2112
2113 // Iterate through token factors. The TFs list grows when new token factors
2114 // are encountered.
2115 for (unsigned i = 0; i < TFs.size(); ++i) {
2116 // Limit number of nodes to inline, to avoid quadratic compile times.
2117 // We have to add the outstanding Token Factors to Ops, otherwise we might
2118 // drop Ops from the resulting Token Factors.
2119 if (Ops.size() > TokenFactorInlineLimit) {
2120 for (unsigned j = i; j < TFs.size(); j++)
2121 Ops.emplace_back(TFs[j], 0);
2122 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2123 // combiner worklist later.
2124 TFs.resize(i);
2125 break;
2126 }
2127
2128 SDNode *TF = TFs[i];
2129 // Check each of the operands.
2130 for (const SDValue &Op : TF->op_values()) {
2131 switch (Op.getOpcode()) {
2132 case ISD::EntryToken:
2133 // Entry tokens don't need to be added to the list. They are
2134 // redundant.
2135 Changed = true;
2136 break;
2137
2138 case ISD::TokenFactor:
2139 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2140 // Queue up for processing.
2141 TFs.push_back(Op.getNode());
2142 Changed = true;
2143 break;
2144 }
2145 [[fallthrough]];
2146
2147 default:
2148 // Only add if it isn't already in the list.
2149 if (SeenOps.insert(Op.getNode()).second)
2150 Ops.push_back(Op);
2151 else
2152 Changed = true;
2153 break;
2154 }
2155 }
2156 }
2157
2158 // Re-visit inlined Token Factors, to clean them up in case they have been
2159 // removed. Skip the first Token Factor, as this is the current node.
2160 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2161 AddToWorklist(TFs[i]);
2162
2163 // Remove Nodes that are chained to another node in the list. Do so
2164 // by walking up chains breadth-first stopping when we've seen
2165 // another operand. In general we must climb to the EntryNode, but we can exit
2166 // early if we find all remaining work is associated with just one operand as
2167 // no further pruning is possible.
2168
2169 // List of nodes to search through and original Ops from which they originate.
 // NOTE(review): extraction dropped original line 2170 here — the
 // declaration of the 'Worklist' of (node, op-index) pairs used below.
2171 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2172 SmallPtrSet<SDNode *, 16> SeenChains;
2173 bool DidPruneOps = false;
2174
2175 unsigned NumLeftToConsider = 0;
2176 for (const SDValue &Op : Ops) {
2177 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2178 OpWorkCount.push_back(1);
2179 }
2180
 // Local lambda (shadows the member AddToWorklist): queue a predecessor
 // chain node, folding its search into OpNumber's if it is itself an Op.
2181 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2182 // If this is an Op, we can remove the op from the list. Remark any
2183 // search associated with it as from the current OpNumber.
2184 if (SeenOps.contains(Op)) {
2185 Changed = true;
2186 DidPruneOps = true;
2187 unsigned OrigOpNumber = 0;
2188 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2189 OrigOpNumber++;
2190 assert((OrigOpNumber != Ops.size()) &&
2191 "expected to find TokenFactor Operand");
2192 // Re-mark worklist from OrigOpNumber to OpNumber
2193 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2194 if (Worklist[i].second == OrigOpNumber) {
2195 Worklist[i].second = OpNumber;
2196 }
2197 }
2198 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2199 OpWorkCount[OrigOpNumber] = 0;
2200 NumLeftToConsider--;
2201 }
2202 // Add if it's a new chain
2203 if (SeenChains.insert(Op).second) {
2204 OpWorkCount[OpNumber]++;
2205 Worklist.push_back(std::make_pair(Op, OpNumber));
2206 }
2207 };
2208
 // Bounded breadth-first walk up the chains (hard cap of 1024 steps).
2209 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2210 // We need to consider at least 2 Ops for pruning to be worthwhile.
2211 if (NumLeftToConsider <= 1)
2212 break;
2213 auto CurNode = Worklist[i].first;
2214 auto CurOpNumber = Worklist[i].second;
2215 assert((OpWorkCount[CurOpNumber] > 0) &&
2216 "Node should not appear in worklist");
2217 switch (CurNode->getOpcode()) {
2218 case ISD::EntryToken:
2219 // Hitting EntryToken is the only way for the search to terminate without
2220 // hitting
2221 // another operand's search. Prevent us from marking this operand
2222 // considered.
2223 NumLeftToConsider++;
2224 break;
2225 case ISD::TokenFactor:
2226 for (const SDValue &Op : CurNode->op_values())
2227 AddToWorklist(i, Op.getNode(), CurOpNumber);
2228 break;
 // NOTE(review): extraction dropped original line 2229 here — presumably
 // 'case ISD::LIFETIME_START:'; confirm against upstream.
2230 case ISD::LIFETIME_END:
2231 case ISD::CopyFromReg:
2232 case ISD::CopyToReg:
2233 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2234 break;
2235 default:
2236 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2237 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2238 break;
2239 }
2240 OpWorkCount[CurOpNumber]--;
2241 if (OpWorkCount[CurOpNumber] == 0)
2242 NumLeftToConsider--;
2243 }
2244
2245 // If we've changed things around then replace token factor.
2246 if (Changed) {
 // NOTE(review): extraction dropped original line 2247 here — the
 // declaration of the local 'Result' SDValue assigned below.
2248 if (Ops.empty()) {
2249 // The entry token is the only possible outcome.
2250 Result = DAG.getEntryNode();
2251 } else {
2252 if (DidPruneOps) {
2253 SmallVector<SDValue, 8> PrunedOps;
2254 // Keep only operands that were not reached through another chain.
2255 for (const SDValue &Op : Ops) {
2256 if (SeenChains.count(Op.getNode()) == 0)
2257 PrunedOps.push_back(Op);
2258 }
2259 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2260 } else {
2261 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2262 }
2263 }
2264 return Result;
2265 }
2266 return SDValue();
2267}
2268
2269/// MERGE_VALUES can always be eliminated.
2270SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2271 WorklistRemover DeadNodes(*this);
2272 // Replacing results may cause a different MERGE_VALUES to suddenly
2273 // be CSE'd with N, and carry its uses with it. Iterate until no
2274 // uses remain, to ensure that the node can be safely deleted.
2275 // First add the users of this node to the work list so that they
2276 // can be tried again once they have new operands.
2277 AddUsersToWorklist(N);
2278 do {
2279 // Do as a single replacement to avoid rewalking use lists.
 // NOTE(review): extraction dropped original line 2280 here — the
 // declaration of the local 'Ops' vector populated below.
2281 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2282 Ops.push_back(N->getOperand(i));
2283 DAG.ReplaceAllUsesWith(N, Ops.data());
2284 } while (!N->use_empty());
2285 deleteAndRecombine(N);
2286 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2287}
2288
2289/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2290/// ConstantSDNode pointer else nullptr.
 // NOTE(review): extraction dropped original line 2291 — the signature of
 // this static helper (it takes the SDValue 'N' cast below).
2292 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
 // Opaque constants must not be folded, so treat them as "not a constant".
2293 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2294}
2295
2296// isTruncateOf - If N is a truncate of some other value, return true, record
2297// the value being truncated in Op and which of Op's bits are zero/one in Known.
2298// This function computes KnownBits to avoid a duplicated call to
2299// computeKnownBits in the caller.
 // NOTE(review): extraction dropped original line 2300 — the first line of
 // this static helper's signature (SelectionDAG, the value N, and the Op
 // out-parameter used below).
2301 KnownBits &Known) {
 // Direct truncate: record the source value and its known bits.
2302 if (N->getOpcode() == ISD::TRUNCATE) {
2303 Op = N->getOperand(0);
2304 Known = DAG.computeKnownBits(Op);
2305 return true;
2306 }
2307
 // Otherwise try to match (setcc X, 0, ne) with an i1 scalar result, which
 // acts as a one-bit truncate when X has no bits set above bit 0.
2308 if (N.getOpcode() != ISD::SETCC ||
2309 N.getValueType().getScalarType() != MVT::i1 ||
2310 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2311 return false;
2312
2313 SDValue Op0 = N->getOperand(0);
2314 SDValue Op1 = N->getOperand(1);
2315 assert(Op0.getValueType() == Op1.getValueType());
2316
 // One side must be zero (or a zero splat); the other side is the value.
2317 if (isNullOrNullSplat(Op0))
2318 Op = Op1;
2319 else if (isNullOrNullSplat(Op1))
2320 Op = Op0;
2321 else
2322 return false;
2323
2324 Known = DAG.computeKnownBits(Op);
2325
 // Succeed only if every bit above bit 0 is known to be zero.
2326 return (Known.Zero | 1).isAllOnes();
2327}
2328
2329/// Return true if 'Use' is a load or a store that uses N as its base pointer
2330/// and that N may be folded in the load / store addressing mode.
 // NOTE(review): extraction dropped original line 2331 — the first line of
 // this static helper's signature (N, Use, SelectionDAG &DAG, and the
 // TargetLowering reference below).
2332 const TargetLowering &TLI) {
2333 EVT VT;
2334 unsigned AS;
2335
 // Identify the kind of memory access; Use must be a non-indexed load or
 // store (plain or masked) whose base pointer is exactly N.
2336 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2337 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2338 return false;
2339 VT = LD->getMemoryVT();
2340 AS = LD->getAddressSpace();
2341 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2342 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2343 return false;
2344 VT = ST->getMemoryVT();
2345 AS = ST->getAddressSpace();
2346 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2347 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2348 return false;
2349 VT = LD->getMemoryVT();
2350 AS = LD->getAddressSpace();
2351 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2352 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2353 return false;
2354 VT = ST->getMemoryVT();
2355 AS = ST->getAddressSpace();
2356 } else {
2357 return false;
2358 }
2359
 // NOTE(review): extraction dropped original line 2360 here — the
 // declaration of the addressing-mode descriptor 'AM' filled in below.
 // Translate N (an ADD/SUB of base and offset) into an addressing-mode
 // query for the target.
2361 if (N->getOpcode() == ISD::ADD) {
2362 AM.HasBaseReg = true;
2363 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2364 if (Offset)
2365 // [reg +/- imm]
2366 AM.BaseOffs = Offset->getSExtValue();
2367 else
2368 // [reg +/- reg]
2369 AM.Scale = 1;
2370 } else if (N->getOpcode() == ISD::SUB) {
2371 AM.HasBaseReg = true;
2372 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2373 if (Offset)
2374 // [reg +/- imm]
2375 AM.BaseOffs = -Offset->getSExtValue();
2376 else
2377 // [reg +/- reg]
2378 AM.Scale = 1;
2379 } else {
2380 return false;
2381 }
2382
 // Ask the target whether this addressing mode is legal for the memory
 // type and address space of the use.
2383 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2384 VT.getTypeForEVT(*DAG.getContext()), AS);
2385}
2386
2387/// This inverts a canonicalization in IR that replaces a variable select arm
2388/// with an identity constant. Codegen improves if we re-use the variable
2389/// operand rather than load a constant. This can also be converted into a
2390/// masked vector operation if the target supports it.
 // NOTE(review): extraction dropped original line 2391 — the first line of
 // this static helper's signature (the binop node N, SelectionDAG, and the
 // commute flag below).
2392 bool ShouldCommuteOperands) {
2393 // Match a select as operand 1. The identity constant that we are looking for
2394 // is only valid as operand 1 of a non-commutative binop.
2395 SDValue N0 = N->getOperand(0);
2396 SDValue N1 = N->getOperand(1);
2397 if (ShouldCommuteOperands)
2398 std::swap(N0, N1);
2399
2400 // TODO: Should this apply to scalar select too?
2401 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2402 return SDValue();
2403
2404 // We can't hoist all instructions because of immediate UB (not speculatable).
2405 // For example div/rem by zero.
 // NOTE(review): extraction dropped original line 2406 here — the
 // speculation-safety condition guarding this early return; confirm
 // against the upstream source.
2407 return SDValue();
2408
2409 unsigned Opcode = N->getOpcode();
2410 EVT VT = N->getValueType(0);
2411 SDValue Cond = N1.getOperand(0);
2412 SDValue TVal = N1.getOperand(1);
2413 SDValue FVal = N1.getOperand(2);
2414
2415 // This transform increases uses of N0, so freeze it to be safe.
2416 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2417 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2418 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2419 SDValue F0 = DAG.getFreeze(N0);
2420 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2421 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2422 }
2423 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2424 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2425 SDValue F0 = DAG.getFreeze(N0);
2426 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2427 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2428 }
2429
2430 return SDValue();
2431}
2432
2433SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
 // Fold a binop whose operand is a one-use select-of-constants by pulling
 // the binop into both select arms (or, for AND/OR with 0/-1 arms, by
 // propagating the non-constant operand into the select).
2434 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2435 "Unexpected binary operator");
2436
 // First try the vselect-with-identity-constant transform in both operand
 // orders (the commuted order only for commutative binops).
2437 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2438 auto BinOpcode = BO->getOpcode();
2439 EVT VT = BO->getValueType(0);
2440 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2441 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2442 return Sel;
2443
2444 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2445 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2446 return Sel;
2447 }
2448
2449 // Don't do this unless the old select is going away. We want to eliminate the
2450 // binary operator, not replace a binop with a select.
2451 // TODO: Handle ISD::SELECT_CC.
2452 unsigned SelOpNo = 0;
2453 SDValue Sel = BO->getOperand(0);
2454 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2455 SelOpNo = 1;
2456 Sel = BO->getOperand(1);
2457
2458 // Peek through trunc to shift amount type.
2459 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2460 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2461 // This is valid when the truncated bits of x are already zero.
2462 SDValue Op;
2463 KnownBits Known;
2464 if (isTruncateOf(DAG, Sel, Op, Known) &&
 // NOTE(review): extraction dropped original line 2465 here — the second
 // half of this condition (a known-bits check on the truncated value).
2466 Sel = Op;
2467 }
2468 }
2469
2470 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2471 return SDValue();
2472
 // Both select arms must be constants (modulo the AND/OR exception below).
2473 SDValue CT = Sel.getOperand(1);
2474 if (!isConstantOrConstantVector(CT, true) &&
 // NOTE(review): extraction dropped original line 2475 here — the second
 // half of this condition (an FP-constant check on CT).
2476 return SDValue();
2477
2478 SDValue CF = Sel.getOperand(2);
2479 if (!isConstantOrConstantVector(CF, true) &&
 // NOTE(review): extraction dropped original line 2480 here — the second
 // half of this condition (an FP-constant check on CF).
2481 return SDValue();
2482
2483 // Bail out if any constants are opaque because we can't constant fold those.
2484 // The exception is "and" and "or" with either 0 or -1 in which case we can
2485 // propagate non constant operands into select. I.e.:
2486 // and (select Cond, 0, -1), X --> select Cond, 0, X
2487 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2488 bool CanFoldNonConst =
2489 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
 // NOTE(review): extraction dropped original lines 2490-2491 here — the
 // rest of this initializer (zero/all-ones checks on the select arms).
2492
2493 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2494 if (!CanFoldNonConst &&
2495 !isConstantOrConstantVector(CBO, true) &&
 // NOTE(review): extraction dropped original line 2496 here — the final
 // clause of this condition (an FP-constant check on CBO).
2497 return SDValue();
2498
2499 SDLoc DL(Sel);
2500 SDValue NewCT, NewCF;
2501
2502 if (CanFoldNonConst) {
2503 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2504 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2505 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2506 NewCT = CT;
2507 else
2508 NewCT = CBO;
2509
2510 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2511 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2512 NewCF = CF;
2513 else
2514 NewCF = CBO;
2515 } else {
2516 // We have a select-of-constants followed by a binary operator with a
2517 // constant. Eliminate the binop by pulling the constant math into the
2518 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2519 // CBO, CF + CBO
2520 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2521 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2522 if (!NewCT)
2523 return SDValue();
2524
2525 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2526 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2527 if (!NewCF)
2528 return SDValue();
2529 }
2530
 // Build the replacement select and carry over the original binop's flags.
2531 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2532 SelectOp->setFlags(BO->getFlags());
2533 return SelectOp;
2534}
2535
 // NOTE(review): extraction dropped original line 2536 — the first line of
 // this static helper's signature (the add/sub node N, a debug location DL,
 // and the SelectionDAG reference below).
2537 SelectionDAG &DAG) {
2538 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2539 "Expecting add or sub");
2540
2541 // Match a constant operand and a zext operand for the math instruction:
2542 // add Z, C
2543 // sub C, Z
2544 bool IsAdd = N->getOpcode() == ISD::ADD;
2545 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2546 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2547 auto *CN = dyn_cast<ConstantSDNode>(C);
2548 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2549 return SDValue();
2550
2551 // Match the zext operand as a setcc of a boolean.
2552 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2553 Z.getOperand(0).getValueType() != MVT::i1)
2554 return SDValue();
2555
2556 // Match the compare as: setcc (X & 1), 0, eq.
2557 SDValue SetCC = Z.getOperand(0);
2558 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2559 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2560 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2561 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2562 return SDValue();
2563
2564 // We are adding/subtracting a constant and an inverted low bit. Turn that
2565 // into a subtract/add of the low bit with incremented/decremented constant:
2566 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2567 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2568 EVT VT = C.getValueType();
2569 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2570 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2571 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2572 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2573}
2574
2575// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2576SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2577 SDValue N0 = N->getOperand(0);
2578 EVT VT = N0.getValueType();
2579 SDValue A, B;
2580
 // Unsigned form, only when the target supports AVGCEILU for this type.
2581 if (hasOperation(ISD::AVGCEILU, VT) &&
 // NOTE(review): extraction dropped original lines 2582-2583 here — the
 // sd_match pattern matching (A | B) - ((A ^ B) >> 1).
2584 m_SpecificInt(1))))) {
2585 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2586 }
 // Signed form, only when the target supports AVGCEILS for this type.
2587 if (hasOperation(ISD::AVGCEILS, VT) &&
 // NOTE(review): extraction dropped original lines 2588-2589 here — the
 // corresponding sd_match pattern for the signed case.
2590 m_SpecificInt(1))))) {
2591 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2592 }
2593 return SDValue();
2594}
2595
2596/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2597/// a shift and add with a different constant.
 // NOTE(review): extraction dropped original line 2598 — the first line of
 // this static helper's signature (the add/sub node N, a debug location DL,
 // and the SelectionDAG reference below).
2599 SelectionDAG &DAG) {
2600 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2601 "Expecting add or sub");
2602
2603 // We need a constant operand for the add/sub, and the other operand is a
2604 // logical shift right: add (srl), C or sub C, (srl).
2605 bool IsAdd = N->getOpcode() == ISD::ADD;
2606 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2607 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2608 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2609 ShiftOp.getOpcode() != ISD::SRL)
2610 return SDValue();
2611
2612 // The shift must be of a 'not' value.
2613 SDValue Not = ShiftOp.getOperand(0);
2614 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2615 return SDValue();
2616
2617 // The shift must be moving the sign bit to the least-significant-bit.
2618 EVT VT = ShiftOp.getValueType();
2619 SDValue ShAmt = ShiftOp.getOperand(1);
2620 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2621 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2622 return SDValue();
2623
2624 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2625 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2626 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2627 if (SDValue NewC = DAG.FoldConstantArithmetic(
2628 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2629 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2630 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2631 Not.getOperand(0), ShAmt);
2632 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2633 }
2634
2635 return SDValue();
2636}
2637
2638static bool
2640 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2641 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2642}
2643
2644/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2645/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2646/// are no common bits set in the operands).
2647SDValue DAGCombiner::visitADDLike(SDNode *N) {
2648 SDValue N0 = N->getOperand(0);
2649 SDValue N1 = N->getOperand(1);
2650 EVT VT = N0.getValueType();
2651 SDLoc DL(N);
2652
2653 // fold (add x, undef) -> undef
2654 if (N0.isUndef())
2655 return N0;
2656 if (N1.isUndef())
2657 return N1;
2658
2659 // fold (add c1, c2) -> c1+c2
2660 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2661 return C;
2662
2663 // canonicalize constant to RHS
2666 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2667
2668 if (areBitwiseNotOfEachother(N0, N1))
2670 SDLoc(N), VT);
2671
2672 // fold vector ops
2673 if (VT.isVector()) {
2674 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2675 return FoldedVOp;
2676
2677 // fold (add x, 0) -> x, vector edition
2679 return N0;
2680 }
2681
2682 // fold (add x, 0) -> x
2683 if (isNullConstant(N1))
2684 return N0;
2685
2686 if (N0.getOpcode() == ISD::SUB) {
2687 SDValue N00 = N0.getOperand(0);
2688 SDValue N01 = N0.getOperand(1);
2689
2690 // fold ((A-c1)+c2) -> (A+(c2-c1))
2691 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2692 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2693
2694 // fold ((c1-A)+c2) -> (c1+c2)-A
2695 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2696 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2697 }
2698
2699 // add (sext i1 X), 1 -> zext (not i1 X)
2700 // We don't transform this pattern:
2701 // add (zext i1 X), -1 -> sext (not i1 X)
2702 // because most (?) targets generate better code for the zext form.
2703 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2704 isOneOrOneSplat(N1)) {
2705 SDValue X = N0.getOperand(0);
2706 if ((!LegalOperations ||
2707 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2709 X.getScalarValueSizeInBits() == 1) {
2710 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2711 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2712 }
2713 }
2714
2715 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2716 // iff (or x, c0) is equivalent to (add x, c0).
2717 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2718 // iff (xor x, c0) is equivalent to (add x, c0).
2719 if (DAG.isADDLike(N0)) {
2720 SDValue N01 = N0.getOperand(1);
2721 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2722 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2723 }
2724
2725 if (SDValue NewSel = foldBinOpIntoSelect(N))
2726 return NewSel;
2727
2728 // reassociate add
2729 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2730 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2731 return RADD;
2732
2733 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2734 // equivalent to (add x, c).
2735 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2736 // equivalent to (add x, c).
2737 // Do this optimization only when adding c does not introduce instructions
2738 // for adding carries.
2739 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2740 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2741 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2742 // If N0's type does not split or is a sign mask, it does not introduce
2743 // add carry.
2744 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2745 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2748 if (NoAddCarry)
2749 return DAG.getNode(
2750 ISD::ADD, DL, VT,
2751 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2752 N0.getOperand(1));
2753 }
2754 return SDValue();
2755 };
2756 if (SDValue Add = ReassociateAddOr(N0, N1))
2757 return Add;
2758 if (SDValue Add = ReassociateAddOr(N1, N0))
2759 return Add;
2760
2761 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2762 if (SDValue SD =
2763 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2764 return SD;
2765 }
2766
2767 SDValue A, B, C;
2768
2769 // fold ((0-A) + B) -> B-A
2770 if (sd_match(N0, m_Neg(m_Value(A))))
2771 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2772
2773 // fold (A + (0-B)) -> A-B
2774 if (sd_match(N1, m_Neg(m_Value(B))))
2775 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2776
2777 // fold (A+(B-A)) -> B
2778 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2779 return B;
2780
2781 // fold ((B-A)+A) -> B
2782 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2783 return B;
2784
2785 // fold ((A-B)+(C-A)) -> (C-B)
2786 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2788 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2789
2790 // fold ((A-B)+(B-C)) -> (A-C)
2791 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2793 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2794
2795 // fold (A+(B-(A+C))) to (B-C)
2796 // fold (A+(B-(C+A))) to (B-C)
2797 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2798 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2799
2800 // fold (A+((B-A)+or-C)) to (B+or-C)
2801 if (sd_match(N1,
2803 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2804 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2805
2806 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2807 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2808 N0->hasOneUse() && N1->hasOneUse()) {
2809 SDValue N00 = N0.getOperand(0);
2810 SDValue N01 = N0.getOperand(1);
2811 SDValue N10 = N1.getOperand(0);
2812 SDValue N11 = N1.getOperand(1);
2813
2815 return DAG.getNode(ISD::SUB, DL, VT,
2816 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2817 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2818 }
2819
2820 // fold (add (umax X, C), -C) --> (usubsat X, C)
2821 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2822 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2823 return (!Max && !Op) ||
2824 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2825 };
2826 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2827 /*AllowUndefs*/ true))
2828 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2829 N0.getOperand(1));
2830 }
2831
2833 return SDValue(N, 0);
2834
2835 if (isOneOrOneSplat(N1)) {
2836 // fold (add (xor a, -1), 1) -> (sub 0, a)
2837 if (isBitwiseNot(N0))
2838 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2839 N0.getOperand(0));
2840
2841 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2842 if (N0.getOpcode() == ISD::ADD) {
2843 SDValue A, Xor;
2844
2845 if (isBitwiseNot(N0.getOperand(0))) {
2846 A = N0.getOperand(1);
2847 Xor = N0.getOperand(0);
2848 } else if (isBitwiseNot(N0.getOperand(1))) {
2849 A = N0.getOperand(0);
2850 Xor = N0.getOperand(1);
2851 }
2852
2853 if (Xor)
2854 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2855 }
2856
2857 // Look for:
2858 // add (add x, y), 1
2859 // And if the target does not like this form then turn into:
2860 // sub y, (xor x, -1)
2861 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2862 N0.hasOneUse() &&
2863 // Limit this to after legalization if the add has wrap flags
2864 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2865 !N->getFlags().hasNoSignedWrap()))) {
2866 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2867 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2868 }
2869 }
2870
2871 // (x - y) + -1 -> add (xor y, -1), x
2872 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2873 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2874 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2875 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2876 }
2877
2878 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2879 // This can help if the inner add has multiple uses.
2880 APInt CM, CA;
2881 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2882 if (VT.getScalarSizeInBits() <= 64) {
2884 m_ConstInt(CM)))) &&
2886 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2888 // If all the inputs are nuw, the outputs can be nuw. If all the input
2889 // are _also_ nsw the outputs can be too.
2890 if (N->getFlags().hasNoUnsignedWrap() &&
2891 N0->getFlags().hasNoUnsignedWrap() &&
2893 Flags.setNoUnsignedWrap(true);
2894 if (N->getFlags().hasNoSignedWrap() &&
2895 N0->getFlags().hasNoSignedWrap() &&
2897 Flags.setNoSignedWrap(true);
2898 }
2899 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2900 DAG.getConstant(CM, DL, VT), Flags);
2901 return DAG.getNode(
2902 ISD::ADD, DL, VT, Mul,
2903 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2904 }
2905 // Also look in case there is an intermediate add.
2906 if (sd_match(N0, m_OneUse(m_Add(
2908 m_ConstInt(CM))),
2909 m_Value(B)))) &&
2911 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2913 // If all the inputs are nuw, the outputs can be nuw. If all the input
2914 // are _also_ nsw the outputs can be too.
2915 SDValue OMul =
2916 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2917 if (N->getFlags().hasNoUnsignedWrap() &&
2918 N0->getFlags().hasNoUnsignedWrap() &&
2919 OMul->getFlags().hasNoUnsignedWrap() &&
2920 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2921 Flags.setNoUnsignedWrap(true);
2922 if (N->getFlags().hasNoSignedWrap() &&
2923 N0->getFlags().hasNoSignedWrap() &&
2924 OMul->getFlags().hasNoSignedWrap() &&
2925 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2926 Flags.setNoSignedWrap(true);
2927 }
2928 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2929 DAG.getConstant(CM, DL, VT), Flags);
2930 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2931 return DAG.getNode(
2932 ISD::ADD, DL, VT, Add,
2933 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2934 }
2935 }
2936 }
2937
2938 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2939 return Combined;
2940
2941 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2942 return Combined;
2943
2944 return SDValue();
2945}
2946
2947// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2948SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2949 SDValue N0 = N->getOperand(0);
2950 EVT VT = N0.getValueType();
2951 SDValue A, B;
2952
2953 if (hasOperation(ISD::AVGFLOORU, VT) &&
2956 m_SpecificInt(1))))) {
2957 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2958 }
2959 if (hasOperation(ISD::AVGFLOORS, VT) &&
2962 m_SpecificInt(1))))) {
2963 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2964 }
2965
2966 return SDValue();
2967}
2968
2969SDValue DAGCombiner::visitADD(SDNode *N) {
2970 SDValue N0 = N->getOperand(0);
2971 SDValue N1 = N->getOperand(1);
2972 EVT VT = N0.getValueType();
2973 SDLoc DL(N);
2974
2975 if (SDValue Combined = visitADDLike(N))
2976 return Combined;
2977
2978 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2979 return V;
2980
2981 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2982 return V;
2983
2984 // Try to match AVGFLOOR fixedwidth pattern
2985 if (SDValue V = foldAddToAvg(N, DL))
2986 return V;
2987
2988 // fold (a+b) -> (a|b) iff a and b share no bits.
2989 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2990 DAG.haveNoCommonBitsSet(N0, N1)) {
2992 Flags.setDisjoint(true);
2993 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
2994 }
2995
2996 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2997 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2998 const APInt &C0 = N0->getConstantOperandAPInt(0);
2999 const APInt &C1 = N1->getConstantOperandAPInt(0);
3000 return DAG.getVScale(DL, VT, C0 + C1);
3001 }
3002
3003 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3004 if (N0.getOpcode() == ISD::ADD &&
3005 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3006 N1.getOpcode() == ISD::VSCALE) {
3007 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3008 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3009 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3010 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3011 }
3012
3013 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3014 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3015 N1.getOpcode() == ISD::STEP_VECTOR) {
3016 const APInt &C0 = N0->getConstantOperandAPInt(0);
3017 const APInt &C1 = N1->getConstantOperandAPInt(0);
3018 APInt NewStep = C0 + C1;
3019 return DAG.getStepVector(DL, VT, NewStep);
3020 }
3021
3022 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3023 if (N0.getOpcode() == ISD::ADD &&
3025 N1.getOpcode() == ISD::STEP_VECTOR) {
3026 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3027 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3028 APInt NewStep = SV0 + SV1;
3029 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3030 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3031 }
3032
3033 return SDValue();
3034}
3035
3036SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3037 unsigned Opcode = N->getOpcode();
3038 SDValue N0 = N->getOperand(0);
3039 SDValue N1 = N->getOperand(1);
3040 EVT VT = N0.getValueType();
3041 bool IsSigned = Opcode == ISD::SADDSAT;
3042 SDLoc DL(N);
3043
3044 // fold (add_sat x, undef) -> -1
3045 if (N0.isUndef() || N1.isUndef())
3046 return DAG.getAllOnesConstant(DL, VT);
3047
3048 // fold (add_sat c1, c2) -> c3
3049 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3050 return C;
3051
3052 // canonicalize constant to RHS
3055 return DAG.getNode(Opcode, DL, VT, N1, N0);
3056
3057 // fold vector ops
3058 if (VT.isVector()) {
3059 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3060 return FoldedVOp;
3061
3062 // fold (add_sat x, 0) -> x, vector edition
3064 return N0;
3065 }
3066
3067 // fold (add_sat x, 0) -> x
3068 if (isNullConstant(N1))
3069 return N0;
3070
3071 // If it cannot overflow, transform into an add.
3072 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3073 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3074
3075 return SDValue();
3076}
3077
3079 bool ForceCarryReconstruction = false) {
3080 bool Masked = false;
3081
3082 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3083 while (true) {
3084 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3085 V = V.getOperand(0);
3086 continue;
3087 }
3088
3089 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3090 if (ForceCarryReconstruction)
3091 return V;
3092
3093 Masked = true;
3094 V = V.getOperand(0);
3095 continue;
3096 }
3097
3098 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3099 return V;
3100
3101 break;
3102 }
3103
3104 // If this is not a carry, return.
3105 if (V.getResNo() != 1)
3106 return SDValue();
3107
3108 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3109 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3110 return SDValue();
3111
3112 EVT VT = V->getValueType(0);
3113 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3114 return SDValue();
3115
3116 // If the result is masked, then no matter what kind of bool it is we can
3117 // return. If it isn't, then we need to make sure the bool type is either 0 or
3118 // 1 and not other values.
3119 if (Masked ||
3120 TLI.getBooleanContents(V.getValueType()) ==
3122 return V;
3123
3124 return SDValue();
3125}
3126
3127/// Given the operands of an add/sub operation, see if the 2nd operand is a
3128/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3129/// the opcode and bypass the mask operation.
3130static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3131 SelectionDAG &DAG, const SDLoc &DL) {
3132 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3133 N1 = N1.getOperand(0);
3134
3135 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3136 return SDValue();
3137
3138 EVT VT = N0.getValueType();
3139 SDValue N10 = N1.getOperand(0);
3140 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3141 N10 = N10.getOperand(0);
3142
3143 if (N10.getValueType() != VT)
3144 return SDValue();
3145
3146 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3147 return SDValue();
3148
3149 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3150 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3151 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3152}
3153
3154/// Helper for doing combines based on N0 and N1 being added to each other.
3155SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3156 SDNode *LocReference) {
3157 EVT VT = N0.getValueType();
3158 SDLoc DL(LocReference);
3159
3160 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3161 SDValue Y, N;
3162 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3163 return DAG.getNode(ISD::SUB, DL, VT, N0,
3164 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3165
3166 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3167 return V;
3168
3169 // Look for:
3170 // add (add x, 1), y
3171 // And if the target does not like this form then turn into:
3172 // sub y, (xor x, -1)
3173 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3174 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3175 // Limit this to after legalization if the add has wrap flags
3176 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3177 !N0->getFlags().hasNoSignedWrap()))) {
3178 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3179 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3180 }
3181
3182 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3183 // Hoist one-use subtraction by non-opaque constant:
3184 // (x - C) + y -> (x + y) - C
3185 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3186 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3187 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3188 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3189 }
3190 // Hoist one-use subtraction from non-opaque constant:
3191 // (C - x) + y -> (y - x) + C
3192 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3193 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3194 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3195 }
3196 }
3197
3198 // add (mul x, C), x -> mul x, C+1
3199 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3200 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3201 N0.hasOneUse()) {
3202 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3203 DAG.getConstant(1, DL, VT));
3204 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3205 }
3206
3207 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3208 // rather than 'add 0/-1' (the zext should get folded).
3209 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3210 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3211 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3213 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3214 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3215 }
3216
3217 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3218 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3219 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3220 if (TN->getVT() == MVT::i1) {
3221 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3222 DAG.getConstant(1, DL, VT));
3223 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3224 }
3225 }
3226
3227 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3228 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3229 N1.getResNo() == 0)
3230 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3231 N0, N1.getOperand(0), N1.getOperand(2));
3232
3233 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3235 if (SDValue Carry = getAsCarry(TLI, N1))
3236 return DAG.getNode(ISD::UADDO_CARRY, DL,
3237 DAG.getVTList(VT, Carry.getValueType()), N0,
3238 DAG.getConstant(0, DL, VT), Carry);
3239
3240 return SDValue();
3241}
3242
3243SDValue DAGCombiner::visitADDC(SDNode *N) {
3244 SDValue N0 = N->getOperand(0);
3245 SDValue N1 = N->getOperand(1);
3246 EVT VT = N0.getValueType();
3247 SDLoc DL(N);
3248
3249 // If the flag result is dead, turn this into an ADD.
3250 if (!N->hasAnyUseOfValue(1))
3251 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3252 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3253
3254 // canonicalize constant to RHS.
3255 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3256 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3257 if (N0C && !N1C)
3258 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3259
3260 // fold (addc x, 0) -> x + no carry out
3261 if (isNullConstant(N1))
3262 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3263 DL, MVT::Glue));
3264
3265 // If it cannot overflow, transform into an add.
3267 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3268 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3269
3270 return SDValue();
3271}
3272
3273/**
3274 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
3275 * then the flip also occurs if computing the inverse is the same cost.
3276 * This function returns an empty SDValue in case it cannot flip the boolean
3277 * without increasing the cost of the computation. If you want to flip a boolean
3278 * no matter what, use DAG.getLogicalNOT.
3279 */
3281 const TargetLowering &TLI,
3282 bool Force) {
3283 if (Force && isa<ConstantSDNode>(V))
3284 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3285
3286 if (V.getOpcode() != ISD::XOR)
3287 return SDValue();
3288
3289 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3290 if (!Const)
3291 return SDValue();
3292
3293 EVT VT = V.getValueType();
3294
3295 bool IsFlip = false;
3296 switch(TLI.getBooleanContents(VT)) {
3298 IsFlip = Const->isOne();
3299 break;
3301 IsFlip = Const->isAllOnes();
3302 break;
3304 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3305 break;
3306 }
3307
3308 if (IsFlip)
3309 return V.getOperand(0);
3310 if (Force)
3311 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3312 return SDValue();
3313}
3314
3315SDValue DAGCombiner::visitADDO(SDNode *N) {
3316 SDValue N0 = N->getOperand(0);
3317 SDValue N1 = N->getOperand(1);
3318 EVT VT = N0.getValueType();
3319 bool IsSigned = (ISD::SADDO == N->getOpcode());
3320
3321 EVT CarryVT = N->getValueType(1);
3322 SDLoc DL(N);
3323
3324 // If the flag result is dead, turn this into an ADD.
3325 if (!N->hasAnyUseOfValue(1))
3326 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3327 DAG.getUNDEF(CarryVT));
3328
3329 // canonicalize constant to RHS.
3332 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3333
3334 // fold (addo x, 0) -> x + no carry out
3335 if (isNullOrNullSplat(N1))
3336 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3337
3338 // If it cannot overflow, transform into an add.
3339 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3340 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3341 DAG.getConstant(0, DL, CarryVT));
3342
3343 if (IsSigned) {
3344 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3345 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3346 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3347 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3348 } else {
3349 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3350 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3351 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3352 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3353 return CombineTo(
3354 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3355 }
3356
3357 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3358 return Combined;
3359
3360 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3361 return Combined;
3362 }
3363
3364 return SDValue();
3365}
3366
3367SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3368 EVT VT = N0.getValueType();
3369 if (VT.isVector())
3370 return SDValue();
3371
3372 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3373 // If Y + 1 cannot overflow.
3374 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3375 SDValue Y = N1.getOperand(0);
3376 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3378 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3379 N1.getOperand(2));
3380 }
3381
3382 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3384 if (SDValue Carry = getAsCarry(TLI, N1))
3385 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3386 DAG.getConstant(0, SDLoc(N), VT), Carry);
3387
3388 return SDValue();
3389}
3390
3391SDValue DAGCombiner::visitADDE(SDNode *N) {
3392 SDValue N0 = N->getOperand(0);
3393 SDValue N1 = N->getOperand(1);
3394 SDValue CarryIn = N->getOperand(2);
3395
3396 // canonicalize constant to RHS
3397 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3398 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3399 if (N0C && !N1C)
3400 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3401 N1, N0, CarryIn);
3402
3403 // fold (adde x, y, false) -> (addc x, y)
3404 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3405 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3406
3407 return SDValue();
3408}
3409
3410SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3411 SDValue N0 = N->getOperand(0);
3412 SDValue N1 = N->getOperand(1);
3413 SDValue CarryIn = N->getOperand(2);
3414 SDLoc DL(N);
3415
3416 // canonicalize constant to RHS
3417 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3418 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3419 if (N0C && !N1C)
3420 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3421
3422 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3423 if (isNullConstant(CarryIn)) {
3424 if (!LegalOperations ||
3425 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3426 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3427 }
3428
3429 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3430 if (isNullConstant(N0) && isNullConstant(N1)) {
3431 EVT VT = N0.getValueType();
3432 EVT CarryVT = CarryIn.getValueType();
3433 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3434 AddToWorklist(CarryExt.getNode());
3435 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3436 DAG.getConstant(1, DL, VT)),
3437 DAG.getConstant(0, DL, CarryVT));
3438 }
3439
3440 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3441 return Combined;
3442
3443 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3444 return Combined;
3445
3446 // We want to avoid useless duplication.
3447 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3448 // not a binary operation, this is not really possible to leverage this
3449 // existing mechanism for it. However, if more operations require the same
3450 // deduplication logic, then it may be worth generalize.
3451 SDValue Ops[] = {N1, N0, CarryIn};
3452 SDNode *CSENode =
3453 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3454 if (CSENode)
3455 return SDValue(CSENode, 0);
3456
3457 return SDValue();
3458}
3459
3460/**
3461 * If we are facing some sort of diamond carry propagation pattern try to
3462 * break it up to generate something like:
3463 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3464 *
3465 * The end result is usually an increase in operation required, but because the
3466 * carry is now linearized, other transforms can kick in and optimize the DAG.
3467 *
3468 * Patterns typically look something like
3469 * (uaddo A, B)
3470 * / \
3471 * Carry Sum
3472 * | \
3473 * | (uaddo_carry *, 0, Z)
3474 * | /
3475 * \ Carry
3476 * | /
3477 * (uaddo_carry X, *, *)
3478 *
3479 * But numerous variation exist. Our goal is to identify A, B, X and Z and
3480 * produce a combine with a single path for carry propagation.
3481 */
3483 SelectionDAG &DAG, SDValue X,
3484 SDValue Carry0, SDValue Carry1,
3485 SDNode *N) {
3486 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3487 return SDValue();
3488 if (Carry1.getOpcode() != ISD::UADDO)
3489 return SDValue();
3490
3491 SDValue Z;
3492
3493 /**
3494 * First look for a suitable Z. It will present itself in the form of
3495 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3496 */
3497 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3498 isNullConstant(Carry0.getOperand(1))) {
3499 Z = Carry0.getOperand(2);
3500 } else if (Carry0.getOpcode() == ISD::UADDO &&
3501 isOneConstant(Carry0.getOperand(1))) {
3502 EVT VT = Carry0->getValueType(1);
3503 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3504 } else {
3505 // We couldn't find a suitable Z.
3506 return SDValue();
3507 }
3508
3509
3510 auto cancelDiamond = [&](SDValue A,SDValue B) {
3511 SDLoc DL(N);
3512 SDValue NewY =
3513 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3514 Combiner.AddToWorklist(NewY.getNode());
3515 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3516 DAG.getConstant(0, DL, X.getValueType()),
3517 NewY.getValue(1));
3518 };
3519
3520 /**
3521 * (uaddo A, B)
3522 * |
3523 * Sum
3524 * |
3525 * (uaddo_carry *, 0, Z)
3526 */
3527 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3528 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3529 }
3530
3531 /**
3532 * (uaddo_carry A, 0, Z)
3533 * |
3534 * Sum
3535 * |
3536 * (uaddo *, B)
3537 */
3538 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3539 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3540 }
3541
3542 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3543 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3544 }
3545
3546 return SDValue();
3547}
3548
3549// If we are facing some sort of diamond carry/borrow in/out pattern try to
3550// match patterns like:
3551//
3552// (uaddo A, B) CarryIn
3553// | \ |
3554// | \ |
3555// PartialSum PartialCarryOutX /
3556// | | /
3557// | ____|____________/
3558// | / |
3559// (uaddo *, *) \________
3560// | \ \
3561// | \ |
3562// | PartialCarryOutY |
3563// | \ |
3564// | \ /
3565// AddCarrySum | ______/
3566// | /
3567// CarryOut = (or *, *)
3568//
3569// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3570//
3571// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3572//
3573// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3574// with a single path for carry/borrow out propagation.
3576 SDValue N0, SDValue N1, SDNode *N) {
3577 SDValue Carry0 = getAsCarry(TLI, N0);
3578 if (!Carry0)
3579 return SDValue();
3580 SDValue Carry1 = getAsCarry(TLI, N1);
3581 if (!Carry1)
3582 return SDValue();
3583
3584 unsigned Opcode = Carry0.getOpcode();
3585 if (Opcode != Carry1.getOpcode())
3586 return SDValue();
3587 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3588 return SDValue();
3589 // Guarantee identical type of CarryOut
3590 EVT CarryOutType = N->getValueType(0);
3591 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3592 CarryOutType != Carry1.getValue(1).getValueType())
3593 return SDValue();
3594
3595 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3596 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3597 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3598 std::swap(Carry0, Carry1);
3599
3600 // Check if nodes are connected in expected way.
3601 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3602 Carry1.getOperand(1) != Carry0.getValue(0))
3603 return SDValue();
3604
3605 // The carry in value must be on the righthand side for subtraction.
3606 unsigned CarryInOperandNum =
3607 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3608 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3609 return SDValue();
3610 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3611
3612 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3613 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3614 return SDValue();
3615
3616 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3617 CarryIn = getAsCarry(TLI, CarryIn, true);
3618 if (!CarryIn)
3619 return SDValue();
3620
3621 SDLoc DL(N);
3622 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3623 Carry1->getValueType(0));
3624 SDValue Merged =
3625 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3626 Carry0.getOperand(1), CarryIn);
3627
3628 // Please note that because we have proven that the result of the UADDO/USUBO
3629 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3630 // therefore prove that if the first UADDO/USUBO overflows, the second
3631 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3632 // maximum value.
3633 //
3634 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3635 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3636 //
3637 // This is important because it means that OR and XOR can be used to merge
3638 // carry flags; and that AND can return a constant zero.
3639 //
3640 // TODO: match other operations that can merge flags (ADD, etc)
3641 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3642 if (N->getOpcode() == ISD::AND)
3643 return DAG.getConstant(0, DL, CarryOutType);
3644 return Merged.getValue(1);
3645}
3646
// Attempt UADDO_CARRY folds for node N given an explicit operand pair
// (N0, N1) and the carry-in value. Presumably the caller invokes this with
// both operand orders so each fold only has to match one shape — TODO confirm
// against the (not visible here) visitUADDO_CARRY caller.
SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
                                          SDValue CarryIn, SDNode *N) {
  // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
  // carry.
  if (isBitwiseNot(N0))
    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
      SDLoc DL(N);
      SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
                                N0.getOperand(0), NotC);
      // Replace both results: the sum, and the logically-inverted borrow as
      // the new carry-out.
      return CombineTo(
          N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
    }

  // Iff the flag result is dead:
  // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
  // or the dependency between the instructions.
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
        N0.getValue(1) != CarryIn)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the uaddo_carry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    // Because both are carries, Y and Z can be swapped.
    if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
      return R;
    if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
      return R;
  }

  // No fold applied.
  return SDValue();
}
3686
3687SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3688 SDValue CarryIn, SDNode *N) {
3689 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3690 if (isBitwiseNot(N0)) {
3691 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3692 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3693 N0.getOperand(0), NotC);
3694 }
3695
3696 return SDValue();
3697}
3698
3699SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3700 SDValue N0 = N->getOperand(0);
3701 SDValue N1 = N->getOperand(1);
3702 SDValue CarryIn = N->getOperand(2);
3703 SDLoc DL(N);
3704
3705 // canonicalize constant to RHS
3706 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3707 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3708 if (N0C && !N1C)
3709 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3710
3711 // fold (saddo_carry x, y, false) -> (saddo x, y)
3712 if (isNullConstant(CarryIn)) {
3713 if (!LegalOperations ||
3714 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3715 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3716 }
3717
3718 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3719 return Combined;
3720
3721 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3722 return Combined;
3723
3724 return SDValue();
3725}
3726
3727// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3728// clamp/truncation if necessary.
3729static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3730 SDValue RHS, SelectionDAG &DAG,
3731 const SDLoc &DL) {
3732 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3733 "Illegal truncation");
3734
3735 if (DstVT == SrcVT)
3736 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3737
3738 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3739 // clamping RHS.
3741 DstVT.getScalarSizeInBits());
3742 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3743 return SDValue();
3744
3745 SDValue SatLimit =
3747 DstVT.getScalarSizeInBits()),
3748 DL, SrcVT);
3749 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3750 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3751 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3752 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3753}
3754
// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
// usubsat(a,b), optionally as a truncated type.
SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
  // Only SUB nodes qualify, and only if USUBSAT is available (or we are still
  // pre-legalization and may legalize it later).
  if (N->getOpcode() != ISD::SUB ||
      !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
    return SDValue();

  EVT SubVT = N->getValueType(0);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  // Try to find umax(a,b) - b or a - umin(a,b) patterns
  // they may be converted to usubsat(a,b).
  if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
    SDValue MaxLHS = Op0.getOperand(0);
    SDValue MaxRHS = Op0.getOperand(1);
    // umax(a,b) - b -> usubsat(a,b); b may be either umax operand.
    if (MaxLHS == Op1)
      return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
    if (MaxRHS == Op1)
      return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
  }

  if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
    SDValue MinLHS = Op1.getOperand(0);
    SDValue MinRHS = Op1.getOperand(1);
    // a - umin(a,b) -> usubsat(a,b); a may be either umin operand.
    if (MinLHS == Op0)
      return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
    if (MinRHS == Op0)
      return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
  }

  // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
  if (Op1.getOpcode() == ISD::TRUNCATE &&
      Op1.getOperand(0).getOpcode() == ISD::UMIN &&
      Op1.getOperand(0).hasOneUse()) {
    SDValue MinLHS = Op1.getOperand(0).getOperand(0);
    SDValue MinRHS = Op1.getOperand(0).getOperand(1);
    // The zero-extended operand identifies 'a'; the USUBSAT is formed in the
    // wider (pre-truncate) type and getTruncatedUSUBSAT narrows it to DstVT.
    if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
      return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
                                 DAG, DL);
    if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
      return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
                                 DAG, DL);
  }

  // No matching pattern found.
  return SDValue();
}
3802
3803// Since it may not be valid to emit a fold to zero for vector initializers
3804// check if we can before folding.
3805static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3806 SelectionDAG &DAG, bool LegalOperations) {
3807 if (!VT.isVector())
3808 return DAG.getConstant(0, DL, VT);
3809 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3810 return DAG.getConstant(0, DL, VT);
3811 return SDValue();
3812}
3813
3814SDValue DAGCombiner::visitSUB(SDNode *N) {
3815 SDValue N0 = N->getOperand(0);
3816 SDValue N1 = N->getOperand(1);
3817 EVT VT = N0.getValueType();
3818 unsigned BitWidth = VT.getScalarSizeInBits();
3819 SDLoc DL(N);
3820
3821 auto PeekThroughFreeze = [](SDValue N) {
3822 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3823 return N->getOperand(0);
3824 return N;
3825 };
3826
3827 // fold (sub x, x) -> 0
3828 // FIXME: Refactor this and xor and other similar operations together.
3829 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3830 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3831
3832 // fold (sub c1, c2) -> c3
3833 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3834 return C;
3835
3836 // fold vector ops
3837 if (VT.isVector()) {
3838 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3839 return FoldedVOp;
3840
3841 // fold (sub x, 0) -> x, vector edition
3843 return N0;
3844 }
3845
3846 if (SDValue NewSel = foldBinOpIntoSelect(N))
3847 return NewSel;
3848
3849 // fold (sub x, c) -> (add x, -c)
3851 return DAG.getNode(ISD::ADD, DL, VT, N0,
3852 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3853
3854 if (isNullOrNullSplat(N0)) {
3855 // Right-shifting everything out but the sign bit followed by negation is
3856 // the same as flipping arithmetic/logical shift type without the negation:
3857 // -(X >>u 31) -> (X >>s 31)
3858 // -(X >>s 31) -> (X >>u 31)
3859 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3861 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3862 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3863 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3864 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3865 }
3866 }
3867
3868 // 0 - X --> 0 if the sub is NUW.
3869 if (N->getFlags().hasNoUnsignedWrap())
3870 return N0;
3871
3873 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3874 // N1 must be 0 because negating the minimum signed value is undefined.
3875 if (N->getFlags().hasNoSignedWrap())
3876 return N0;
3877
3878 // 0 - X --> X if X is 0 or the minimum signed value.
3879 return N1;
3880 }
3881
3882 // Convert 0 - abs(x).
3883 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3885 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3886 return Result;
3887
3888 // Fold neg(splat(neg(x)) -> splat(x)
3889 if (VT.isVector()) {
3890 SDValue N1S = DAG.getSplatValue(N1, true);
3891 if (N1S && N1S.getOpcode() == ISD::SUB &&
3892 isNullConstant(N1S.getOperand(0)))
3893 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3894 }
3895 }
3896
3897 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3899 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3900
3901 // fold (A - (0-B)) -> A+B
3902 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3903 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3904
3905 // fold A-(A-B) -> B
3906 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3907 return N1.getOperand(1);
3908
3909 // fold (A+B)-A -> B
3910 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3911 return N0.getOperand(1);
3912
3913 // fold (A+B)-B -> A
3914 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3915 return N0.getOperand(0);
3916
3917 // fold (A+C1)-C2 -> A+(C1-C2)
3918 if (N0.getOpcode() == ISD::ADD) {
3919 SDValue N01 = N0.getOperand(1);
3920 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3921 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3922 }
3923
3924 // fold C2-(A+C1) -> (C2-C1)-A
3925 if (N1.getOpcode() == ISD::ADD) {
3926 SDValue N11 = N1.getOperand(1);
3927 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3928 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3929 }
3930
3931 // fold (A-C1)-C2 -> A-(C1+C2)
3932 if (N0.getOpcode() == ISD::SUB) {
3933 SDValue N01 = N0.getOperand(1);
3934 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3935 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3936 }
3937
3938 // fold (c1-A)-c2 -> (c1-c2)-A
3939 if (N0.getOpcode() == ISD::SUB) {
3940 SDValue N00 = N0.getOperand(0);
3941 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3942 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3943 }
3944
3945 SDValue A, B, C;
3946
3947 // fold ((A+(B+C))-B) -> A+C
3948 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3949 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3950
3951 // fold ((A+(B-C))-B) -> A-C
3952 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3953 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3954
3955 // fold ((A-(B-C))-C) -> A-B
3956 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3957 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3958
3959 // fold (A-(B-C)) -> A+(C-B)
3960 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3961 return DAG.getNode(ISD::ADD, DL, VT, N0,
3962 DAG.getNode(ISD::SUB, DL, VT, C, B));
3963
3964 // A - (A & B) -> A & (~B)
3965 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3966 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3967 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3968
3969 // fold (A - (-B * C)) -> (A + (B * C))
3970 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3971 return DAG.getNode(ISD::ADD, DL, VT, N0,
3972 DAG.getNode(ISD::MUL, DL, VT, B, C));
3973
3974 // If either operand of a sub is undef, the result is undef
3975 if (N0.isUndef())
3976 return N0;
3977 if (N1.isUndef())
3978 return N1;
3979
3980 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3981 return V;
3982
3983 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3984 return V;
3985
3986 // Try to match AVGCEIL fixedwidth pattern
3987 if (SDValue V = foldSubToAvg(N, DL))
3988 return V;
3989
3990 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3991 return V;
3992
3993 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3994 return V;
3995
3996 // (A - B) - 1 -> add (xor B, -1), A
3998 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3999
4000 // Look for:
4001 // sub y, (xor x, -1)
4002 // And if the target does not like this form then turn into:
4003 // add (add x, y), 1
4004 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4005 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4006 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4007 }
4008
4009 // Hoist one-use addition by non-opaque constant:
4010 // (x + C) - y -> (x - y) + C
4011 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4012 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4013 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4014 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4015 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4016 }
4017 // y - (x + C) -> (y - x) - C
4018 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4019 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4020 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4021 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4022 }
4023 // (x - C) - y -> (x - y) - C
4024 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4025 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4026 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4027 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4028 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4029 }
4030 // (C - x) - y -> C - (x + y)
4031 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4032 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4033 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4034 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4035 }
4036
4037 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4038 // rather than 'sub 0/1' (the sext should get folded).
4039 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4040 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4041 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4042 TLI.getBooleanContents(VT) ==
4044 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4045 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4046 }
4047
4048 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
4049 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
4050 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
4051 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
4052 SDValue S0 = N1.getOperand(0);
4053 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
4055 if (C->getAPIntValue() == (BitWidth - 1))
4056 return DAG.getNode(ISD::ABS, DL, VT, S0);
4057 }
4058 }
4059
4060 // If the relocation model supports it, consider symbol offsets.
4061 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4062 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4063 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4064 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4065 if (GA->getGlobal() == GB->getGlobal())
4066 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4067 DL, VT);
4068 }
4069
4070 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4071 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4072 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4073 if (TN->getVT() == MVT::i1) {
4074