//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

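// Illustrative note (added; not part of the original file): a typical combine
// performed by this pass folds away redundant arithmetic, e.g.
//   (add x, 0) --> x
// subject to the legality and profitability checks implemented below.
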
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <variant>

#include "MatchContext.h"

using namespace llvm;
using namespace llvm::SDPatternMatch;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined, "Number of dag nodes combined");
STATISTIC(PreIndexedNodes, "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed, "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int, "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
              "Controls whether a DAG combine is performed for a node");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));

static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times for the same StoreNode and RootNode "
             "to bail out in store merging dependence check"));

static cl::opt<bool> EnableReduceLoadOpStoreWidth(
    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable reducing the width of load/op/store "
             "sequence"));

static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable(
    "combiner-reduce-load-op-store-width-force-narrowing-profitable",
    cl::Hidden, cl::init(false),
    cl::desc("DAG combiner force override the narrowing profitable check when "
             "reducing the width of load/op/store sequences"));

static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
    "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable load/<replace bytes>/store with "
             "a narrower store"));

static cl::opt<bool> EnableVectorFCopySignExtendRound(
    "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
    cl::desc(
        "Enable merging extends and rounds into FCOPYSIGN on vector types"));

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    const SelectionDAGTargetInfo *STI;
    CombineLevel Level = BeforeLegalizeTypes;
    CodeGenOptLevel OptLevel;
    bool LegalDAG = false;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;
    bool DisableGenericCombines;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG. For fast lookup and
    /// deduplication, the index of the node in this vector is stored in the
    /// node in SDNode::CombinerWorklistIndex.
    SmallVector<SDNode *, 64> Worklist;

    /// This records all nodes attempted to be added to the worklist since we
    /// considered a new worklist entry. As we do not add duplicate nodes
    /// to the worklist, this is different from the tail of the worklist.
    SmallSetVector<SDNode *, 32> PruningList;

    /// Map from candidate StoreNode to the pair of RootNode and count.
    /// The count is used to track how many times we have seen the StoreNode
    /// with the same RootNode bail out in dependence check. If we have seen
    /// the bail out for the same pair many times over a limit, we won't
    /// consider the StoreNode with the same RootNode as store merging
    /// candidate again.
    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// This caches all chains that have already been processed in
    /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
    /// store candidates.
    SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->users())
        AddToWorklist(Node);
    }

    /// Convenient shorthand to add a node and all of its users to the
    /// worklist.
    void AddToWorklistWithUsers(SDNode *N) {
      AddUsersToWorklist(N);
      AddToWorklist(N);
    }

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
        if (N->use_empty())
          recursivelyDeleteUnusedNodes(N);
      }
    }

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();
      SDNode *N = nullptr;
      // The Worklist holds the SDNodes in order, but it may contain null
      // entries.
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();
      }

      if (N) {
        assert(N->getCombinerWorklistIndex() >= 0 &&
               "Found a worklist entry without a corresponding map entry!");
        // Set to -2 to indicate that we combined the node.
        N->setCombinerWorklistIndex(-2);
      }
      return N;
    }
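
    // Summary of the SDNode::CombinerWorklistIndex states used above and
    // below (added explanatory comment, derived from this file's code):
    //   >= 0 : the node's current position in Worklist
    //    -1  : not in the worklist
    //    -2  : already visited/combined; AddToWorklist skips such nodes when
    //          SkipIfCombinedBefore is set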

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()),
          STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.shouldOptForSize();
      DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);

      MaximumLegalStoreInBits = 0;
      // We use the minimum store size here, since that's all we can guarantee
      // for the scalable vector types.
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
    }

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }

    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
                       bool SkipIfCombinedBefore = false) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
        return;

      if (IsCandidateForPruning)
        ConsiderForPruning(N);

      if (N->getCombinerWorklistIndex() < 0) {
        N->setCombinerWorklistIndex(Worklist.size());
        Worklist.push_back(N);
      }
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      PruningList.remove(N);
      StoreRootCountMap.erase(N);

      int WorklistIndex = N->getCombinerWorklistIndex();
      // If not in the worklist, the index might be -1 or -2 (was combined
      // before). As the node gets deleted anyway, there's no need to update
      // the index.
      if (WorklistIndex < 0)
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[WorklistIndex] = nullptr;
      N->setCombinerWorklistIndex(-1);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified
    /// or if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnes(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      EVT VT = Op.getValueType();
      APInt DemandedElts = VT.isFixedLengthVector()
                               ? APInt::getAllOnes(VT.getVectorNumElements())
                               : APInt(1, 1);
      return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      // TODO: For now just pretend it cannot be simplified.
      if (Op.getValueType().isScalableVector())
        return false;

      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnes(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts,
                              bool AssumeSingleUse = false);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Looks up the chain to find a unique (unaliased) store feeding the passed
    // load. If no such store is found, returns a nullptr.
    // Note: This will look past a CALLSEQ_START if the load is chained to it
    // so that it can find stack stores for byval params.
    StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    /// load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    SDValue foldShiftToAvg(SDNode *N);

    SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                SDValue RHS, SDValue True, SDValue False,
                                ISD::CondCode CC);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types. The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
                                    SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitUADDO_CARRY(SDNode *N);
    SDValue visitSADDO_CARRY(SDNode *N);
    SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                 SDNode *N);
    SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                 SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitUSUBO_CARRY(SDNode *N);
    SDValue visitSSUBO_CARRY(SDNode *N);
    template <class MatchContextClass> SDValue visitMUL(SDNode *N);
    SDValue visitMULFIX(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitAVG(SDNode *N);
    SDValue visitABD(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
    SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitSHLSAT(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitVP_SELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitAssertAlign(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitTRUNCATE_USAT_U(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitFREEZE(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitVP_FADD(SDNode *N);
    SDValue visitVP_FSUB(SDNode *N);
    SDValue visitSTRICT_FADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    template <class MatchContextClass> SDValue visitFMA(SDNode *N);
    SDValue visitFMAD(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitFCANONICALIZE(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitXROUND(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFREXP(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMinMax(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
    SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);

    bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);

    SDValue visitSTORE(SDNode *N);
    SDValue visitATOMIC_STORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_COMPRESS(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitMHISTOGRAM(SDNode *N);
    SDValue visitVPGATHER(SDNode *N);
    SDValue visitVPSCATTER(SDNode *N);
    SDValue visitVP_STRIDED_LOAD(SDNode *N);
    SDValue visitVP_STRIDED_STORE(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitFP_TO_BF16(SDNode *N);
    SDValue visitBF16_TO_FP(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);
    SDValue visitVPOp(SDNode *N);
    SDValue visitGET_FPENV_MEM(SDNode *N);
    SDValue visitSET_FPENV_MEM(SDNode *N);

    template <class MatchContextClass>
    SDValue visitFADDForFMACombine(SDNode *N);
    template <class MatchContextClass>
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                    const SDLoc &DL,
                                                    SDNode *N,
                                                    SDValue N0,
                                                    SDValue N1);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                      SDValue N1, SDNodeFlags Flags);
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);
    SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
                                 EVT VT, SDValue N0, SDValue N1,
                                 SDNodeFlags Flags = SDNodeFlags());

    SDValue visitShiftByConstant(SDNode *N);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSignChangeInBitcast(SDNode *N);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldSelectOfBinops(SDNode *N);
    SDValue foldSextSetcc(SDNode *N);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
    SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
    SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
                            SDValue False, ISD::CondCode CC, const SDLoc &DL);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC, bool MatchStrict = false) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
    SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering &TLI);

    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
    SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildSREMPow2(SDNode *N);
    SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
                          bool KnownNeverZero = false,
                          bool InexpensiveOnly = false,
                          std::optional<EVT> OutVT = std::nullopt);
    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue mergeTruncStores(StoreSDNode *N);
    SDValue reduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecTruncToBitCast(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
                                  bool DidSplitVec);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool mayAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walk up the store chain and add
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    // Classify the origin of a stored value.
    enum class StoreSource { Unknown, Constant, Extract, Load };
    StoreSource getStoreSource(SDValue StoreVal) {
      switch (StoreVal.getOpcode()) {
      case ISD::Constant:
      case ISD::ConstantFP:
        return StoreSource::Constant;
      case ISD::BUILD_VECTOR:
        if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
            ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
          return StoreSource::Constant;
        return StoreSource::Unknown;
      case ISD::EXTRACT_VECTOR_ELT:
      case ISD::EXTRACT_SUBVECTOR:
        return StoreSource::Extract;
      case ISD::LOAD:
        return StoreSource::Load;
      default:
        return StoreSource::Unknown;
      }
    }

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
                                     SDValue ConstNode);
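    // Worked instance of the fold above (added illustration): with c1 = 8 and
    // c2 = 3, (mul (add x, 8), 3) --> (add (mul x, 3), 24); this is typically
    // profitable only when the new constant c1*c2 stays cheap to materialize.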

    /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
    /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);

    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// Helper function for mergeConsecutiveStores which checks if all the
    /// store nodes have the same underlying object. We can still reuse the
    /// first store's pointer info if all the stores are from the same object.
    bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary. \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with St are placed in StoreNodes. On success,
    /// returns a chain predecessor to all store candidates.
    SDNode *getStoreMergeCandidates(StoreSDNode *St,
                                    SmallVectorImpl<MemOpLink> &StoreNodes);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// Returns 0 if there are not at least 2 consecutive stores to try merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);
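    // Example of the truncation distribution (added illustration): truncating
    // i32 -> i8, (trunc (and X, 0xFFF0)) --> (and (trunc X), 0xF0), which can
    // then combine further with other i8 operations.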

    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
    }

  public:
    /// Runs the dag combiner on all nodes in the work list.
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Convenience wrapper around TargetLowering::getShiftAmountTy.
    EVT getShiftAmountTy(EVT LHSTy) {
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType.
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  // compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace
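
// How this combiner is driven (added sketch, simplified; the real entry point
// is defined later in this file): SelectionDAG's combine entry constructs a
// DAGCombiner and runs it over the whole DAG, roughly:
//
//   void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
//                              CodeGenOptLevel OptLevel) {
//     DAGCombiner(*this, AA, OptLevel).Run(Level);
//   }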

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
//  Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

// APInts must be the same size for most operations; this helper function zero
// extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zext(Bits);
  RHS = RHS.zext(Bits);
}
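
// Illustrative usage (added): given LHS of bit width 8 and RHS of bit width
// 16, zeroExtendToMatch(LHS, RHS) widens LHS to 16 bits; calling it with
// Offset = 1 widens both to 17 bits, leaving headroom so that, e.g., a
// following one-bit left shift cannot overflow.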

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC, bool MatchStrict) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (MatchStrict &&
      (N.getOpcode() == ISD::STRICT_FSETCC ||
       N.getOpcode() == ISD::STRICT_FSETCCS)) {
    LHS = N.getOperand(1);
    RHS = N.getOperand(2);
    CC  = N.getOperand(3);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
      !TLI.isConstFalseVal(N.getOperand(3)))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
    return true;
  return false;
}

static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
  if (!ScalarTy.isSimple())
    return false;

  uint64_t MaskForTy = 0ULL;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::i8:
    MaskForTy = 0xFFULL;
    break;
  case MVT::i16:
    MaskForTy = 0xFFFFULL;
    break;
  case MVT::i32:
    MaskForTy = 0xFFFFFFFFULL;
    break;
  default:
    return false;
  }

  APInt Val;
  if (ISD::isConstantSplatVector(N, Val))
    return Val.getLimitedValue() == MaskForTy;

  return false;
}
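
// Example (added): for ScalarTy == MVT::i16, a splat-vector of the constant
// 0xFFFF satisfies this predicate, while a splat of 0x00FF does not.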

// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Determine if this is an indexed load with an opaque target constant index.
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}

bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDNode *N,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).
  //
  // (load/store (add, (add, x, y), offset2)) ->
  // (load/store (add, (add, x, offset2), y)).
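  // Concrete illustration (added comment; offsets hypothetical): if
  // CodeGenPrepare split a large GEP offset so that (add x, 4096) is shared
  // and the memory op uses (add (add x, 4096), 4), re-folding the constants
  // into (add x, 4100) could yield an immediate that no addressing mode of
  // the target encodes, costing extra instructions.
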
  if (N0.getOpcode() != ISD::ADD)
    return false;

  // Check for vscale addressing modes.
  // (load/store (add/sub (add x, y), vscale))
  // (load/store (add/sub (add x, y), (lsl vscale, C)))
  // (load/store (add/sub (add x, y), (mul vscale, C)))
  if ((N1.getOpcode() == ISD::VSCALE ||
       ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
        N1.getOperand(0).getOpcode() == ISD::VSCALE &&
        isa<ConstantSDNode>(N1.getOperand(1)))) &&
      N1.getValueType().getFixedSizeInBits() <= 64) {
    int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
                                 ? N1.getConstantOperandVal(0)
                                 : (N1.getOperand(0).getConstantOperandVal(0) *
                                    (N1.getOpcode() == ISD::SHL
                                         ? (1LL << N1.getConstantOperandVal(1))
                                         : N1.getConstantOperandVal(1)));
    if (Opc == ISD::SUB)
      ScalableOffset = -ScalableOffset;
    if (all_of(N->users(), [&](SDNode *Node) {
          if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
              LoadStore && LoadStore->getBasePtr().getNode() == N) {
            TargetLoweringBase::AddrMode AM;
            AM.HasBaseReg = true;
            AM.ScalableOffset = ScalableOffset;
            EVT VT = LoadStore->getMemoryVT();
            unsigned AS = LoadStore->getAddressSpace();
            Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
            return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
                                             AS);
          }
          return false;
        }))
      return true;
  }

  if (Opc != ISD::ADD)
    return false;

  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C2)
    return false;

  const APInt &C2APIntVal = C2->getAPIntValue();
  if (C2APIntVal.getSignificantBits() > 64)
    return false;

  if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    if (N0.hasOneUse())
      return false;

    const APInt &C1APIntVal = C1->getAPIntValue();
    const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
    if (CombinedValueIntVal.getSignificantBits() > 64)
      return false;
    const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

    for (SDNode *Node : N->users()) {
      if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
        // Is x[offset2] already not a legal addressing mode? If so then
        // reassociating the constants breaks nothing (we test offset2 because
        // that's the one we hope to fold into the load or store).
        TargetLoweringBase::AddrMode AM;
        AM.HasBaseReg = true;
        AM.BaseOffs = C2APIntVal.getSExtValue();
        EVT VT = LoadStore->getMemoryVT();
        unsigned AS = LoadStore->getAddressSpace();
        Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
          continue;

        // Would x[offset1+offset2] still be a legal addressing mode?
        AM.BaseOffs = CombinedValue;
        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
          return true;
      }
    }
  } else {
    if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
      if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
        return false;

    for (SDNode *Node : N->users()) {
      auto *LoadStore = dyn_cast<MemSDNode>(Node);
      if (!LoadStore)
        return false;

      // Is x[offset2] a legal addressing mode? If so then
      // reassociating the constants breaks the address pattern.
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return false;
    }
    return true;
  }

  return false;
}

/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
/// \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1,
                                               SDNodeFlags Flags) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    SDNodeFlags NewFlags;
    if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
        Flags.hasNoUnsignedWrap())
      NewFlags |= SDNodeFlags::NoUnsignedWrap;

    if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
        NewFlags.setDisjoint(Flags.hasDisjoint() &&
                             N0->getFlags().hasDisjoint());
        return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
      }
      return SDValue();
    }
    if (TLI.isReassocProfitable(DAG, N0, N1)) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
      return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
    }
  }

  // Check for repeated operand logic simplifications.
  if (Opc == ISD::AND || Opc == ISD::OR) {
    // (N00 & N01) & N00 --> N00 & N01
    // (N00 & N01) & N01 --> N00 & N01
    // (N00 | N01) | N00 --> N00 | N01
    // (N00 | N01) | N01 --> N00 | N01
    if (N1 == N00 || N1 == N01)
      return N0;
  }
  if (Opc == ISD::XOR) {
    // (N00 ^ N01) ^ N00 --> N01
    if (N1 == N00)
      return N01;
    // (N00 ^ N01) ^ N01 --> N00
    if (N1 == N01)
      return N00;
  }

  if (TLI.isReassocProfitable(DAG, N0, N1)) {
    if (N1 != N01) {
      // Reassociate if (op N00, N1) already exists.
      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
        // If Op (Op N00, N1), N01 already exists as well, we need to stop
        // reassociating to avoid an infinite loop.
        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
      }
    }

    if (N1 != N00) {
      // Reassociate if (op N01, N1) already exists.
      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
        // If Op (Op N01, N1), N00 already exists as well, we need to stop
        // reassociating to avoid an infinite loop.
        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
      }
    }

    // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to
    // (OR/AND (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with
    // the same predicate, or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and
    // N1 are comparisons with the same predicate. This enables optimizations
    // such as the following one:
    //   CMP(A,C) || CMP(B,C) => CMP(MIN/MAX(A,B), C)
    //   CMP(A,C) && CMP(B,C) => CMP(MIN/MAX(A,B), C)
    if (Opc == ISD::AND || Opc == ISD::OR) {
      if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
          N01->getOpcode() == ISD::SETCC) {
        ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
        ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
        ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
        if (CC1 == CC00 && CC1 != CC01) {
          SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
          return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
        }
        if (CC1 == CC01 && CC1 != CC00) {
          SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
          return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
        }
      }
    }
  }

  return SDValue();
}

/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
/// same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");

  // Floating-point reassociation is not allowed without loose FP math.
  if (N0.getValueType().isFloatingPoint() ||
      N1.getValueType().isFloatingPoint())
    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
      return SDValue();

  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
    return Combined;
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
    return Combined;
  return SDValue();
}

// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y)).
// Note that we only expect Flags to be passed from FP operations. For integer
// operations they need to be dropped.
SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
                                          const SDLoc &DL, EVT VT, SDValue N0,
                                          SDValue N1, SDNodeFlags Flags) {
  if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
      N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
      N0->hasOneUse() && N1->hasOneUse() &&
      hasOperation(Opc, N0.getOperand(0).getValueType()) &&
      TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
    SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
    return DAG.getNode(RedOpc, DL, VT,
                       DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
                                   N0.getOperand(0), N1.getOperand(0)));
  }
  return SDValue();
}
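
// Shape of the reduction fold above (added illustration):
//   (add (vecreduce_add x), (vecreduce_add y))
//     --> (vecreduce_add (add x, y))
// trading two horizontal reductions for one vector add plus one reduction,
// when the target reports this as profitable.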

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist.
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode())
        AddToWorklistWithUsers(To[i].getNode());
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');

  // Replace all uses.
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklistWithUsers(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.
  recursivelyDeleteUnusedNodes(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                       const APInt &DemandedElts,
                                       bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
                                AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &DemandedElts,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                      TLO, 0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.dump(&DAG); dbgs() << '\n');

  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));

  AddToWorklist(Trunc.getNode());
  recursivelyDeleteUnusedNodes(Load);
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType =
        ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
        Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it
/// is beneficial. e.g. On x86, it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
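/// Illustrative shape of the promotion (added note): an undesirable i16 add
/// may be rewritten as
///   (i16 (trunc (i32 (add (ext a), (ext b)))))
/// where the extension kind is chosen per operand by PromoteOperand.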
1512SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1513 if (!LegalOperations)
1514 return SDValue();
1515
1516 EVT VT = Op.getValueType();
1517 if (VT.isVector() || !VT.isInteger())
1518 return SDValue();
1519
1520 // If operation type is 'undesirable', e.g. i16 on x86, consider
1521 // promoting it.
1522 unsigned Opc = Op.getOpcode();
1523 if (TLI.isTypeDesirableForOp(Opc, VT))
1524 return SDValue();
1525
1526 EVT PVT = VT;
1527 // Consult target whether it is a good idea to promote this operation and
1528 // what's the right type to promote it to.
1529 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1530 assert(PVT != VT && "Don't know what type to promote to!");
1531
1532 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1533
1534 bool Replace0 = false;
1535 SDValue N0 = Op.getOperand(0);
1536 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1537
1538 bool Replace1 = false;
1539 SDValue N1 = Op.getOperand(1);
1540 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1541 SDLoc DL(Op);
1542
1543 SDValue RV =
1544 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1545
1546 // We are always replacing N0/N1's use in N and only need additional
1547 // replacements if there are additional uses.
1548 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1549 // (SDValue) here because the node may reference multiple values
1550 // (for example, the chain value of a load node).
1551 Replace0 &= !N0->hasOneUse();
1552 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1553
1554 // Combine Op here so it is preserved past replacements.
1555 CombineTo(Op.getNode(), RV);
1556
1557 // If operands have a use ordering, make sure we deal with
1558 // predecessor first.
1559 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1560 std::swap(N0, N1);
1561 std::swap(NN0, NN1);
1562 }
1563
1564 if (Replace0) {
1565 AddToWorklist(NN0.getNode());
1566 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1567 }
1568 if (Replace1) {
1569 AddToWorklist(NN1.getNode());
1570 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1571 }
1572 return Op;
1573 }
1574 return SDValue();
1575}
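// Illustrative before/after for the binop promotion above, assuming a target
// where i16 is undesirable and IsDesirableToPromoteOp selects PVT == i32:
//   t3: i16 = add t1, t2
// is rebuilt as
//   t6: i32 = add (any_extend t1), (any_extend t2)
//   t7: i16 = truncate t6
// and t3 is combined to t7; any promoted load among t1/t2 with extra uses is
// then replaced through ReplaceLoadWithPromotedLoad.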
1576
1577/// Promote the specified integer shift operation if the target indicates it is
1578/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1579/// i32 since i16 instructions are longer.
1580SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1581 if (!LegalOperations)
1582 return SDValue();
1583
1584 EVT VT = Op.getValueType();
1585 if (VT.isVector() || !VT.isInteger())
1586 return SDValue();
1587
1588 // If operation type is 'undesirable', e.g. i16 on x86, consider
1589 // promoting it.
1590 unsigned Opc = Op.getOpcode();
1591 if (TLI.isTypeDesirableForOp(Opc, VT))
1592 return SDValue();
1593
1594 EVT PVT = VT;
1595 // Consult target whether it is a good idea to promote this operation and
1596 // what's the right type to promote it to.
1597 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1598 assert(PVT != VT && "Don't know what type to promote to!");
1599
1600 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1601
1602 bool Replace = false;
1603 SDValue N0 = Op.getOperand(0);
1604 if (Opc == ISD::SRA)
1605 N0 = SExtPromoteOperand(N0, PVT);
1606 else if (Opc == ISD::SRL)
1607 N0 = ZExtPromoteOperand(N0, PVT);
1608 else
1609 N0 = PromoteOperand(N0, PVT, Replace);
1610
1611 if (!N0.getNode())
1612 return SDValue();
1613
1614 SDLoc DL(Op);
1615 SDValue N1 = Op.getOperand(1);
1616 SDValue RV =
1617 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1618
1619 if (Replace)
1620 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1621
1622 // Deal with Op being deleted.
1623 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1624 return RV;
1625 }
1626 return SDValue();
1627}
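// For example, promoting (srl x:i16, c) to i32 requires ZExtPromoteOperand on
// x so that the bits shifted in from above bit 15 are zero, and (sra x:i16, c)
// analogously requires the sign-extended form. Only the shifted value is
// promoted; the shift amount N1 is passed through unchanged.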
1628
1629SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1630 if (!LegalOperations)
1631 return SDValue();
1632
1633 EVT VT = Op.getValueType();
1634 if (VT.isVector() || !VT.isInteger())
1635 return SDValue();
1636
1637 // If operation type is 'undesirable', e.g. i16 on x86, consider
1638 // promoting it.
1639 unsigned Opc = Op.getOpcode();
1640 if (TLI.isTypeDesirableForOp(Opc, VT))
1641 return SDValue();
1642
1643 EVT PVT = VT;
1644 // Consult target whether it is a good idea to promote this operation and
1645 // what's the right type to promote it to.
1646 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1647 assert(PVT != VT && "Don't know what type to promote to!");
1648 // fold (aext (aext x)) -> (aext x)
1649 // fold (aext (zext x)) -> (zext x)
1650 // fold (aext (sext x)) -> (sext x)
1651 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1652 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1653 }
1654 return SDValue();
1655}
1656
1657bool DAGCombiner::PromoteLoad(SDValue Op) {
1658 if (!LegalOperations)
1659 return false;
1660
1661 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1662 return false;
1663
1664 EVT VT = Op.getValueType();
1665 if (VT.isVector() || !VT.isInteger())
1666 return false;
1667
1668 // If operation type is 'undesirable', e.g. i16 on x86, consider
1669 // promoting it.
1670 unsigned Opc = Op.getOpcode();
1671 if (TLI.isTypeDesirableForOp(Opc, VT))
1672 return false;
1673
1674 EVT PVT = VT;
1675 // Consult target whether it is a good idea to promote this operation and
1676 // what's the right type to promote it to.
1677 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1678 assert(PVT != VT && "Don't know what type to promote to!");
1679
1680 SDLoc DL(Op);
1681 SDNode *N = Op.getNode();
1682 LoadSDNode *LD = cast<LoadSDNode>(N);
1683 EVT MemVT = LD->getMemoryVT();
1684 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1685 : LD->getExtensionType();
1686 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1687 LD->getChain(), LD->getBasePtr(),
1688 MemVT, LD->getMemOperand());
1689 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1690
1691 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1692 Result.dump(&DAG); dbgs() << '\n');
1693
1694 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1695 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1696
1697 AddToWorklist(Result.getNode());
1698 recursivelyDeleteUnusedNodes(N);
1699 return true;
1700 }
1701
1702 return false;
1703}
1704
1705/// Recursively delete a node which has no uses and any operands for
1706/// which it is the only use.
1707///
1708/// Note that this both deletes the nodes and removes them from the worklist.
1709 /// It also adds any nodes that have had a user deleted to the worklist, as
1710 /// they may now have only one use and be subject to other combines.
1711bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1712 if (!N->use_empty())
1713 return false;
1714
1715 SmallSetVector<SDNode *, 16> Nodes;
1716 Nodes.insert(N);
1717 do {
1718 N = Nodes.pop_back_val();
1719 if (!N)
1720 continue;
1721
1722 if (N->use_empty()) {
1723 for (const SDValue &ChildN : N->op_values())
1724 Nodes.insert(ChildN.getNode());
1725
1726 removeFromWorklist(N);
1727 DAG.DeleteNode(N);
1728 } else {
1729 AddToWorklist(N);
1730 }
1731 } while (!Nodes.empty());
1732 return true;
1733}
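// For example, if N = (add (mul a, b), c) is dead and the MUL has no other
// users, both the ADD and the MUL are deleted, while a, b and c (which merely
// lost a user) are re-added to the worklist since they may now combine
// differently.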
1734
1735//===----------------------------------------------------------------------===//
1736// Main DAG Combiner implementation
1737//===----------------------------------------------------------------------===//
1738
1739void DAGCombiner::Run(CombineLevel AtLevel) {
1740 // Set the instance variables so that the various visit routines may use them.
1741 Level = AtLevel;
1742 LegalDAG = Level >= AfterLegalizeDAG;
1743 LegalOperations = Level >= AfterLegalizeVectorOps;
1744 LegalTypes = Level >= AfterLegalizeTypes;
1745
1746 WorklistInserter AddNodes(*this);
1747
1748 // Add all the dag nodes to the worklist.
1749 //
1750 // Note: Not all nodes are added to the PruningList here; the only nodes
1751 // which can be deleted are those which have no uses, and all other nodes
1752 // which would otherwise be added to the worklist by the first call to
1753 // getNextWorklistEntry are already present in it.
1754 for (SDNode &Node : DAG.allnodes())
1755 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1756
1757 // Create a dummy node (which is not added to allnodes) that adds a reference
1758 // to the root node, preventing it from being deleted, and tracking any
1759 // changes of the root.
1760 HandleSDNode Dummy(DAG.getRoot());
1761
1762 // While we have a valid worklist entry node, try to combine it.
1763 while (SDNode *N = getNextWorklistEntry()) {
1764 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1765 // N is deleted from the DAG, since they too may now be dead or may have a
1766 // reduced number of uses, allowing other xforms.
1767 if (recursivelyDeleteUnusedNodes(N))
1768 continue;
1769
1770 WorklistRemover DeadNodes(*this);
1771
1772 // If this combine is running after legalizing the DAG, re-legalize any
1773 // nodes pulled off the worklist.
1774 if (LegalDAG) {
1775 SmallSetVector<SDNode *, 16> UpdatedNodes;
1776 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1777
1778 for (SDNode *LN : UpdatedNodes)
1779 AddToWorklistWithUsers(LN);
1780
1781 if (!NIsValid)
1782 continue;
1783 }
1784
1785 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1786
1787 // Add any operands of the new node which have not yet been combined to the
1788 // worklist as well. getNextWorklistEntry flags nodes that have been
1789 // combined before. Because the worklist uniques things already, this won't
1790 // repeatedly process the same operand.
1791 for (const SDValue &ChildN : N->op_values())
1792 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1793 /*SkipIfCombinedBefore=*/true);
1794
1795 SDValue RV = combine(N);
1796
1797 if (!RV.getNode())
1798 continue;
1799
1800 ++NodesCombined;
1801
1802 // Invalidate cached info.
1803 ChainsWithoutMergeableStores.clear();
1804
1805 // If we get back the same node we passed in, rather than a new node or
1806 // zero, we know that the node must have defined multiple values and
1807 // CombineTo was used. Since CombineTo takes care of the worklist
1808 // mechanics for us, we have no work to do in this case.
1809 if (RV.getNode() == N)
1810 continue;
1811
1812 assert(N->getOpcode() != ISD::DELETED_NODE &&
1813 RV.getOpcode() != ISD::DELETED_NODE &&
1814 "Node was deleted but visit returned new node!");
1815
1816 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1817
1818 if (N->getNumValues() == RV->getNumValues())
1819 DAG.ReplaceAllUsesWith(N, RV.getNode());
1820 else {
1821 assert(N->getValueType(0) == RV.getValueType() &&
1822 N->getNumValues() == 1 && "Type mismatch");
1823 DAG.ReplaceAllUsesWith(N, &RV);
1824 }
1825
1826 // Push the new node and any users onto the worklist. Omit this if the
1827 // new node is the EntryToken (e.g. if a store managed to get optimized
1828 // out), because re-visiting the EntryToken and its users will not uncover
1829 // any additional opportunities, but there may be a large number of such
1830 // users, potentially causing compile time explosion.
1831 if (RV.getOpcode() != ISD::EntryToken)
1832 AddToWorklistWithUsers(RV.getNode());
1833
1834 // Finally, if the node is now dead, remove it from the graph. The node
1835 // may not be dead if the replacement process recursively simplified to
1836 // something else needing this node. This will also take care of adding any
1837 // operands which have lost a user to the worklist.
1838 recursivelyDeleteUnusedNodes(N);
1839 }
1840
1841 // If the root changed (e.g. it was a dead load), update the root.
1842 DAG.setRoot(Dummy.getValue());
1843 DAG.RemoveDeadNodes();
1844}
1845
1846SDValue DAGCombiner::visit(SDNode *N) {
1847 // clang-format off
1848 switch (N->getOpcode()) {
1849 default: break;
1850 case ISD::TokenFactor: return visitTokenFactor(N);
1851 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1852 case ISD::ADD: return visitADD(N);
1853 case ISD::SUB: return visitSUB(N);
1854 case ISD::SADDSAT:
1855 case ISD::UADDSAT: return visitADDSAT(N);
1856 case ISD::SSUBSAT:
1857 case ISD::USUBSAT: return visitSUBSAT(N);
1858 case ISD::ADDC: return visitADDC(N);
1859 case ISD::SADDO:
1860 case ISD::UADDO: return visitADDO(N);
1861 case ISD::SUBC: return visitSUBC(N);
1862 case ISD::SSUBO:
1863 case ISD::USUBO: return visitSUBO(N);
1864 case ISD::ADDE: return visitADDE(N);
1865 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1866 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1867 case ISD::SUBE: return visitSUBE(N);
1868 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1869 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1870 case ISD::SMULFIX:
1871 case ISD::SMULFIXSAT:
1872 case ISD::UMULFIX:
1873 case ISD::UMULFIXSAT: return visitMULFIX(N);
1874 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1875 case ISD::SDIV: return visitSDIV(N);
1876 case ISD::UDIV: return visitUDIV(N);
1877 case ISD::SREM:
1878 case ISD::UREM: return visitREM(N);
1879 case ISD::MULHU: return visitMULHU(N);
1880 case ISD::MULHS: return visitMULHS(N);
1881 case ISD::AVGFLOORS:
1882 case ISD::AVGFLOORU:
1883 case ISD::AVGCEILS:
1884 case ISD::AVGCEILU: return visitAVG(N);
1885 case ISD::ABDS:
1886 case ISD::ABDU: return visitABD(N);
1887 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1888 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1889 case ISD::SMULO:
1890 case ISD::UMULO: return visitMULO(N);
1891 case ISD::SMIN:
1892 case ISD::SMAX:
1893 case ISD::UMIN:
1894 case ISD::UMAX: return visitIMINMAX(N);
1895 case ISD::AND: return visitAND(N);
1896 case ISD::OR: return visitOR(N);
1897 case ISD::XOR: return visitXOR(N);
1898 case ISD::SHL: return visitSHL(N);
1899 case ISD::SRA: return visitSRA(N);
1900 case ISD::SRL: return visitSRL(N);
1901 case ISD::ROTR:
1902 case ISD::ROTL: return visitRotate(N);
1903 case ISD::FSHL:
1904 case ISD::FSHR: return visitFunnelShift(N);
1905 case ISD::SSHLSAT:
1906 case ISD::USHLSAT: return visitSHLSAT(N);
1907 case ISD::ABS: return visitABS(N);
1908 case ISD::BSWAP: return visitBSWAP(N);
1909 case ISD::BITREVERSE: return visitBITREVERSE(N);
1910 case ISD::CTLZ: return visitCTLZ(N);
1911 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1912 case ISD::CTTZ: return visitCTTZ(N);
1913 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1914 case ISD::CTPOP: return visitCTPOP(N);
1915 case ISD::SELECT: return visitSELECT(N);
1916 case ISD::VSELECT: return visitVSELECT(N);
1917 case ISD::SELECT_CC: return visitSELECT_CC(N);
1918 case ISD::SETCC: return visitSETCC(N);
1919 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1920 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1921 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1922 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1923 case ISD::AssertSext:
1924 case ISD::AssertZext: return visitAssertExt(N);
1925 case ISD::AssertAlign: return visitAssertAlign(N);
1926 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1927 case ISD::SIGN_EXTEND_VECTOR_INREG:
1928 case ISD::ZERO_EXTEND_VECTOR_INREG:
1929 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1930 case ISD::TRUNCATE: return visitTRUNCATE(N);
1931 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1932 case ISD::BITCAST: return visitBITCAST(N);
1933 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1934 case ISD::FADD: return visitFADD(N);
1935 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1936 case ISD::FSUB: return visitFSUB(N);
1937 case ISD::FMUL: return visitFMUL(N);
1938 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1939 case ISD::FMAD: return visitFMAD(N);
1940 case ISD::FDIV: return visitFDIV(N);
1941 case ISD::FREM: return visitFREM(N);
1942 case ISD::FSQRT: return visitFSQRT(N);
1943 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1944 case ISD::FPOW: return visitFPOW(N);
1945 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1946 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1947 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1948 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1949 case ISD::LROUND:
1950 case ISD::LLROUND:
1951 case ISD::LRINT:
1952 case ISD::LLRINT: return visitXROUND(N);
1953 case ISD::FP_ROUND: return visitFP_ROUND(N);
1954 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1955 case ISD::FNEG: return visitFNEG(N);
1956 case ISD::FABS: return visitFABS(N);
1957 case ISD::FFLOOR: return visitFFLOOR(N);
1958 case ISD::FMINNUM:
1959 case ISD::FMAXNUM:
1960 case ISD::FMINIMUM:
1961 case ISD::FMAXIMUM:
1962 case ISD::FMINIMUMNUM:
1963 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
1964 case ISD::FCEIL: return visitFCEIL(N);
1965 case ISD::FTRUNC: return visitFTRUNC(N);
1966 case ISD::FFREXP: return visitFFREXP(N);
1967 case ISD::BRCOND: return visitBRCOND(N);
1968 case ISD::BR_CC: return visitBR_CC(N);
1969 case ISD::LOAD: return visitLOAD(N);
1970 case ISD::STORE: return visitSTORE(N);
1971 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1972 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1973 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1974 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1975 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1976 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1977 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1978 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1979 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1980 case ISD::MGATHER: return visitMGATHER(N);
1981 case ISD::MLOAD: return visitMLOAD(N);
1982 case ISD::MSCATTER: return visitMSCATTER(N);
1983 case ISD::MSTORE: return visitMSTORE(N);
1984 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
1985 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
1986 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1987 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1988 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1989 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1990 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1991 case ISD::FREEZE: return visitFREEZE(N);
1992 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1993 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1994 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
1995 case ISD::VECREDUCE_FADD:
1996 case ISD::VECREDUCE_FMUL:
1997 case ISD::VECREDUCE_ADD:
1998 case ISD::VECREDUCE_MUL:
1999 case ISD::VECREDUCE_AND:
2000 case ISD::VECREDUCE_OR:
2001 case ISD::VECREDUCE_XOR:
2002 case ISD::VECREDUCE_SMAX:
2003 case ISD::VECREDUCE_SMIN:
2004 case ISD::VECREDUCE_UMAX:
2005 case ISD::VECREDUCE_UMIN:
2006 case ISD::VECREDUCE_FMAX:
2007 case ISD::VECREDUCE_FMIN:
2008 case ISD::VECREDUCE_FMAXIMUM:
2009 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2010#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2011#include "llvm/IR/VPIntrinsics.def"
2012 return visitVPOp(N);
2013 }
2014 // clang-format on
2015 return SDValue();
2016}
2017
2018SDValue DAGCombiner::combine(SDNode *N) {
2019 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2020 return SDValue();
2021
2022 SDValue RV;
2023 if (!DisableGenericCombines)
2024 RV = visit(N);
2025
2026 // If nothing happened, try a target-specific DAG combine.
2027 if (!RV.getNode()) {
2028 assert(N->getOpcode() != ISD::DELETED_NODE &&
2029 "Node was deleted but visit returned NULL!");
2030
2031 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2032 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2033
2034 // Expose the DAG combiner to the target combiner impls.
2035 TargetLowering::DAGCombinerInfo
2036 DagCombineInfo(DAG, Level, false, this);
2037
2038 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2039 }
2040 }
2041
2042 // If nothing happened still, try promoting the operation.
2043 if (!RV.getNode()) {
2044 switch (N->getOpcode()) {
2045 default: break;
2046 case ISD::ADD:
2047 case ISD::SUB:
2048 case ISD::MUL:
2049 case ISD::AND:
2050 case ISD::OR:
2051 case ISD::XOR:
2052 RV = PromoteIntBinOp(SDValue(N, 0));
2053 break;
2054 case ISD::SHL:
2055 case ISD::SRA:
2056 case ISD::SRL:
2057 RV = PromoteIntShiftOp(SDValue(N, 0));
2058 break;
2059 case ISD::SIGN_EXTEND:
2060 case ISD::ZERO_EXTEND:
2061 case ISD::ANY_EXTEND:
2062 RV = PromoteExtend(SDValue(N, 0));
2063 break;
2064 case ISD::LOAD:
2065 if (PromoteLoad(SDValue(N, 0)))
2066 RV = SDValue(N, 0);
2067 break;
2068 }
2069 }
2070
2071 // If N is a commutative binary node, try to eliminate it if the commuted
2072 // version is already present in the DAG.
2073 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2074 SDValue N0 = N->getOperand(0);
2075 SDValue N1 = N->getOperand(1);
2076
2077 // Constant operands are canonicalized to RHS.
2078 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2079 SDValue Ops[] = {N1, N0};
2080 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2081 N->getFlags());
2082 if (CSENode)
2083 return SDValue(CSENode, 0);
2084 }
2085 }
2086
2087 return RV;
2088}
2089
2090/// Given a node, return its input chain if it has one, otherwise return a null
2091/// sd operand.
2092 static SDValue getInputChainForNode(SDNode *N) {
2093 if (unsigned NumOps = N->getNumOperands()) {
2094 if (N->getOperand(0).getValueType() == MVT::Other)
2095 return N->getOperand(0);
2096 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2097 return N->getOperand(NumOps-1);
2098 for (unsigned i = 1; i < NumOps-1; ++i)
2099 if (N->getOperand(i).getValueType() == MVT::Other)
2100 return N->getOperand(i);
2101 }
2102 return SDValue();
2103}
2104
2105SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2106 SDValue Operand = N->getOperand(0);
2107 EVT VT = Operand.getValueType();
2108 SDLoc dl(N);
2109
2110 // Canonicalize undef to quiet NaN.
2111 if (Operand.isUndef()) {
2112 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2113 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2114 }
2115 return SDValue();
2116}
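// For example, fcanonicalize(undef:f32) becomes a ConstantFP holding the
// canonical quiet NaN of IEEE single precision (bit pattern 0x7FC00000),
// which is what APFloat::getQNaN produces for those semantics.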
2117
2118SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2119 // If N has two operands, where one has an input chain equal to the other,
2120 // the 'other' chain is redundant.
2121 if (N->getNumOperands() == 2) {
2122 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2123 return N->getOperand(0);
2124 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2125 return N->getOperand(1);
2126 }
2127
2128 // Don't simplify token factors if optnone.
2129 if (OptLevel == CodeGenOptLevel::None)
2130 return SDValue();
2131
2132 // Don't simplify the token factor if the node itself has too many operands.
2133 if (N->getNumOperands() > TokenFactorInlineLimit)
2134 return SDValue();
2135
2136 // If the sole user is a token factor, we should make sure we have a
2137 // chance to merge them together. This prevents TF chains from inhibiting
2138 // optimizations.
2139 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2140 AddToWorklist(*(N->user_begin()));
2141
2142 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2143 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2144 SmallPtrSet<SDNode *, 16> SeenOps;
2145 bool Changed = false; // If we should replace this token factor.
2146
2147 // Start out with this token factor.
2148 TFs.push_back(N);
2149
2150 // Iterate through token factors. The TFs list grows when new token
2151 // factors are encountered.
2152 for (unsigned i = 0; i < TFs.size(); ++i) {
2153 // Limit number of nodes to inline, to avoid quadratic compile times.
2154 // We have to add the outstanding Token Factors to Ops, otherwise we might
2155 // drop Ops from the resulting Token Factors.
2156 if (Ops.size() > TokenFactorInlineLimit) {
2157 for (unsigned j = i; j < TFs.size(); j++)
2158 Ops.emplace_back(TFs[j], 0);
2159 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2160 // combiner worklist later.
2161 TFs.resize(i);
2162 break;
2163 }
2164
2165 SDNode *TF = TFs[i];
2166 // Check each of the operands.
2167 for (const SDValue &Op : TF->op_values()) {
2168 switch (Op.getOpcode()) {
2169 case ISD::EntryToken:
2170 // Entry tokens don't need to be added to the list. They are
2171 // redundant.
2172 Changed = true;
2173 break;
2174
2175 case ISD::TokenFactor:
2176 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2177 // Queue up for processing.
2178 TFs.push_back(Op.getNode());
2179 Changed = true;
2180 break;
2181 }
2182 [[fallthrough]];
2183
2184 default:
2185 // Only add if it isn't already in the list.
2186 if (SeenOps.insert(Op.getNode()).second)
2187 Ops.push_back(Op);
2188 else
2189 Changed = true;
2190 break;
2191 }
2192 }
2193 }
2194
2195 // Re-visit inlined Token Factors, to clean them up in case they have been
2196 // removed. Skip the first Token Factor, as this is the current node.
2197 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2198 AddToWorklist(TFs[i]);
2199
2200 // Remove Nodes that are chained to another node in the list. Do so
2201 // by walking up chains breadth-first, stopping when we've seen
2202 // another operand. In general we must climb to the EntryNode, but we can exit
2203 // early if we find all remaining work is associated with just one operand as
2204 // no further pruning is possible.
2205
2206 // List of nodes to search through and original Ops from which they originate.
2207 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2208 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2209 SmallPtrSet<SDNode *, 16> SeenChains;
2210 bool DidPruneOps = false;
2211
2212 unsigned NumLeftToConsider = 0;
2213 for (const SDValue &Op : Ops) {
2214 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2215 OpWorkCount.push_back(1);
2216 }
2217
2218 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2219 // If this is an Op, we can remove the op from the list. Re-mark any
2220 // search associated with it as coming from the current OpNumber.
2221 if (SeenOps.contains(Op)) {
2222 Changed = true;
2223 DidPruneOps = true;
2224 unsigned OrigOpNumber = 0;
2225 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2226 OrigOpNumber++;
2227 assert((OrigOpNumber != Ops.size()) &&
2228 "expected to find TokenFactor Operand");
2229 // Re-mark worklist from OrigOpNumber to OpNumber
2230 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2231 if (Worklist[i].second == OrigOpNumber) {
2232 Worklist[i].second = OpNumber;
2233 }
2234 }
2235 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2236 OpWorkCount[OrigOpNumber] = 0;
2237 NumLeftToConsider--;
2238 }
2239 // Add if it's a new chain
2240 if (SeenChains.insert(Op).second) {
2241 OpWorkCount[OpNumber]++;
2242 Worklist.push_back(std::make_pair(Op, OpNumber));
2243 }
2244 };
2245
2246 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2247 // We need to consider at least 2 Ops to prune.
2248 if (NumLeftToConsider <= 1)
2249 break;
2250 auto CurNode = Worklist[i].first;
2251 auto CurOpNumber = Worklist[i].second;
2252 assert((OpWorkCount[CurOpNumber] > 0) &&
2253 "Node should not appear in worklist");
2254 switch (CurNode->getOpcode()) {
2255 case ISD::EntryToken:
2256 // Hitting EntryToken is the only way for the search to terminate without
2257 // hitting another operand's search.
2258 // Prevent us from marking this operand as
2259 // considered.
2260 NumLeftToConsider++;
2261 break;
2262 case ISD::TokenFactor:
2263 for (const SDValue &Op : CurNode->op_values())
2264 AddToWorklist(i, Op.getNode(), CurOpNumber);
2265 break;
2266 case ISD::LIFETIME_START:
2267 case ISD::LIFETIME_END:
2268 case ISD::CopyFromReg:
2269 case ISD::CopyToReg:
2270 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2271 break;
2272 default:
2273 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2274 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2275 break;
2276 }
2277 OpWorkCount[CurOpNumber]--;
2278 if (OpWorkCount[CurOpNumber] == 0)
2279 NumLeftToConsider--;
2280 }
2281
2282 // If we've changed things around then replace token factor.
2283 if (Changed) {
2284 SDValue Result;
2285 if (Ops.empty()) {
2286 // The entry token is the only possible outcome.
2287 Result = DAG.getEntryNode();
2288 } else {
2289 if (DidPruneOps) {
2290 SmallVector<SDValue, 8> PrunedOps;
2291 // Keep only ops that were not reached while walking up another op's chain.
2292 for (const SDValue &Op : Ops) {
2293 if (SeenChains.count(Op.getNode()) == 0)
2294 PrunedOps.push_back(Op);
2295 }
2296 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2297 } else {
2298 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2299 }
2300 }
2301 return Result;
2302 }
2303 return SDValue();
2304}
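// A small worked example of the flattening and pruning above (value numbers
// hypothetical): given that t3 has a single use,
//   t3: ch = TokenFactor t1, t2
//   t5: ch = TokenFactor t3, t4
// is first inlined to TokenFactor t1, t2, t4. If the breadth-first chain walk
// then discovers that t4 (say, a store) is itself chained on t1, t1 is
// redundant and the final result is TokenFactor t2, t4.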
2305
2306/// MERGE_VALUES can always be eliminated.
2307SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2308 WorklistRemover DeadNodes(*this);
2309 // Replacing results may cause a different MERGE_VALUES to suddenly
2310 // be CSE'd with N, and carry its uses with it. Iterate until no
2311 // uses remain, to ensure that the node can be safely deleted.
2312 // First add the users of this node to the work list so that they
2313 // can be tried again once they have new operands.
2314 AddUsersToWorklist(N);
2315 do {
2316 // Do as a single replacement to avoid rewalking use lists.
2317 SmallVector<SDValue, 8> Ops(N->ops());
2318 DAG.ReplaceAllUsesWith(N, Ops.data());
2319 } while (!N->use_empty());
2320 deleteAndRecombine(N);
2321 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2322}
2323
2324 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2325 /// ConstantSDNode pointer, else nullptr.
2326 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2327 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2328 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2329}
2330
2331 // isTruncateOf - If N is a truncate of some other value, return true and
2332 // record the value being truncated in Op and which of Op's bits are
2333 // zero/one in Known. This function computes KnownBits to avoid a duplicated
2334 // call to computeKnownBits in the caller.
2335 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2336 KnownBits &Known) {
2337 if (N->getOpcode() == ISD::TRUNCATE) {
2338 Op = N->getOperand(0);
2339 Known = DAG.computeKnownBits(Op);
2340 if (N->getFlags().hasNoUnsignedWrap())
2341 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2342 return true;
2343 }
2344
2345 if (N.getValueType().getScalarType() != MVT::i1 ||
2346 !sd_match(
2347 N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2348 return false;
2349
2350 Known = DAG.computeKnownBits(Op);
2351 return (Known.Zero | 1).isAllOnes();
2352}
2353
2354/// Return true if 'Use' is a load or a store that uses N as its base pointer
2355/// and that N may be folded in the load / store addressing mode.
2356 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2357 const TargetLowering &TLI) {
2358 EVT VT;
2359 unsigned AS;
2360
2361 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2362 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2363 return false;
2364 VT = LD->getMemoryVT();
2365 AS = LD->getAddressSpace();
2366 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2367 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2368 return false;
2369 VT = ST->getMemoryVT();
2370 AS = ST->getAddressSpace();
2371 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2372 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2373 return false;
2374 VT = LD->getMemoryVT();
2375 AS = LD->getAddressSpace();
2376 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2377 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2378 return false;
2379 VT = ST->getMemoryVT();
2380 AS = ST->getAddressSpace();
2381 } else {
2382 return false;
2383 }
2384
2385 TargetLowering::AddrMode AM;
2386 if (N->getOpcode() == ISD::ADD) {
2387 AM.HasBaseReg = true;
2388 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2389 if (Offset)
2390 // [reg +/- imm]
2391 AM.BaseOffs = Offset->getSExtValue();
2392 else
2393 // [reg +/- reg]
2394 AM.Scale = 1;
2395 } else if (N->getOpcode() == ISD::SUB) {
2396 AM.HasBaseReg = true;
2397 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2398 if (Offset)
2399 // [reg +/- imm]
2400 AM.BaseOffs = -Offset->getSExtValue();
2401 else
2402 // [reg +/- reg]
2403 AM.Scale = 1;
2404 } else {
2405 return false;
2406 }
2407
2408 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2409 VT.getTypeForEVT(*DAG.getContext()), AS);
2410}
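// For example, if Use is (store val, (add base, 16)) and N is that ADD, the
// query amounts to isLegalAddressingMode with HasBaseReg set and
// BaseOffs == 16 for the stored type, which typically succeeds on x86 and
// AArch64, so callers may treat the ADD as foldable into the store's
// addressing mode.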
2411
2412/// This inverts a canonicalization in IR that replaces a variable select arm
2413/// with an identity constant. Codegen improves if we re-use the variable
2414/// operand rather than load a constant. This can also be converted into a
2415/// masked vector operation if the target supports it.
2416 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2417 bool ShouldCommuteOperands) {
2418 // Match a select as operand 1. The identity constant that we are looking for
2419 // is only valid as operand 1 of a non-commutative binop.
2420 SDValue N0 = N->getOperand(0);
2421 SDValue N1 = N->getOperand(1);
2422 if (ShouldCommuteOperands)
2423 std::swap(N0, N1);
2424
2425 // TODO: Should this apply to scalar select too?
2426 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2427 return SDValue();
2428
2429 // We can't hoist all instructions because of immediate UB (not speculatable).
2430 // For example div/rem by zero.
2431 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2432 return SDValue();
2433
2434 unsigned Opcode = N->getOpcode();
2435 EVT VT = N->getValueType(0);
2436 SDValue Cond = N1.getOperand(0);
2437 SDValue TVal = N1.getOperand(1);
2438 SDValue FVal = N1.getOperand(2);
2439
2440 // This transform increases uses of N0, so freeze it to be safe.
2441 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2442 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2443 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2444 SDValue F0 = DAG.getFreeze(N0);
2445 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2446 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2447 }
2448 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2449 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2450 SDValue F0 = DAG.getFreeze(N0);
2451 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2452 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2453 }
2454
2455 return SDValue();
2456}
2457
2458SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2459 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2460 "Unexpected binary operator");
2461
2462 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2463 auto BinOpcode = BO->getOpcode();
2464 EVT VT = BO->getValueType(0);
2465 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2466 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2467 return Sel;
2468
2469 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2470 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2471 return Sel;
2472 }
2473
2474 // Don't do this unless the old select is going away. We want to eliminate the
2475 // binary operator, not replace a binop with a select.
2476 // TODO: Handle ISD::SELECT_CC.
2477 unsigned SelOpNo = 0;
2478 SDValue Sel = BO->getOperand(0);
2479 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2480 SelOpNo = 1;
2481 Sel = BO->getOperand(1);
2482
2483 // Peek through trunc to shift amount type.
2484 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2485 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2486 // This is valid when the truncated bits of x are already zero.
2487 SDValue Op;
2488 KnownBits Known;
2489 if (isTruncateOf(DAG, Sel, Op, Known) &&
2490 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2491 Sel = Op;
2492 }
2493 }
2494
2495 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2496 return SDValue();
2497
2498 SDValue CT = Sel.getOperand(1);
2499 if (!isConstantOrConstantVector(CT, true) &&
2500 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2501 return SDValue();
2502
2503 SDValue CF = Sel.getOperand(2);
2504 if (!isConstantOrConstantVector(CF, true) &&
2505 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2506 return SDValue();
2507
2508 // Bail out if any constants are opaque because we can't constant fold those.
2509 // The exception is "and" and "or" with either 0 or -1 in which case we can
2510 // propagate non constant operands into select. I.e.:
2511 // and (select Cond, 0, -1), X --> select Cond, 0, X
2512 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2513 bool CanFoldNonConst =
2514 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2515 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2516 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2517
2518 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2519 if (!CanFoldNonConst &&
2520 !isConstantOrConstantVector(CBO, true) &&
2521 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2522 return SDValue();
2523
2524 SDLoc DL(Sel);
2525 SDValue NewCT, NewCF;
2526
2527 if (CanFoldNonConst) {
2528 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2529 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2530 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2531 NewCT = CT;
2532 else
2533 NewCT = CBO;
2534
2535 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2536 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2537 NewCF = CF;
2538 else
2539 NewCF = CBO;
2540 } else {
2541 // We have a select-of-constants followed by a binary operator with a
2542 // constant. Eliminate the binop by pulling the constant math into the
2543 // select. Example:
2544 // add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2545 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2546 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2547 if (!NewCT)
2548 return SDValue();
2549
2550 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2551 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2552 if (!NewCF)
2553 return SDValue();
2554 }
2555
2556 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2557 SelectOp->setFlags(BO->getFlags());
2558 return SelectOp;
2559}
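// A concrete instance of the constant-folding path above:
//   add (select Cond, 3, 7), 1 --> select Cond, 4, 8
// The ADD disappears entirely because both select arms fold to constants,
// which is why the select is required to have only this one use.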
2560
2561 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2562 SelectionDAG &DAG) {
2563 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2564 "Expecting add or sub");
2565
2566 // Match a constant operand and a zext operand for the math instruction:
2567 // add Z, C
2568 // sub C, Z
2569 bool IsAdd = N->getOpcode() == ISD::ADD;
2570 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2571 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2572 auto *CN = dyn_cast<ConstantSDNode>(C);
2573 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2574 return SDValue();
2575
2576 // Match the zext operand as a setcc of a boolean.
2577 if (Z.getOperand(0).getValueType() != MVT::i1)
2578 return SDValue();
2579
2580 // Match the compare as: setcc (X & 1), 0, eq.
2581 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2582 m_SpecificCondCode(ISD::SETEQ))))
2583 return SDValue();
2584
2585 // We are adding/subtracting a constant and an inverted low bit. Turn that
2586 // into a subtract/add of the low bit with incremented/decremented constant:
2587 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2588 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2589 EVT VT = C.getValueType();
2590 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2591 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2592 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2593 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2594}
2595
2596// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2597SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2598 SDValue N0 = N->getOperand(0);
2599 EVT VT = N0.getValueType();
2600 SDValue A, B;
2601
2602 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2603 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2604 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2605 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2606 }
2607 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2608 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2609 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2610 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2611 }
2612 return SDValue();
2613}
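// Sanity check of the subtract form with A = 10, B = 13 (unsigned):
//   (A | B) = 15, (A ^ B) >> 1 = 3, and 15 - 3 = 12 == ceil((10 + 13) / 2).
// The identity is exact for every lane value, since OR and XOR never exceed
// the element width, unlike a naive (A + B + 1) >> 1 which can overflow.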
2614
2615/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2616/// a shift and add with a different constant.
2617 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2618 SelectionDAG &DAG) {
2619 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2620 "Expecting add or sub");
2621
2622 // We need a constant operand for the add/sub, and the other operand is a
2623 // logical shift right: add (srl), C or sub C, (srl).
2624 bool IsAdd = N->getOpcode() == ISD::ADD;
2625 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2626 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2627 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2628 ShiftOp.getOpcode() != ISD::SRL)
2629 return SDValue();
2630
2631 // The shift must be of a 'not' value.
2632 SDValue Not = ShiftOp.getOperand(0);
2633 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2634 return SDValue();
2635
2636 // The shift must be moving the sign bit to the least-significant-bit.
2637 EVT VT = ShiftOp.getValueType();
2638 SDValue ShAmt = ShiftOp.getOperand(1);
2639 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2640 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2641 return SDValue();
2642
2643 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2644 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2645 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2646 if (SDValue NewC = DAG.FoldConstantArithmetic(
2647 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2648 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2649 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2650 Not.getOperand(0), ShAmt);
2651 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2652 }
2653
2654 return SDValue();
2655}
2656
2657static bool
2658 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2659 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2660 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2661}
2662
2663/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2664/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2665/// are no common bits set in the operands).
2666SDValue DAGCombiner::visitADDLike(SDNode *N) {
2667 SDValue N0 = N->getOperand(0);
2668 SDValue N1 = N->getOperand(1);
2669 EVT VT = N0.getValueType();
2670 SDLoc DL(N);
2671
2672 // fold (add x, undef) -> undef
2673 if (N0.isUndef())
2674 return N0;
2675 if (N1.isUndef())
2676 return N1;
2677
2678 // fold (add c1, c2) -> c1+c2
2679 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2680 return C;
2681
2682 // canonicalize constant to RHS
2683 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2684 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2685 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2686
2687 if (areBitwiseNotOfEachother(N0, N1))
2688 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2689
2690 // fold vector ops
2691 if (VT.isVector()) {
2692 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2693 return FoldedVOp;
2694
2695 // fold (add x, 0) -> x, vector edition
2696 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2697 return N0;
2698 }
2699
2700 // fold (add x, 0) -> x
2701 if (isNullConstant(N1))
2702 return N0;
2703
2704 if (N0.getOpcode() == ISD::SUB) {
2705 SDValue N00 = N0.getOperand(0);
2706 SDValue N01 = N0.getOperand(1);
2707
2708 // fold ((A-c1)+c2) -> (A+(c2-c1))
2709 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2710 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2711
2712 // fold ((c1-A)+c2) -> (c1+c2)-A
2713 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2714 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2715 }
2716
2717 // add (sext i1 X), 1 -> zext (not i1 X)
2718 // We don't transform this pattern:
2719 // add (zext i1 X), -1 -> sext (not i1 X)
2720 // because most (?) targets generate better code for the zext form.
2721 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2722 isOneOrOneSplat(N1)) {
2723 SDValue X = N0.getOperand(0);
2724 if ((!LegalOperations ||
2725 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2726 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2727 X.getScalarValueSizeInBits() == 1) {
2728 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2729 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2730 }
2731 }
2732
2733 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2734 // iff (or x, c0) is equivalent to (add x, c0).
2735 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2736 // iff (xor x, c0) is equivalent to (add x, c0).
2737 if (DAG.isADDLike(N0)) {
2738 SDValue N01 = N0.getOperand(1);
2739 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2740 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2741 }
2742
2743 if (SDValue NewSel = foldBinOpIntoSelect(N))
2744 return NewSel;
2745
2746 // reassociate add
2747 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2748 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2749 return RADD;
2750
2751 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2752 // equivalent to (add x, c).
2753 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2754 // equivalent to (add x, c).
2755 // Do this optimization only when adding c does not introduce instructions
2756 // for adding carries.
2757 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2758 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2759 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2760 // If N0's type does not split or is a sign mask, it does not introduce
2761 // add carry.
2762 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2763 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2764 TyActn == TargetLoweringBase::TypePromoteInteger ||
2765 isMinSignedConstant(N0.getOperand(1));
2766 if (NoAddCarry)
2767 return DAG.getNode(
2768 ISD::ADD, DL, VT,
2769 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2770 N0.getOperand(1));
2771 }
2772 return SDValue();
2773 };
2774 if (SDValue Add = ReassociateAddOr(N0, N1))
2775 return Add;
2776 if (SDValue Add = ReassociateAddOr(N1, N0))
2777 return Add;
2778
2779 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2780 if (SDValue SD =
2781 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2782 return SD;
2783 }
2784
2785 SDValue A, B, C, D;
2786
2787 // fold ((0-A) + B) -> B-A
2788 if (sd_match(N0, m_Neg(m_Value(A))))
2789 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2790
2791 // fold (A + (0-B)) -> A-B
2792 if (sd_match(N1, m_Neg(m_Value(B))))
2793 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2794
2795 // fold (A+(B-A)) -> B
2796 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2797 return B;
2798
2799 // fold ((B-A)+A) -> B
2800 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2801 return B;
2802
2803 // fold ((A-B)+(C-A)) -> (C-B)
2804 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2805 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2806 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2807
2808 // fold ((A-B)+(B-C)) -> (A-C)
2809 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2810 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2811 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2812
2813 // fold (A+(B-(A+C))) to (B-C)
2814 // fold (A+(B-(C+A))) to (B-C)
2815 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2816 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2817
2818 // fold (A+((B-A)+or-C)) to (B+or-C)
2819 if (sd_match(N1,
2820 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2821 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2822 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2823
2824 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2825 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
2826 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
2827 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
2828 return DAG.getNode(ISD::SUB, DL, VT,
2829 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
2830 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
2831
2832 // fold (add (umax X, C), -C) --> (usubsat X, C)
2833 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2834 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2835 return (!Max && !Op) ||
2836 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2837 };
2838 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2839 /*AllowUndefs*/ true))
2840 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2841 N0.getOperand(1));
2842 }
2843
2844 if (SimplifyDemandedBits(SDValue(N, 0)))
2845 return SDValue(N, 0);
2846
2847 if (isOneOrOneSplat(N1)) {
2848 // fold (add (xor a, -1), 1) -> (sub 0, a)
2849 if (isBitwiseNot(N0))
2850 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2851 N0.getOperand(0));
2852
2853 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2854 if (N0.getOpcode() == ISD::ADD) {
2855 SDValue A, Xor;
2856
2857 if (isBitwiseNot(N0.getOperand(0))) {
2858 A = N0.getOperand(1);
2859 Xor = N0.getOperand(0);
2860 } else if (isBitwiseNot(N0.getOperand(1))) {
2861 A = N0.getOperand(0);
2862 Xor = N0.getOperand(1);
2863 }
2864
2865 if (Xor)
2866 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2867 }
2868
2869 // Look for:
2870 // add (add x, y), 1
2871 // And if the target does not like this form then turn into:
2872 // sub y, (xor x, -1)
2873 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2874 N0.hasOneUse() &&
2875 // Limit this to after legalization if the add has wrap flags
2876 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2877 !N->getFlags().hasNoSignedWrap()))) {
2878 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2879 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2880 }
2881 }
2882
2883 // (x - y) + -1 -> add (xor y, -1), x
2884 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2885 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2886 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2887 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2888 }
2889
2890 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2891 // This can help if the inner add has multiple uses.
2892 APInt CM, CA;
2893 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2894 if (VT.getScalarSizeInBits() <= 64) {
2895 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2896 m_ConstInt(CM)))) &&
2897 TLI.isLegalAddImmediate(
2898 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2899 SDNodeFlags Flags;
2900 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2901 // are _also_ nsw, the outputs can be too.
2902 if (N->getFlags().hasNoUnsignedWrap() &&
2903 N0->getFlags().hasNoUnsignedWrap() &&
2904 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2905 Flags.setNoUnsignedWrap(true);
2906 if (N->getFlags().hasNoSignedWrap() &&
2907 N0->getFlags().hasNoSignedWrap() &&
2908 N0.getOperand(0)->getFlags().hasNoSignedWrap())
2909 Flags.setNoSignedWrap(true);
2910 }
2911 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2912 DAG.getConstant(CM, DL, VT), Flags);
2913 return DAG.getNode(
2914 ISD::ADD, DL, VT, Mul,
2915 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2916 }
2917 // Also look in case there is an intermediate add.
2918 if (sd_match(N0, m_OneUse(m_Add(
2919 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2920 m_ConstInt(CM))),
2921 m_Value(B)))) &&
2922 TLI.isLegalAddImmediate(
2923 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2924 SDNodeFlags Flags;
2925 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2926 // are _also_ nsw, the outputs can be too.
2927 SDValue OMul =
2928 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2929 if (N->getFlags().hasNoUnsignedWrap() &&
2930 N0->getFlags().hasNoUnsignedWrap() &&
2931 OMul->getFlags().hasNoUnsignedWrap() &&
2932 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2933 Flags.setNoUnsignedWrap(true);
2934 if (N->getFlags().hasNoSignedWrap() &&
2935 N0->getFlags().hasNoSignedWrap() &&
2936 OMul->getFlags().hasNoSignedWrap() &&
2937 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2938 Flags.setNoSignedWrap(true);
2939 }
2940 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2941 DAG.getConstant(CM, DL, VT), Flags);
2942 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2943 return DAG.getNode(
2944 ISD::ADD, DL, VT, Add,
2945 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2946 }
2947 }
2948 }
2949
2950 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2951 return Combined;
2952
2953 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2954 return Combined;
2955
2956 return SDValue();
2957}
2958
2959// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2960SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2961 SDValue N0 = N->getOperand(0);
2962 EVT VT = N0.getValueType();
2963 SDValue A, B;
2964
2965 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
2966 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2967 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2968 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2969 }
2970 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
2971 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2972 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2973 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2974 }
2975
2976 return SDValue();
2977}
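// The floor form checks out the same way with A = 10, B = 13 (unsigned):
//   (A & B) = 8, (A ^ B) >> 1 = 3, and 8 + 3 = 11 == floor((10 + 13) / 2),
// again without any risk of overflowing the element type.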
2978
2979SDValue DAGCombiner::visitADD(SDNode *N) {
2980 SDValue N0 = N->getOperand(0);
2981 SDValue N1 = N->getOperand(1);
2982 EVT VT = N0.getValueType();
2983 SDLoc DL(N);
2984
2985 if (SDValue Combined = visitADDLike(N))
2986 return Combined;
2987
2988 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2989 return V;
2990
2991 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2992 return V;
2993
2994 // Try to match AVGFLOOR fixedwidth pattern
2995 if (SDValue V = foldAddToAvg(N, DL))
2996 return V;
2997
2998 // fold (a+b) -> (a|b) iff a and b share no bits.
2999 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3000 DAG.haveNoCommonBitsSet(N0, N1))
3001 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3002
3003 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3004 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3005 const APInt &C0 = N0->getConstantOperandAPInt(0);
3006 const APInt &C1 = N1->getConstantOperandAPInt(0);
3007 return DAG.getVScale(DL, VT, C0 + C1);
3008 }
3009
3010 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3011 if (N0.getOpcode() == ISD::ADD &&
3012 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3013 N1.getOpcode() == ISD::VSCALE) {
3014 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3015 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3016 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3017 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3018 }
3019
3020 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3021 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3022 N1.getOpcode() == ISD::STEP_VECTOR) {
3023 const APInt &C0 = N0->getConstantOperandAPInt(0);
3024 const APInt &C1 = N1->getConstantOperandAPInt(0);
3025 APInt NewStep = C0 + C1;
3026 return DAG.getStepVector(DL, VT, NewStep);
3027 }
3028
3029 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3030 if (N0.getOpcode() == ISD::ADD &&
3031 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3032 N1.getOpcode() == ISD::STEP_VECTOR) {
3033 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3034 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3035 APInt NewStep = SV0 + SV1;
3036 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3037 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3038 }
3039
3040 return SDValue();
3041}
3042
3043SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3044 unsigned Opcode = N->getOpcode();
3045 SDValue N0 = N->getOperand(0);
3046 SDValue N1 = N->getOperand(1);
3047 EVT VT = N0.getValueType();
3048 bool IsSigned = Opcode == ISD::SADDSAT;
3049 SDLoc DL(N);
3050
3051 // fold (add_sat x, undef) -> -1
3052 if (N0.isUndef() || N1.isUndef())
3053 return DAG.getAllOnesConstant(DL, VT);
3054
3055 // fold (add_sat c1, c2) -> c3
3056 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3057 return C;
3058
3059 // canonicalize constant to RHS
3060 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3061 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3062 return DAG.getNode(Opcode, DL, VT, N1, N0);
3063
3064 // fold vector ops
3065 if (VT.isVector()) {
3066 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3067 return FoldedVOp;
3068
3069 // fold (add_sat x, 0) -> x, vector edition
3070 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3071 return N0;
3072 }
3073
3074 // fold (add_sat x, 0) -> x
3075 if (isNullConstant(N1))
3076 return N0;
3077
3078 // If it cannot overflow, transform into an add.
3079 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3080 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3081
3082 return SDValue();
3083}
3084
3085 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3086 bool ForceCarryReconstruction = false) {
3087 bool Masked = false;
3088
3089 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3090 while (true) {
3091 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3092 V = V.getOperand(0);
3093 continue;
3094 }
3095
3096 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3097 if (ForceCarryReconstruction)
3098 return V;
3099
3100 Masked = true;
3101 V = V.getOperand(0);
3102 continue;
3103 }
3104
3105 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3106 return V;
3107
3108 break;
3109 }
3110
3111 // If this is not a carry, return.
3112 if (V.getResNo() != 1)
3113 return SDValue();
3114
3115 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3116 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3117 return SDValue();
3118
3119 EVT VT = V->getValueType(0);
3120 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3121 return SDValue();
3122
3123 // If the result is masked, then no matter what kind of bool it is we can
3124 // return. If it isn't, then we need to make sure the bool type is either 0 or
3125 // 1 and not other values.
3126 if (Masked ||
3127 TLI.getBooleanContents(V.getValueType()) ==
3129 return V;
3130
3131 return SDValue();
3132}
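// For example, (and (zext (uaddo x, y):1), 1) peels down to the uaddo's carry
// result; the explicit AND-mask makes this safe regardless of the target's
// boolean contents.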
3133
3134/// Given the operands of an add/sub operation, see if the 2nd operand is a
3135/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3136/// the opcode and bypass the mask operation.
3137static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3138 SelectionDAG &DAG, const SDLoc &DL) {
3139 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3140 N1 = N1.getOperand(0);
3141
3142 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3143 return SDValue();
3144
3145 EVT VT = N0.getValueType();
3146 SDValue N10 = N1.getOperand(0);
3147 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3148 N10 = N10.getOperand(0);
3149
3150 if (N10.getValueType() != VT)
3151 return SDValue();
3152
3153 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3154 return SDValue();
3155
3156 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3157 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3158 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3159}
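// Worked example of the fold above: if X is known to be all sign bits
// (i.e. 0 or -1), then (and X, 1) is 0 or 1 == -X, so
// N0 + (and X, 1) == N0 - X and N0 - (and X, 1) == N0 + X.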
3160
3161/// Helper for doing combines based on N0 and N1 being added to each other.
3162SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3163 SDNode *LocReference) {
3164 EVT VT = N0.getValueType();
3165 SDLoc DL(LocReference);
3166
3167 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3168 SDValue Y, N;
3169 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3170 return DAG.getNode(ISD::SUB, DL, VT, N0,
3171 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3172
3173 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3174 return V;
3175
3176 // Look for:
3177 // add (add x, 1), y
3178 // And if the target does not like this form then turn into:
3179 // sub y, (xor x, -1)
3180 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3181 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3182 // Limit this to after legalization if the add has wrap flags
3183 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3184 !N0->getFlags().hasNoSignedWrap()))) {
3185 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3186 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3187 }
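// The rewrite is sound because ~x == -x - 1, so
// y - (~x) == y + x + 1 == (add (add x, 1), y).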
3188
3189 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3190 // Hoist one-use subtraction by non-opaque constant:
3191 // (x - C) + y -> (x + y) - C
3192 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3193 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3194 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3195 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3196 }
3197 // Hoist one-use subtraction from non-opaque constant:
3198 // (C - x) + y -> (y - x) + C
3199 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3200 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3201 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3202 }
3203 }
3204
3205 // add (mul x, C), x -> mul x, C+1
3206 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3207 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3208 N0.hasOneUse()) {
3209 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3210 DAG.getConstant(1, DL, VT));
3211 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3212 }
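// This is plain distributivity: x*C + x == x*(C + 1).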
3213
3214 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3215 // rather than 'add 0/-1' (the zext should get folded).
3216 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3217 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3218 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3219 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3220 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3221 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3222 }
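// This holds because (sext i1 Y) is 0 or -1, which is exactly
// 0 - (zext i1 Y).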
3223
3224 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3225 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3226 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3227 if (TN->getVT() == MVT::i1) {
3228 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3229 DAG.getConstant(1, DL, VT));
3230 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3231 }
3232 }
3233
3234 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3235 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3236 N1.getResNo() == 0)
3237 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3238 N0, N1.getOperand(0), N1.getOperand(2));
3239
3240 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3241 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3242 if (SDValue Carry = getAsCarry(TLI, N1))
3243 return DAG.getNode(ISD::UADDO_CARRY, DL,
3244 DAG.getVTList(VT, Carry.getValueType()), N0,
3245 DAG.getConstant(0, DL, VT), Carry);
3246
3247 return SDValue();
3248}
3249
3250SDValue DAGCombiner::visitADDC(SDNode *N) {
3251 SDValue N0 = N->getOperand(0);
3252 SDValue N1 = N->getOperand(1);
3253 EVT VT = N0.getValueType();
3254 SDLoc DL(N);
3255
3256 // If the flag result is dead, turn this into an ADD.
3257 if (!N->hasAnyUseOfValue(1))
3258 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3259 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3260
3261 // canonicalize constant to RHS.
3262 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3263 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3264 if (N0C && !N1C)
3265 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3266
3267 // fold (addc x, 0) -> x + no carry out
3268 if (isNullConstant(N1))
3269 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3270 DL, MVT::Glue));
3271
3272 // If it cannot overflow, transform into an add.
3273 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3274 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3275 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3276
3277 return SDValue();
3278}
3279
3280/**
3281 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3282 * then the flip also occurs if computing the inverse is the same cost.
3283 * This function returns an empty SDValue in case it cannot flip the boolean
3284 * without increasing the cost of the computation. If you want to flip a boolean
3285 * no matter what, use DAG.getLogicalNOT.
3286 */
3287 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3288 const TargetLowering &TLI,
3289 bool Force) {
3290 if (Force && isa<ConstantSDNode>(V))
3291 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3292
3293 if (V.getOpcode() != ISD::XOR)
3294 return SDValue();
3295
3296 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3297 return V.getOperand(0);
3298 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3299 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3300 return SDValue();
3301}
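// Example: for V = (xor Cond, 1) with a 0/1 boolean, the flipped value is
// simply Cond, so the first operand is returned directly.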
3302
3303SDValue DAGCombiner::visitADDO(SDNode *N) {
3304 SDValue N0 = N->getOperand(0);
3305 SDValue N1 = N->getOperand(1);
3306 EVT VT = N0.getValueType();
3307 bool IsSigned = (ISD::SADDO == N->getOpcode());
3308
3309 EVT CarryVT = N->getValueType(1);
3310 SDLoc DL(N);
3311
3312 // If the flag result is dead, turn this into an ADD.
3313 if (!N->hasAnyUseOfValue(1))
3314 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3315 DAG.getUNDEF(CarryVT));
3316
3317 // canonicalize constant to RHS.
3318 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3319 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3320 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3321
3322 // fold (addo x, 0) -> x + no carry out
3323 if (isNullOrNullSplat(N1))
3324 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3325
3326 // If it cannot overflow, transform into an add.
3327 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3328 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3329 DAG.getConstant(0, DL, CarryVT));
3330
3331 if (IsSigned) {
3332 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3333 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3334 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3335 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3336 } else {
3337 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3338 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3339 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3340 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3341 return CombineTo(
3342 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3343 }
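// The flip is correct: ~a + 1 == 2^n - a produces a carry exactly when
// a == 0, while 0 - a borrows exactly when a != 0, so the two flags are
// complements of each other.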
3344
3345 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3346 return Combined;
3347
3348 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3349 return Combined;
3350 }
3351
3352 return SDValue();
3353}
3354
3355SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3356 EVT VT = N0.getValueType();
3357 if (VT.isVector())
3358 return SDValue();
3359
3360 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3361 // If Y + 1 cannot overflow.
3362 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3363 SDValue Y = N1.getOperand(0);
3364 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3365 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3366 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3367 N1.getOperand(2));
3368 }
3369
3370 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3371 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3372 if (SDValue Carry = getAsCarry(TLI, N1))
3373 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3374 DAG.getConstant(0, SDLoc(N), VT), Carry);
3375
3376 return SDValue();
3377}
3378
3379SDValue DAGCombiner::visitADDE(SDNode *N) {
3380 SDValue N0 = N->getOperand(0);
3381 SDValue N1 = N->getOperand(1);
3382 SDValue CarryIn = N->getOperand(2);
3383
3384 // canonicalize constant to RHS
3385 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3386 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3387 if (N0C && !N1C)
3388 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3389 N1, N0, CarryIn);
3390
3391 // fold (adde x, y, false) -> (addc x, y)
3392 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3393 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3394
3395 return SDValue();
3396}
3397
3398SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3399 SDValue N0 = N->getOperand(0);
3400 SDValue N1 = N->getOperand(1);
3401 SDValue CarryIn = N->getOperand(2);
3402 SDLoc DL(N);
3403
3404 // canonicalize constant to RHS
3405 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3406 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3407 if (N0C && !N1C)
3408 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3409
3410 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3411 if (isNullConstant(CarryIn)) {
3412 if (!LegalOperations ||
3413 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3414 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3415 }
3416
3417 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3418 if (isNullConstant(N0) && isNullConstant(N1)) {
3419 EVT VT = N0.getValueType();
3420 EVT CarryVT = CarryIn.getValueType();
3421 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3422 AddToWorklist(CarryExt.getNode());
3423 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3424 DAG.getConstant(1, DL, VT)),
3425 DAG.getConstant(0, DL, CarryVT));
3426 }
3427
3428 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3429 return Combined;
3430
3431 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3432 return Combined;
3433
3434 // We want to avoid useless duplication.
3435 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3436 // not a binary operation, it is not possible to leverage this existing
3437 // mechanism for it. However, if more operations require the same
3438 // deduplication logic, it may be worth generalizing.
3439 SDValue Ops[] = {N1, N0, CarryIn};
3440 SDNode *CSENode =
3441 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3442 if (CSENode)
3443 return SDValue(CSENode, 0);
3444
3445 return SDValue();
3446}
3447
3448/**
3449 * If we are facing some sort of diamond carry propagation pattern, try to
3450 * break it up to generate something like:
3451 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3452 *
3453 * The end result is usually an increase in the number of operations required, but because the
3454 * carry is now linearized, other transforms can kick in and optimize the DAG.
3455 *
3456 * Patterns typically look something like
3457 * (uaddo A, B)
3458 * / \
3459 * Carry Sum
3460 * | \
3461 * | (uaddo_carry *, 0, Z)
3462 * | /
3463 * \ Carry
3464 * | /
3465 * (uaddo_carry X, *, *)
3466 *
3467 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3468 * produce a combine with a single path for carry propagation.
3469 */
3470 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3471 SelectionDAG &DAG, SDValue X,
3472 SDValue Carry0, SDValue Carry1,
3473 SDNode *N) {
3474 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3475 return SDValue();
3476 if (Carry1.getOpcode() != ISD::UADDO)
3477 return SDValue();
3478
3479 SDValue Z;
3480
3481 /**
3482 * First look for a suitable Z. It will present itself in the form of
3483 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3484 */
3485 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3486 isNullConstant(Carry0.getOperand(1))) {
3487 Z = Carry0.getOperand(2);
3488 } else if (Carry0.getOpcode() == ISD::UADDO &&
3489 isOneConstant(Carry0.getOperand(1))) {
3490 EVT VT = Carry0->getValueType(1);
3491 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3492 } else {
3493 // We couldn't find a suitable Z.
3494 return SDValue();
3495 }
3496
3497
3498 auto cancelDiamond = [&](SDValue A,SDValue B) {
3499 SDLoc DL(N);
3500 SDValue NewY =
3501 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3502 Combiner.AddToWorklist(NewY.getNode());
3503 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3504 DAG.getConstant(0, DL, X.getValueType()),
3505 NewY.getValue(1));
3506 };
3507
3508 /**
3509 * (uaddo A, B)
3510 * |
3511 * Sum
3512 * |
3513 * (uaddo_carry *, 0, Z)
3514 */
3515 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3516 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3517 }
3518
3519 /**
3520 * (uaddo_carry A, 0, Z)
3521 * |
3522 * Sum
3523 * |
3524 * (uaddo *, B)
3525 */
3526 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3527 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3528 }
3529
3530 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3531 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3532 }
3533
3534 return SDValue();
3535}
3536
3537 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
3538// match patterns like:
3539//
3540// (uaddo A, B) CarryIn
3541// | \ |
3542// | \ |
3543// PartialSum PartialCarryOutX /
3544// | | /
3545// | ____|____________/
3546// | / |
3547// (uaddo *, *) \________
3548// | \ \
3549// | \ |
3550// | PartialCarryOutY |
3551// | \ |
3552// | \ /
3553// AddCarrySum | ______/
3554// | /
3555// CarryOut = (or *, *)
3556//
3557// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3558//
3559// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3560//
3561// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3562 // with a single path for carry/borrow out propagation.
3563 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3564 SDValue N0, SDValue N1, SDNode *N) {
3565 SDValue Carry0 = getAsCarry(TLI, N0);
3566 if (!Carry0)
3567 return SDValue();
3568 SDValue Carry1 = getAsCarry(TLI, N1);
3569 if (!Carry1)
3570 return SDValue();
3571
3572 unsigned Opcode = Carry0.getOpcode();
3573 if (Opcode != Carry1.getOpcode())
3574 return SDValue();
3575 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3576 return SDValue();
3577 // Guarantee that the CarryOut types are identical.
3578 EVT CarryOutType = N->getValueType(0);
3579 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3580 CarryOutType != Carry1.getValue(1).getValueType())
3581 return SDValue();
3582
3583 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3584 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3585 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3586 std::swap(Carry0, Carry1);
3587
3588 // Check if the nodes are connected in the expected way.
3589 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3590 Carry1.getOperand(1) != Carry0.getValue(0))
3591 return SDValue();
3592
3593 // The carry-in value must be on the right-hand side for subtraction.
3594 unsigned CarryInOperandNum =
3595 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3596 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3597 return SDValue();
3598 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3599
3600 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3601 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3602 return SDValue();
3603
3604 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3605 CarryIn = getAsCarry(TLI, CarryIn, true);
3606 if (!CarryIn)
3607 return SDValue();
3608
3609 SDLoc DL(N);
3610 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3611 Carry1->getValueType(0));
3612 SDValue Merged =
3613 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3614 Carry0.getOperand(1), CarryIn);
3615
3616 // Note that because we have proven that the result of the UADDO/USUBO of
3617 // A and B feeds into the UADDO/USUBO that consumes the carry/borrow in, we
3618 // can prove that if the first UADDO/USUBO overflows, the second
3619 // UADDO/USUBO cannot. For example, consider 8-bit numbers where 0xFF is the
3620 // maximum value.
3621 //
3622 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3623 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3624 //
3625 // This is important because it means that OR and XOR can be used to merge
3626 // carry flags; and that AND can return a constant zero.
3627 //
3628 // TODO: match other operations that can merge flags (ADD, etc)
3629 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3630 if (N->getOpcode() == ISD::AND)
3631 return DAG.getConstant(0, DL, CarryOutType);
3632 return Merged.getValue(1);
3633}
3634
3635SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3636 SDValue CarryIn, SDNode *N) {
3637 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3638 // carry.
3639 if (isBitwiseNot(N0))
3640 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3641 SDLoc DL(N);
3642 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3643 N0.getOperand(0), NotC);
3644 return CombineTo(
3645 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3646 }
3647
3648 // Iff the flag result is dead:
3649 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3650 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3651 // or the dependency between the instructions.
3652 if ((N0.getOpcode() == ISD::ADD ||
3653 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3654 N0.getValue(1) != CarryIn)) &&
3655 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3656 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3657 N0.getOperand(0), N0.getOperand(1), CarryIn);
3658
3659 /**
3660 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3661 * a diamond carry propagation, in which case we try to transform the DAG
3662 * to ensure linear carry propagation if that is possible.
3663 */
3664 if (auto Y = getAsCarry(TLI, N1)) {
3665 // Because both are carries, Y and Z can be swapped.
3666 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3667 return R;
3668 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3669 return R;
3670 }
3671
3672 return SDValue();
3673}
3674
3675SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3676 SDValue CarryIn, SDNode *N) {
3677 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3678 if (isBitwiseNot(N0)) {
3679 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3680 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3681 N0.getOperand(0), NotC);
3682 }
3683
3684 return SDValue();
3685}
3686
3687SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3688 SDValue N0 = N->getOperand(0);
3689 SDValue N1 = N->getOperand(1);
3690 SDValue CarryIn = N->getOperand(2);
3691 SDLoc DL(N);
3692
3693 // canonicalize constant to RHS
3694 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3695 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3696 if (N0C && !N1C)
3697 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3698
3699 // fold (saddo_carry x, y, false) -> (saddo x, y)
3700 if (isNullConstant(CarryIn)) {
3701 if (!LegalOperations ||
3702 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3703 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3704 }
3705
3706 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3707 return Combined;
3708
3709 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3710 return Combined;
3711
3712 return SDValue();
3713}
3714
3715// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3716// clamp/truncation if necessary.
3717static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3718 SDValue RHS, SelectionDAG &DAG,
3719 const SDLoc &DL) {
3720 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3721 "Illegal truncation");
3722
3723 if (DstVT == SrcVT)
3724 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3725
3726 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3727 // clamping RHS.
3728 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3729 DstVT.getScalarSizeInBits());
3730 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3731 return SDValue();
3732
3733 SDValue SatLimit =
3734 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3735 DstVT.getScalarSizeInBits()),
3736 DL, SrcVT);
3737 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3738 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3739 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3740 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3741}
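// Example: truncating from i32 to i16 when the i32 LHS is known to fit in
// 16 bits. Any RHS >= 0x10000 already yields 0, and after
// umin(RHS, 0xFFFF) the narrow subtraction saturates to 0 as well, so the
// i16 usubsat computes the identical value.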
3742
3743// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3744// usubsat(a,b), optionally as a truncated type.
3745SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3746 if (N->getOpcode() != ISD::SUB ||
3747 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3748 return SDValue();
3749
3750 EVT SubVT = N->getValueType(0);
3751 SDValue Op0 = N->getOperand(0);
3752 SDValue Op1 = N->getOperand(1);
3753
3754 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3755 // that may be converted to usubsat(a,b).
3756 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3757 SDValue MaxLHS = Op0.getOperand(0);
3758 SDValue MaxRHS = Op0.getOperand(1);
3759 if (MaxLHS == Op1)
3760 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3761 if (MaxRHS == Op1)
3762 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3763 }
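// The umax form works because if a >= b then umax(a,b) - b == a - b,
// otherwise b - b == 0; both cases equal usubsat(a,b). The umin form
// below is symmetric.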
3764
3765 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3766 SDValue MinLHS = Op1.getOperand(0);
3767 SDValue MinRHS = Op1.getOperand(1);
3768 if (MinLHS == Op0)
3769 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3770 if (MinRHS == Op0)
3771 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3772 }
3773
3774 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3775 if (Op1.getOpcode() == ISD::TRUNCATE &&
3776 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3777 Op1.getOperand(0).hasOneUse()) {
3778 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3779 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3780 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3781 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3782 DAG, DL);
3783 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3784 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3785 DAG, DL);
3786 }
3787
3788 return SDValue();
3789}
3790
3791// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3792 // counting leading ones. Broadly, it replaces the subtraction with a left
3793// shift.
3794//
3795// * DAG Legalisation Pattern:
3796//
3797// (sub (ctlz (zeroextend (not Src)))
3798// BitWidthDiff)
3799//
3800// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
3801// -->
3802//
3803// (ctlz_zero_undef (not (shl (anyextend Src)
3804// BitWidthDiff)))
3805//
3806// * Type Legalisation Pattern:
3807//
3808// (sub (ctlz (and (xor Src XorMask)
3809// AndMask))
3810// BitWidthDiff)
3811//
3812// if AndMask has only trailing ones
3813// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
3814// and XorMask has more trailing ones than AndMask
3815// -->
3816//
3817// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
3818 template <class MatchContextClass>
3819 static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
3820 const SDLoc DL(N);
3821 SDValue N0 = N->getOperand(0);
3822 EVT VT = N0.getValueType();
3823 unsigned BitWidth = VT.getScalarSizeInBits();
3824
3825 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
3826
3827 APInt AndMask;
3828 APInt XorMask;
3829 APInt BitWidthDiff;
3830
3831 SDValue CtlzOp;
3832 SDValue Src;
3833
3834 if (!sd_context_match(
3835 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
3836 return SDValue();
3837
3838 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
3839 // DAG Legalisation Pattern:
3840 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
3841 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
3842 return SDValue();
3843
3844 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
3845 } else if (sd_context_match(CtlzOp, Matcher,
3846 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
3847 m_ConstInt(AndMask)))) {
3848 // Type Legalisation Pattern:
3849 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
3850 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
3851 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
3852 return SDValue();
3853 } else
3854 return SDValue();
3855
3856 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
3857 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
3858 SDValue Not =
3859 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
3860
3861 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
3862}
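// Worked example of the DAG legalisation pattern, with i8 Src promoted to
// i32: ctlz(zext(~Src)) - 24 counts the leading ones of Src. The rewrite
// shifts Src into the top byte; the NOT then fills the low 24 bits with
// ones, so the ctlz input can never be zero and ctlz_zero_undef is safe.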
3863
3864 // Since it may not be valid to emit a fold to zero for vector initializers,
3865// check if we can before folding.
3866static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3867 SelectionDAG &DAG, bool LegalOperations) {
3868 if (!VT.isVector())
3869 return DAG.getConstant(0, DL, VT);
3870 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3871 return DAG.getConstant(0, DL, VT);
3872 return SDValue();
3873}
3874
3875SDValue DAGCombiner::visitSUB(SDNode *N) {
3876 SDValue N0 = N->getOperand(0);
3877 SDValue N1 = N->getOperand(1);
3878 EVT VT = N0.getValueType();
3879 unsigned BitWidth = VT.getScalarSizeInBits();
3880 SDLoc DL(N);
3881
3882 auto PeekThroughFreeze = [](SDValue N) {
3883 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3884 return N->getOperand(0);
3885 return N;
3886 };
3887
3888 if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
3889 return V;
3890
3891 // fold (sub x, x) -> 0
3892 // FIXME: Refactor this and xor and other similar operations together.
3893 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3894 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3895
3896 // fold (sub c1, c2) -> c3
3897 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3898 return C;
3899
3900 // fold vector ops
3901 if (VT.isVector()) {
3902 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3903 return FoldedVOp;
3904
3905 // fold (sub x, 0) -> x, vector edition
3906 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3907 return N0;
3908 }
3909
3910 if (SDValue NewSel = foldBinOpIntoSelect(N))
3911 return NewSel;
3912
3913 // fold (sub x, c) -> (add x, -c)
3914 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3915 return DAG.getNode(ISD::ADD, DL, VT, N0,
3916 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3917
3918 if (isNullOrNullSplat(N0)) {
3919 // Right-shifting everything out but the sign bit followed by negation is
3920 // the same as flipping arithmetic/logical shift type without the negation:
3921 // -(X >>u 31) -> (X >>s 31)
3922 // -(X >>s 31) -> (X >>u 31)
3923 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3924 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3925 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3926 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3927 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3928 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3929 }
3930 }
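// e.g. for i32: X >>u 31 is 0 or 1, so 0 - (X >>u 31) is 0 or -1, which
// is exactly X >>s 31 (and symmetrically for the arithmetic shift).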
3931
3932 // 0 - X --> 0 if the sub is NUW.
3933 if (N->getFlags().hasNoUnsignedWrap())
3934 return N0;
3935
3936 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3937 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3938 // N1 must be 0 because negating the minimum signed value is undefined.
3939 if (N->getFlags().hasNoSignedWrap())
3940 return N0;
3941
3942 // 0 - X --> X if X is 0 or the minimum signed value.
3943 return N1;
3944 }
3945
3946 // Convert 0 - abs(x).
3947 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3948 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3949 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3950 return Result;
3951
3952 // Similar to the previous rule, but this time targeting an expanded abs.
3953 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
3954 // as well as
3955 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
3956 // Note that these two are applicable to both signed and unsigned min/max.
3957 SDValue X;
3958 SDValue S0;
3959 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
3960 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
3961 m_UMax(m_Value(X), NegPat),
3962 m_SMin(m_Value(X), NegPat),
3963 m_UMin(m_Value(X), NegPat))))) {
3964 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
3965 if (hasOperation(NewOpc, VT))
3966 return DAG.getNode(NewOpc, DL, VT, X, S0);
3967 }
3968
3969 // Fold neg(splat(neg(x))) -> splat(x)
3970 if (VT.isVector()) {
3971 SDValue N1S = DAG.getSplatValue(N1, true);
3972 if (N1S && N1S.getOpcode() == ISD::SUB &&
3973 isNullConstant(N1S.getOperand(0)))
3974 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3975 }
3976 }
3977
3978 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3979 if (isAllOnesOrAllOnesSplat(N0))
3980 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3981
3982 // fold (A - (0-B)) -> A+B
3983 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3984 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3985
3986 // fold A-(A-B) -> B
3987 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3988 return N1.getOperand(1);
3989
3990 // fold (A+B)-A -> B
3991 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3992 return N0.getOperand(1);
3993
3994 // fold (A+B)-B -> A
3995 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3996 return N0.getOperand(0);
3997
3998 // fold (A+C1)-C2 -> A+(C1-C2)
3999 if (N0.getOpcode() == ISD::ADD) {
4000 SDValue N01 = N0.getOperand(1);
4001 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4002 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4003 }
4004
4005 // fold C2-(A+C1) -> (C2-C1)-A
4006 if (N1.getOpcode() == ISD::ADD) {
4007 SDValue N11 = N1.getOperand(1);
4008 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4009 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4010 }
4011
4012 // fold (A-C1)-C2 -> A-(C1+C2)
4013 if (N0.getOpcode() == ISD::SUB) {
4014 SDValue N01 = N0.getOperand(1);
4015 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4016 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4017 }
4018
4019 // fold (c1-A)-c2 -> (c1-c2)-A
4020 if (N0.getOpcode() == ISD::SUB) {
4021 SDValue N00 = N0.getOperand(0);
4022 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4023 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4024 }
4025
4026 SDValue A, B, C;
4027
4028 // fold ((A+(B+C))-B) -> A+C
4029 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4030 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4031
4032 // fold ((A+(B-C))-B) -> A-C
4033 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4034 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4035
4036 // fold ((A-(B-C))-C) -> A-B
4037 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4038 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4039
4040 // fold (A-(B-C)) -> A+(C-B)
4041 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4042 return DAG.getNode(ISD::ADD, DL, VT, N0,
4043 DAG.getNode(ISD::SUB, DL, VT, C, B));
4044
4045 // A - (A & B) -> A & (~B)
4046 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4047 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4048 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
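// No borrows occur in A - (A & B): every set bit of (A & B) is also set
// in A, so the subtraction just clears the bits of B within A, which is
// A & ~B.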
4049
4050 // fold (A - (-B * C)) -> (A + (B * C))
4051 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4052 return DAG.getNode(ISD::ADD, DL, VT, N0,
4053 DAG.getNode(ISD::MUL, DL, VT, B, C));
4054
4055 // If either operand of a sub is undef, the result is undef
4056 if (N0.isUndef())
4057 return N0;
4058 if (N1.isUndef())
4059 return N1;
4060
4061 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4062 return V;
4063
4064 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4065 return V;
4066
4067 // Try to match AVGCEIL fixedwidth pattern
4068 if (SDValue V = foldSubToAvg(N, DL))
4069 return V;
4070
4071 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4072 return V;
4073
4074 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4075 return V;
4076
4077 // (A - B) - 1 -> add (xor B, -1), A