LLVM 18.0.0git
DAGCombiner.cpp
Go to the documentation of this file.
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/Metadata.h"
60#include "llvm/Support/Debug.h"
67#include <algorithm>
68#include <cassert>
69#include <cstdint>
70#include <functional>
71#include <iterator>
72#include <optional>
73#include <string>
74#include <tuple>
75#include <utility>
76#include <variant>
77
78using namespace llvm;
79
80#define DEBUG_TYPE "dagcombine"
81
82STATISTIC(NodesCombined , "Number of dag nodes combined");
83STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87STATISTIC(SlicedLoads, "Number of load sliced");
88STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89
90static cl::opt<bool>
91CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92 cl::desc("Enable DAG combiner's use of IR alias analysis"));
93
94static cl::opt<bool>
95UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96 cl::desc("Enable DAG combiner's use of TBAA"));
97
98#ifndef NDEBUG
100CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101 cl::desc("Only use DAG-combiner alias analysis in this"
102 " function"));
103#endif
104
105/// Hidden option to stress test load slicing, i.e., when this option
106/// is enabled, load slicing bypasses most of its profitability guards.
107static cl::opt<bool>
108StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109 cl::desc("Bypass the profitability model of load slicing"),
110 cl::init(false));
111
112static cl::opt<bool>
113 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114 cl::desc("DAG combiner may split indexing from loads"));
115
116static cl::opt<bool>
117 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118 cl::desc("DAG combiner enable merging multiple stores "
119 "into a wider store"));
120
122 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123 cl::desc("Limit the number of operands to inline for Token Factors"));
124
126 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127 cl::desc("Limit the number of times for the same StoreNode and RootNode "
128 "to bail out in store merging dependence check"));
129
131 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132 cl::desc("DAG combiner enable reducing the width of load/op/store "
133 "sequence"));
134
136 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137 cl::desc("DAG combiner enable load/<replace bytes>/store with "
138 "a narrower store"));
139
141 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
142 cl::desc(
143 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
144
145namespace {
146
147 class DAGCombiner {
148 SelectionDAG &DAG;
149 const TargetLowering &TLI;
150 const SelectionDAGTargetInfo *STI;
// NOTE(review): original line 151 is missing from this scraped copy; upstream
// LLVM 18 declares `CombineLevel Level = BeforeLegalizeTypes;` here — confirm
// against the real source before relying on this listing.
152 CodeGenOptLevel OptLevel;
153 bool LegalDAG = false;
154 bool LegalOperations = false;
155 bool LegalTypes = false;
156 bool ForCodeSize;
157 bool DisableGenericCombines;
158
159 /// Worklist of all of the nodes that need to be simplified.
160 ///
161 /// This must behave as a stack -- new nodes to process are pushed onto the
162 /// back and when processing we pop off of the back.
163 ///
164 /// The worklist will not contain duplicates but may contain null entries
165 /// due to nodes being deleted from the underlying DAG.
// NOTE(review): original line 166 (the declaration this comment documents) was
// dropped by the scrape; upstream declares `SmallVector<SDNode *, 64>
// Worklist;` here — confirm against the real source.
167
168 /// Mapping from an SDNode to its position on the worklist.
169 ///
170 /// This is used to find and remove nodes from the worklist (by nulling
171 /// them) when they are deleted from the underlying DAG. It relies on
172 /// stable indices of nodes within the worklist.
// NOTE(review): original line 173 was dropped; upstream declares
// `DenseMap<SDNode *, unsigned> WorklistMap;` here — confirm.
174
175 /// This records all nodes attempted to be added to the worklist since we
176 /// considered a new worklist entry. As we do not add duplicate nodes
177 /// in the worklist, this is different from the tail of the worklist.
// NOTE(review): original line 178 was dropped; upstream declares
// `SmallSetVector<SDNode *, 32> PruningList;` here — confirm.
179
180 /// Set of nodes which have been combined (at least once).
181 ///
182 /// This is used to allow us to reliably add any operands of a DAG node
183 /// which have not yet been combined to the worklist.
184 SmallPtrSet<SDNode *, 32> CombinedNodes;
185
186 /// Map from candidate StoreNode to the pair of RootNode and count.
187 /// The count is used to track how many times we have seen the StoreNode
188 /// with the same RootNode bail out in dependence check. If we have seen
189 /// the bail out for the same pair many times over a limit, we won't
190 /// consider the StoreNode with the same RootNode as store merging
191 /// candidate again.
// NOTE(review): original line 192 was dropped; upstream declares
// `DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;`
// here — confirm.
193
194 // AA - Used for DAG load/store alias analysis.
195 AliasAnalysis *AA;
196
197 /// When an instruction is simplified, add all users of the instruction to
198 /// the work lists because they might get more simplified now.
199 void AddUsersToWorklist(SDNode *N) {
200 for (SDNode *Node : N->uses())
201 AddToWorklist(Node);
202 }
203
204 /// Convenient shorthand to add a node and all of its user to the worklist.
205 void AddToWorklistWithUsers(SDNode *N) {
206 AddUsersToWorklist(N);
207 AddToWorklist(N);
208 }
209
210 // Prune potentially dangling nodes. This is called after
211 // any visit to a node, but should also be called during a visit after any
212 // failed combine which may have created a DAG node.
213 void clearAddedDanglingWorklistEntries() {
214 // Check any nodes added to the worklist to see if they are prunable.
215 while (!PruningList.empty()) {
216 auto *N = PruningList.pop_back_val();
217 if (N->use_empty())
218 recursivelyDeleteUnusedNodes(N);
219 }
220 }
221
222 SDNode *getNextWorklistEntry() {
223 // Before we do any work, remove nodes that are not in use.
224 clearAddedDanglingWorklistEntries();
225 SDNode *N = nullptr;
226 // The Worklist holds the SDNodes in order, but it may contain null
227 // entries.
228 while (!N && !Worklist.empty()) {
229 N = Worklist.pop_back_val();
230 }
231
232 if (N) {
233 bool GoodWorklistEntry = WorklistMap.erase(N);
234 (void)GoodWorklistEntry;
235 assert(GoodWorklistEntry &&
236 "Found a worklist entry without a corresponding map entry!");
237 }
238 return N;
239 }
240
241 /// Call the node-specific routine that folds each particular type of node.
242 SDValue visit(SDNode *N);
243
244 public:
245 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
246 : DAG(D), TLI(D.getTargetLoweringInfo()),
247 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
248 ForCodeSize = DAG.shouldOptForSize();
249 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
250
251 MaximumLegalStoreInBits = 0;
252 // We use the minimum store size here, since that's all we can guarantee
253 // for the scalable vector types.
254 for (MVT VT : MVT::all_valuetypes())
255 if (EVT(VT).isSimple() && VT != MVT::Other &&
256 TLI.isTypeLegal(EVT(VT)) &&
257 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
258 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
259 }
260
261 void ConsiderForPruning(SDNode *N) {
262 // Mark this for potential pruning.
263 PruningList.insert(N);
264 }
265
266 /// Add to the worklist making sure its instance is at the back (next to be
267 /// processed.)
268 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
269 assert(N->getOpcode() != ISD::DELETED_NODE &&
270 "Deleted Node added to Worklist");
271
272 // Skip handle nodes as they can't usefully be combined and confuse the
273 // zero-use deletion strategy.
274 if (N->getOpcode() == ISD::HANDLENODE)
275 return;
276
277 if (IsCandidateForPruning)
278 ConsiderForPruning(N);
279
280 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
281 Worklist.push_back(N);
282 }
283
284 /// Remove all instances of N from the worklist.
285 void removeFromWorklist(SDNode *N) {
286 CombinedNodes.erase(N);
287 PruningList.remove(N);
288 StoreRootCountMap.erase(N);
289
290 auto It = WorklistMap.find(N);
291 if (It == WorklistMap.end())
292 return; // Not in the worklist.
293
294 // Null out the entry rather than erasing it to avoid a linear operation.
295 Worklist[It->second] = nullptr;
296 WorklistMap.erase(It);
297 }
298
299 void deleteAndRecombine(SDNode *N);
300 bool recursivelyDeleteUnusedNodes(SDNode *N);
301
302 /// Replaces all uses of the results of one DAG node with new values.
303 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
304 bool AddTo = true);
305
306 /// Replaces all uses of the results of one DAG node with new values.
307 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
308 return CombineTo(N, &Res, 1, AddTo);
309 }
310
311 /// Replaces all uses of the results of one DAG node with new values.
312 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
313 bool AddTo = true) {
314 SDValue To[] = { Res0, Res1 };
315 return CombineTo(N, To, 2, AddTo);
316 }
317
318 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
319
320 private:
321 unsigned MaximumLegalStoreInBits;
322
323 /// Check the specified integer node value to see if it can be simplified or
324 /// if things it uses can be simplified by bit propagation.
325 /// If so, return true.
326 bool SimplifyDemandedBits(SDValue Op) {
327 unsigned BitWidth = Op.getScalarValueSizeInBits();
329 return SimplifyDemandedBits(Op, DemandedBits);
330 }
331
332 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
333 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
334 KnownBits Known;
335 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
336 return false;
337
338 // Revisit the node.
339 AddToWorklist(Op.getNode());
340
341 CommitTargetLoweringOpt(TLO);
342 return true;
343 }
344
345 /// Check the specified vector node value to see if it can be simplified or
346 /// if things it uses can be simplified as it only uses some of the
347 /// elements. If so, return true.
348 bool SimplifyDemandedVectorElts(SDValue Op) {
349 // TODO: For now just pretend it cannot be simplified.
350 if (Op.getValueType().isScalableVector())
351 return false;
352
353 unsigned NumElts = Op.getValueType().getVectorNumElements();
354 APInt DemandedElts = APInt::getAllOnes(NumElts);
355 return SimplifyDemandedVectorElts(Op, DemandedElts);
356 }
357
358 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
359 const APInt &DemandedElts,
360 bool AssumeSingleUse = false);
361 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
362 bool AssumeSingleUse = false);
363
364 bool CombineToPreIndexedLoadStore(SDNode *N);
365 bool CombineToPostIndexedLoadStore(SDNode *N);
366 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
367 bool SliceUpLoad(SDNode *N);
368
369 // Looks up the chain to find a unique (unaliased) store feeding the passed
370 // load. If no such store is found, returns a nullptr.
371 // Note: This will look past a CALLSEQ_START if the load is chained to it so
372 // so that it can find stack stores for byval params.
373 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
374 // Scalars have size 0 to distinguish from singleton vectors.
375 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
376 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
377 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
378
379 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
380 /// load.
381 ///
382 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
383 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
384 /// \param EltNo index of the vector element to load.
385 /// \param OriginalLoad load that EVE came from to be replaced.
386 /// \returns EVE on success SDValue() on failure.
387 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
388 SDValue EltNo,
389 LoadSDNode *OriginalLoad);
390 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
391 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
392 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
393 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
394 SDValue PromoteIntBinOp(SDValue Op);
395 SDValue PromoteIntShiftOp(SDValue Op);
396 SDValue PromoteExtend(SDValue Op);
397 bool PromoteLoad(SDValue Op);
398
399 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
400 SDValue RHS, SDValue True, SDValue False,
402
403 /// Call the node-specific routine that knows how to fold each
404 /// particular type of node. If that doesn't do anything, try the
405 /// target-specific DAG combines.
406 SDValue combine(SDNode *N);
407
408 // Visitation implementation - Implement dag node combining for different
409 // node types. The semantics are as follows:
410 // Return Value:
411 // SDValue.getNode() == 0 - No change was made
412 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
413 // otherwise - N should be replaced by the returned Operand.
414 //
415 SDValue visitTokenFactor(SDNode *N);
416 SDValue visitMERGE_VALUES(SDNode *N);
417 SDValue visitADD(SDNode *N);
418 SDValue visitADDLike(SDNode *N);
419 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
420 SDValue visitSUB(SDNode *N);
421 SDValue visitADDSAT(SDNode *N);
422 SDValue visitSUBSAT(SDNode *N);
423 SDValue visitADDC(SDNode *N);
424 SDValue visitADDO(SDNode *N);
425 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
426 SDValue visitSUBC(SDNode *N);
427 SDValue visitSUBO(SDNode *N);
428 SDValue visitADDE(SDNode *N);
429 SDValue visitUADDO_CARRY(SDNode *N);
430 SDValue visitSADDO_CARRY(SDNode *N);
431 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
432 SDNode *N);
433 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
434 SDNode *N);
435 SDValue visitSUBE(SDNode *N);
436 SDValue visitUSUBO_CARRY(SDNode *N);
437 SDValue visitSSUBO_CARRY(SDNode *N);
438 SDValue visitMUL(SDNode *N);
439 SDValue visitMULFIX(SDNode *N);
440 SDValue useDivRem(SDNode *N);
441 SDValue visitSDIV(SDNode *N);
442 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
443 SDValue visitUDIV(SDNode *N);
444 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
445 SDValue visitREM(SDNode *N);
446 SDValue visitMULHU(SDNode *N);
447 SDValue visitMULHS(SDNode *N);
448 SDValue visitAVG(SDNode *N);
449 SDValue visitABD(SDNode *N);
450 SDValue visitSMUL_LOHI(SDNode *N);
451 SDValue visitUMUL_LOHI(SDNode *N);
452 SDValue visitMULO(SDNode *N);
453 SDValue visitIMINMAX(SDNode *N);
454 SDValue visitAND(SDNode *N);
455 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitOR(SDNode *N);
457 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
458 SDValue visitXOR(SDNode *N);
459 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
460 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
461 SDValue visitSHL(SDNode *N);
462 SDValue visitSRA(SDNode *N);
463 SDValue visitSRL(SDNode *N);
464 SDValue visitFunnelShift(SDNode *N);
465 SDValue visitSHLSAT(SDNode *N);
466 SDValue visitRotate(SDNode *N);
467 SDValue visitABS(SDNode *N);
468 SDValue visitBSWAP(SDNode *N);
469 SDValue visitBITREVERSE(SDNode *N);
470 SDValue visitCTLZ(SDNode *N);
471 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
472 SDValue visitCTTZ(SDNode *N);
473 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
474 SDValue visitCTPOP(SDNode *N);
475 SDValue visitSELECT(SDNode *N);
476 SDValue visitVSELECT(SDNode *N);
477 SDValue visitSELECT_CC(SDNode *N);
478 SDValue visitSETCC(SDNode *N);
479 SDValue visitSETCCCARRY(SDNode *N);
480 SDValue visitSIGN_EXTEND(SDNode *N);
481 SDValue visitZERO_EXTEND(SDNode *N);
482 SDValue visitANY_EXTEND(SDNode *N);
483 SDValue visitAssertExt(SDNode *N);
484 SDValue visitAssertAlign(SDNode *N);
485 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
486 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
487 SDValue visitTRUNCATE(SDNode *N);
488 SDValue visitBITCAST(SDNode *N);
489 SDValue visitFREEZE(SDNode *N);
490 SDValue visitBUILD_PAIR(SDNode *N);
491 SDValue visitFADD(SDNode *N);
492 SDValue visitVP_FADD(SDNode *N);
493 SDValue visitVP_FSUB(SDNode *N);
494 SDValue visitSTRICT_FADD(SDNode *N);
495 SDValue visitFSUB(SDNode *N);
496 SDValue visitFMUL(SDNode *N);
497 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
498 SDValue visitFDIV(SDNode *N);
499 SDValue visitFREM(SDNode *N);
500 SDValue visitFSQRT(SDNode *N);
501 SDValue visitFCOPYSIGN(SDNode *N);
502 SDValue visitFPOW(SDNode *N);
503 SDValue visitSINT_TO_FP(SDNode *N);
504 SDValue visitUINT_TO_FP(SDNode *N);
505 SDValue visitFP_TO_SINT(SDNode *N);
506 SDValue visitFP_TO_UINT(SDNode *N);
507 SDValue visitFP_ROUND(SDNode *N);
508 SDValue visitFP_EXTEND(SDNode *N);
509 SDValue visitFNEG(SDNode *N);
510 SDValue visitFABS(SDNode *N);
511 SDValue visitFCEIL(SDNode *N);
512 SDValue visitFTRUNC(SDNode *N);
513 SDValue visitFFREXP(SDNode *N);
514 SDValue visitFFLOOR(SDNode *N);
515 SDValue visitFMinMax(SDNode *N);
516 SDValue visitBRCOND(SDNode *N);
517 SDValue visitBR_CC(SDNode *N);
518 SDValue visitLOAD(SDNode *N);
519
520 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
521 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
522 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
523
524 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
525
526 SDValue visitSTORE(SDNode *N);
527 SDValue visitLIFETIME_END(SDNode *N);
528 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
529 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
530 SDValue visitBUILD_VECTOR(SDNode *N);
531 SDValue visitCONCAT_VECTORS(SDNode *N);
532 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
533 SDValue visitVECTOR_SHUFFLE(SDNode *N);
534 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
535 SDValue visitINSERT_SUBVECTOR(SDNode *N);
536 SDValue visitMLOAD(SDNode *N);
537 SDValue visitMSTORE(SDNode *N);
538 SDValue visitMGATHER(SDNode *N);
539 SDValue visitMSCATTER(SDNode *N);
540 SDValue visitVPGATHER(SDNode *N);
541 SDValue visitVPSCATTER(SDNode *N);
542 SDValue visitVP_STRIDED_LOAD(SDNode *N);
543 SDValue visitVP_STRIDED_STORE(SDNode *N);
544 SDValue visitFP_TO_FP16(SDNode *N);
545 SDValue visitFP16_TO_FP(SDNode *N);
546 SDValue visitFP_TO_BF16(SDNode *N);
547 SDValue visitVECREDUCE(SDNode *N);
548 SDValue visitVPOp(SDNode *N);
549 SDValue visitGET_FPENV_MEM(SDNode *N);
550 SDValue visitSET_FPENV_MEM(SDNode *N);
551
552 template <class MatchContextClass>
553 SDValue visitFADDForFMACombine(SDNode *N);
554 template <class MatchContextClass>
555 SDValue visitFSUBForFMACombine(SDNode *N);
556 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
557
558 SDValue XformToShuffleWithZero(SDNode *N);
559 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
560 const SDLoc &DL,
561 SDNode *N,
562 SDValue N0,
563 SDValue N1);
564 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
565 SDValue N1, SDNodeFlags Flags);
566 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
567 SDValue N1, SDNodeFlags Flags);
568 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
569 EVT VT, SDValue N0, SDValue N1,
570 SDNodeFlags Flags = SDNodeFlags());
571
572 SDValue visitShiftByConstant(SDNode *N);
573
574 SDValue foldSelectOfConstants(SDNode *N);
575 SDValue foldVSelectOfConstants(SDNode *N);
576 SDValue foldBinOpIntoSelect(SDNode *BO);
577 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
578 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
579 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
580 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
582 bool NotExtCompare = false);
583 SDValue convertSelectOfFPConstantsToLoadOffset(
584 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
586 SDValue foldSignChangeInBitcast(SDNode *N);
587 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
589 SDValue foldSelectOfBinops(SDNode *N);
590 SDValue foldSextSetcc(SDNode *N);
591 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
592 const SDLoc &DL);
593 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
594 SDValue foldABSToABD(SDNode *N);
595 SDValue unfoldMaskedMerge(SDNode *N);
596 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
597 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
598 const SDLoc &DL, bool foldBooleans);
599 SDValue rebuildSetCC(SDValue N);
600
601 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
602 SDValue &CC, bool MatchStrict = false) const;
603 bool isOneUseSetCC(SDValue N) const;
604
605 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
606 unsigned HiOp);
607 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
608 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
609 const TargetLowering &TLI);
610
611 SDValue CombineExtLoad(SDNode *N);
612 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
613 SDValue combineRepeatedFPDivisors(SDNode *N);
614 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
615 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
616 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
617 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
618 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
619 SDValue BuildSDIV(SDNode *N);
620 SDValue BuildSDIVPow2(SDNode *N);
621 SDValue BuildUDIV(SDNode *N);
622 SDValue BuildSREMPow2(SDNode *N);
623 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
624 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
625 bool KnownNeverZero = false,
626 bool InexpensiveOnly = false,
627 std::optional<EVT> OutVT = std::nullopt);
628 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
629 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
630 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
631 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
632 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
633 SDNodeFlags Flags, bool Reciprocal);
634 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
635 SDNodeFlags Flags, bool Reciprocal);
636 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
637 bool DemandHighBits = true);
638 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
639 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
640 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
641 unsigned PosOpcode, unsigned NegOpcode,
642 const SDLoc &DL);
643 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
644 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
645 unsigned PosOpcode, unsigned NegOpcode,
646 const SDLoc &DL);
647 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
648 SDValue MatchLoadCombine(SDNode *N);
649 SDValue mergeTruncStores(StoreSDNode *N);
650 SDValue reduceLoadWidth(SDNode *N);
651 SDValue ReduceLoadOpStoreWidth(SDNode *N);
653 SDValue TransformFPLoadStorePair(SDNode *N);
654 SDValue convertBuildVecZextToZext(SDNode *N);
655 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
656 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
657 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
658 SDValue reduceBuildVecToShuffle(SDNode *N);
659 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
660 ArrayRef<int> VectorMask, SDValue VecIn1,
661 SDValue VecIn2, unsigned LeftIdx,
662 bool DidSplitVec);
663 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
664
665 /// Walk up chain skipping non-aliasing memory nodes,
666 /// looking for aliasing nodes and adding them to the Aliases vector.
667 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
668 SmallVectorImpl<SDValue> &Aliases);
669
670 /// Return true if there is any possibility that the two addresses overlap.
671 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
672
673 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
674 /// chain (aliasing node.)
675 SDValue FindBetterChain(SDNode *N, SDValue Chain);
676
677 /// Try to replace a store and any possibly adjacent stores on
678 /// consecutive chains with better chains. Return true only if St is
679 /// replaced.
680 ///
681 /// Notice that other chains may still be replaced even if the function
682 /// returns false.
683 bool findBetterNeighborChains(StoreSDNode *St);
684
685 // Helper for findBetterNeighborChains. Walk up store chain add additional
686 // chained stores that do not overlap and can be parallelized.
687 bool parallelizeChainedStores(StoreSDNode *St);
688
689 /// Holds a pointer to an LSBaseSDNode as well as information on where it
690 /// is located in a sequence of memory operations connected by a chain.
691 struct MemOpLink {
692 // Ptr to the mem node.
693 LSBaseSDNode *MemNode;
694
695 // Offset from the base ptr.
696 int64_t OffsetFromBase;
697
698 MemOpLink(LSBaseSDNode *N, int64_t Offset)
699 : MemNode(N), OffsetFromBase(Offset) {}
700 };
701
702 // Classify the origin of a stored value.
703 enum class StoreSource { Unknown, Constant, Extract, Load };
704 StoreSource getStoreSource(SDValue StoreVal) {
705 switch (StoreVal.getOpcode()) {
706 case ISD::Constant:
707 case ISD::ConstantFP:
708 return StoreSource::Constant;
712 return StoreSource::Constant;
713 return StoreSource::Unknown;
716 return StoreSource::Extract;
717 case ISD::LOAD:
718 return StoreSource::Load;
719 default:
720 return StoreSource::Unknown;
721 }
722 }
723
724 /// This is a helper function for visitMUL to check the profitability
725 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
726 /// MulNode is the original multiply, AddNode is (add x, c1),
727 /// and ConstNode is c2.
728 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
729 SDValue ConstNode);
730
731 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
732 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
733 /// the type of the loaded value to be extended.
734 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
735 EVT LoadResultTy, EVT &ExtVT);
736
737 /// Helper function to calculate whether the given Load/Store can have its
738 /// width reduced to ExtVT.
739 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
740 EVT &MemVT, unsigned ShAmt = 0);
741
742 /// Used by BackwardsPropagateMask to find suitable loads.
743 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
744 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
745 ConstantSDNode *Mask, SDNode *&NodeToMask);
746 /// Attempt to propagate a given AND node back to load leaves so that they
747 /// can be combined into narrow loads.
748 bool BackwardsPropagateMask(SDNode *N);
749
750 /// Helper function for mergeConsecutiveStores which merges the component
751 /// store chains.
752 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
753 unsigned NumStores);
754
755 /// Helper function for mergeConsecutiveStores which checks if all the store
756 /// nodes have the same underlying object. We can still reuse the first
757 /// store's pointer info if all the stores are from the same object.
758 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
759
760 /// This is a helper function for mergeConsecutiveStores. When the source
761 /// elements of the consecutive stores are all constants or all extracted
762 /// vector elements, try to merge them into one larger store introducing
763 /// bitcasts if necessary. \return True if a merged store was created.
764 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
765 EVT MemVT, unsigned NumStores,
766 bool IsConstantSrc, bool UseVector,
767 bool UseTrunc);
768
769 /// This is a helper function for mergeConsecutiveStores. Stores that
770 /// potentially may be merged with St are placed in StoreNodes. RootNode is
771 /// a chain predecessor to all store candidates.
772 void getStoreMergeCandidates(StoreSDNode *St,
773 SmallVectorImpl<MemOpLink> &StoreNodes,
774 SDNode *&Root);
775
776 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
777 /// have indirect dependency through their operands. RootNode is the
778 /// predecessor to all stores calculated by getStoreMergeCandidates and is
779 /// used to prune the dependency check. \return True if safe to merge.
780 bool checkMergeStoreCandidatesForDependencies(
781 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
782 SDNode *RootNode);
783
784 /// This is a helper function for mergeConsecutiveStores. Given a list of
785 /// store candidates, find the first N that are consecutive in memory.
786 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
787 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
788 int64_t ElementSizeBytes) const;
789
790 /// This is a helper function for mergeConsecutiveStores. It is used for
791 /// store chains that are composed entirely of constant values.
792 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
793 unsigned NumConsecutiveStores,
794 EVT MemVT, SDNode *Root, bool AllowVectors);
795
796 /// This is a helper function for mergeConsecutiveStores. It is used for
797 /// store chains that are composed entirely of extracted vector elements.
798 /// When extracting multiple vector elements, try to store them in one
799 /// vector store rather than a sequence of scalar stores.
800 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
801 unsigned NumConsecutiveStores, EVT MemVT,
802 SDNode *Root);
803
804 /// This is a helper function for mergeConsecutiveStores. It is used for
805 /// store chains that are composed entirely of loaded values.
806 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
807 unsigned NumConsecutiveStores, EVT MemVT,
808 SDNode *Root, bool AllowVectors,
809 bool IsNonTemporalStore, bool IsNonTemporalLoad);
810
811 /// Merge consecutive store operations into a wide store.
812 /// This optimization uses wide integers or vectors when possible.
813 /// \return true if stores were merged.
814 bool mergeConsecutiveStores(StoreSDNode *St);
815
816 /// Try to transform a truncation where C is a constant:
817 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
818 ///
819 /// \p N needs to be a truncation and its first operand an AND. Other
820 /// requirements are checked by the function (e.g. that trunc is
821 /// single-use) and if missed an empty SDValue is returned.
822 SDValue distributeTruncateThroughAnd(SDNode *N);
823
  /// Helper function to determine whether the target supports operation
  /// given by \p Opcode for type \p VT, that is, whether the operation
  /// is legal or custom before legalizing operations, and whether is
  /// legal (but not custom) after legalization.
  bool hasOperation(unsigned Opcode, EVT VT) {
    // LegalOperations selects the stricter post-legalization query.
    return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
  }
831
832 public:
833 /// Runs the dag combiner on all nodes in the work list
834 void Run(CombineLevel AtLevel);
835
836 SelectionDAG &getDAG() const { return DAG; }
837
  /// Returns a type large enough to hold any valid shift amount - before type
  /// legalization these can be huge.
  /// \pre \p LHSTy is an integer type (asserted below).
  EVT getShiftAmountTy(EVT LHSTy) {
    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
  }
844
845 /// This method returns true if we are running before type legalization or
846 /// if the specified VT is legal.
847 bool isTypeLegal(const EVT &VT) {
848 if (!LegalTypes) return true;
849 return TLI.isTypeLegal(VT);
850 }
851
  /// Convenience wrapper around TargetLowering::getSetCCResultType, supplying
  /// this combiner's data layout and LLVM context.
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }
856
857 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
858 SDValue OrigLoad, SDValue ExtLoad,
859 ISD::NodeType ExtType);
860 };
861
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  /// Invoked by the DAG when a node is deleted; drops the stale pointer
  /// from the combiner's worklist so it is never revisited.
  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};
875
/// DAGUpdateListener that tells the combiner about newly created nodes so
/// they can be considered for pruning.
class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  // compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};
887
888class EmptyMatchContext {
889 SelectionDAG &DAG;
890 const TargetLowering &TLI;
891
892public:
893 EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
894 : DAG(DAG), TLI(TLI) {}
895
896 bool match(SDValue OpN, unsigned Opcode) const {
897 return Opcode == OpN->getOpcode();
898 }
899
900 // Same as SelectionDAG::getNode().
901 template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
902 return DAG.getNode(std::forward<ArgT>(Args)...);
903 }
904
905 bool isOperationLegalOrCustom(unsigned Op, EVT VT,
906 bool LegalOnly = false) const {
907 return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
908 }
909};
910
911class VPMatchContext {
912 SelectionDAG &DAG;
913 const TargetLowering &TLI;
914 SDValue RootMaskOp;
915 SDValue RootVectorLenOp;
916
917public:
918 VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
919 : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
920 assert(Root->isVPOpcode());
921 if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
922 RootMaskOp = Root->getOperand(*RootMaskPos);
923
924 if (auto RootVLenPos =
926 RootVectorLenOp = Root->getOperand(*RootVLenPos);
927 }
928
929 /// whether \p OpVal is a node that is functionally compatible with the
930 /// NodeType \p Opc
931 bool match(SDValue OpVal, unsigned Opc) const {
932 if (!OpVal->isVPOpcode())
933 return OpVal->getOpcode() == Opc;
934
935 auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
936 !OpVal->getFlags().hasNoFPExcept());
937 if (BaseOpc != Opc)
938 return false;
939
940 // Make sure the mask of OpVal is true mask or is same as Root's.
941 unsigned VPOpcode = OpVal->getOpcode();
942 if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
943 SDValue MaskOp = OpVal.getOperand(*MaskPos);
944 if (RootMaskOp != MaskOp &&
946 return false;
947 }
948
949 // Make sure the EVL of OpVal is same as Root's.
950 if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
951 if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
952 return false;
953 return true;
954 }
955
956 // Specialize based on number of operands.
957 // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
958 // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
959 // DAG.getNode(Opcode, DL, VT); }
960 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
961 unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
962 assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
963 ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
964 return DAG.getNode(VPOpcode, DL, VT,
965 {Operand, RootMaskOp, RootVectorLenOp});
966 }
967
968 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
969 SDValue N2) {
970 unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
971 assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
972 ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
973 return DAG.getNode(VPOpcode, DL, VT,
974 {N1, N2, RootMaskOp, RootVectorLenOp});
975 }
976
977 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
978 SDValue N2, SDValue N3) {
979 unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
980 assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
981 ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
982 return DAG.getNode(VPOpcode, DL, VT,
983 {N1, N2, N3, RootMaskOp, RootVectorLenOp});
984 }
985
986 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
987 SDNodeFlags Flags) {
988 unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
989 assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
990 ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
991 return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
992 Flags);
993 }
994
995 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
996 SDValue N2, SDNodeFlags Flags) {
997 unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
998 assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
999 ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
1000 return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
1001 Flags);
1002 }
1003
1004 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
1005 SDValue N2, SDValue N3, SDNodeFlags Flags) {
1006 unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
1007 assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
1008 ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
1009 return DAG.getNode(VPOpcode, DL, VT,
1010 {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
1011 }
1012
1013 bool isOperationLegalOrCustom(unsigned Op, EVT VT,
1014 bool LegalOnly = false) const {
1015 unsigned VPOp = ISD::getVPForBaseOpcode(Op);
1016 return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
1017 }
1018};
1019
1020} // end anonymous namespace
1021
1022//===----------------------------------------------------------------------===//
1023// TargetLowering::DAGCombinerInfo implementation
1024//===----------------------------------------------------------------------===//
1025
1027 ((DAGCombiner*)DC)->AddToWorklist(N);
1028}
1029
1031CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
1032 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
1033}
1034
1036CombineTo(SDNode *N, SDValue Res, bool AddTo) {
1037 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
1038}
1039
1041CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
1042 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
1043}
1044
1047 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
1048}
1049
1052 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
1053}
1054
1055//===----------------------------------------------------------------------===//
1056// Helper Functions
1057//===----------------------------------------------------------------------===//
1058
1059void DAGCombiner::deleteAndRecombine(SDNode *N) {
1060 removeFromWorklist(N);
1061
1062 // If the operands of this node are only used by the node, they will now be
1063 // dead. Make sure to re-visit them and recursively delete dead nodes.
1064 for (const SDValue &Op : N->ops())
1065 // For an operand generating multiple values, one of the values may
1066 // become dead allowing further simplification (e.g. split index
1067 // arithmetic from an indexed load).
1068 if (Op->hasOneUse() || Op->getNumValues() > 1)
1069 AddToWorklist(Op.getNode());
1070
1071 DAG.DeleteNode(N);
1072}
1073
1074// APInts must be the same size for most operations, this helper
1075// function zero extends the shorter of the pair so that they match.
1076// We provide an Offset so that we can create bitwidths that won't overflow.
1077static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
1078 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
1079 LHS = LHS.zext(Bits);
1080 RHS = RHS.zext(Bits);
1081}
1082
1083// Return true if this node is a setcc, or is a select_cc
1084// that selects between the target values used for true and false, making it
1085// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
1086// the appropriate nodes based on the type of node we are checking. This
1087// simplifies life a bit for the callers.
1088bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
1089 SDValue &CC, bool MatchStrict) const {
1090 if (N.getOpcode() == ISD::SETCC) {
1091 LHS = N.getOperand(0);
1092 RHS = N.getOperand(1);
1093 CC = N.getOperand(2);
1094 return true;
1095 }
1096
1097 if (MatchStrict &&
1098 (N.getOpcode() == ISD::STRICT_FSETCC ||
1099 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1100 LHS = N.getOperand(1);
1101 RHS = N.getOperand(2);
1102 CC = N.getOperand(3);
1103 return true;
1104 }
1105
1106 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1107 !TLI.isConstFalseVal(N.getOperand(3)))
1108 return false;
1109
1110 if (TLI.getBooleanContents(N.getValueType()) ==
1112 return false;
1113
1114 LHS = N.getOperand(0);
1115 RHS = N.getOperand(1);
1116 CC = N.getOperand(4);
1117 return true;
1118}
1119
1120/// Return true if this is a SetCC-equivalent operation with only one use.
1121/// If this is true, it allows the users to invert the operation for free when
1122/// it is profitable to do so.
1123bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1124 SDValue N0, N1, N2;
1125 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1126 return true;
1127 return false;
1128}
1129
1131 if (!ScalarTy.isSimple())
1132 return false;
1133
1134 uint64_t MaskForTy = 0ULL;
1135 switch (ScalarTy.getSimpleVT().SimpleTy) {
1136 case MVT::i8:
1137 MaskForTy = 0xFFULL;
1138 break;
1139 case MVT::i16:
1140 MaskForTy = 0xFFFFULL;
1141 break;
1142 case MVT::i32:
1143 MaskForTy = 0xFFFFFFFFULL;
1144 break;
1145 default:
1146 return false;
1147 break;
1148 }
1149
1150 APInt Val;
1151 if (ISD::isConstantSplatVector(N, Val))
1152 return Val.getLimitedValue() == MaskForTy;
1153
1154 return false;
1155}
1156
1157// Determines if it is a constant integer or a splat/build vector of constant
1158// integers (and undefs).
1159// Do not permit build vector implicit truncation.
1160static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1161 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1162 return !(Const->isOpaque() && NoOpaques);
1163 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1164 return false;
1165 unsigned BitWidth = N.getScalarValueSizeInBits();
1166 for (const SDValue &Op : N->op_values()) {
1167 if (Op.isUndef())
1168 continue;
1169 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1170 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1171 (Const->isOpaque() && NoOpaques))
1172 return false;
1173 }
1174 return true;
1175}
1176
1177// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1178// undef's.
1179static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1180 if (V.getOpcode() != ISD::BUILD_VECTOR)
1181 return false;
1182 return isConstantOrConstantVector(V, NoOpaques) ||
1184}
1185
// Determine if the index of this indexed load can be split from the load,
// i.e. the global MaySplitLoadIndex option is enabled and the index operand
// is either not a TargetConstant or is a non-opaque one.
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}
1192
1193bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1194 const SDLoc &DL,
1195 SDNode *N,
1196 SDValue N0,
1197 SDValue N1) {
1198 // Currently this only tries to ensure we don't undo the GEP splits done by
1199 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1200 // we check if the following transformation would be problematic:
1201 // (load/store (add, (add, x, offset1), offset2)) ->
1202 // (load/store (add, x, offset1+offset2)).
1203
1204 // (load/store (add, (add, x, y), offset2)) ->
1205 // (load/store (add, (add, x, offset2), y)).
1206
1207 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1208 return false;
1209
1210 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1211 if (!C2)
1212 return false;
1213
1214 const APInt &C2APIntVal = C2->getAPIntValue();
1215 if (C2APIntVal.getSignificantBits() > 64)
1216 return false;
1217
1218 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1219 if (N0.hasOneUse())
1220 return false;
1221
1222 const APInt &C1APIntVal = C1->getAPIntValue();
1223 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1224 if (CombinedValueIntVal.getSignificantBits() > 64)
1225 return false;
1226 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1227
1228 for (SDNode *Node : N->uses()) {
1229 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1230 // Is x[offset2] already not a legal addressing mode? If so then
1231 // reassociating the constants breaks nothing (we test offset2 because
1232 // that's the one we hope to fold into the load or store).
1234 AM.HasBaseReg = true;
1235 AM.BaseOffs = C2APIntVal.getSExtValue();
1236 EVT VT = LoadStore->getMemoryVT();
1237 unsigned AS = LoadStore->getAddressSpace();
1238 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1239 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1240 continue;
1241
1242 // Would x[offset1+offset2] still be a legal addressing mode?
1243 AM.BaseOffs = CombinedValue;
1244 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1245 return true;
1246 }
1247 }
1248 } else {
1249 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1250 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1251 return false;
1252
1253 for (SDNode *Node : N->uses()) {
1254 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1255 if (!LoadStore)
1256 return false;
1257
1258 // Is x[offset2] a legal addressing mode? If so then
1259 // reassociating the constants breaks address pattern
1261 AM.HasBaseReg = true;
1262 AM.BaseOffs = C2APIntVal.getSExtValue();
1263 EVT VT = LoadStore->getMemoryVT();
1264 unsigned AS = LoadStore->getAddressSpace();
1265 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1266 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1267 return false;
1268 }
1269 return true;
1270 }
1271
1272 return false;
1273}
1274
1275// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1276// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1277SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1278 SDValue N0, SDValue N1,
1279 SDNodeFlags Flags) {
1280 EVT VT = N0.getValueType();
1281
1282 if (N0.getOpcode() != Opc)
1283 return SDValue();
1284
1285 SDValue N00 = N0.getOperand(0);
1286 SDValue N01 = N0.getOperand(1);
1287
1290 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1291 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1292 return DAG.getNode(Opc, DL, VT, N00, OpNode);
1293 return SDValue();
1294 }
1295 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1296 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1297 // iff (op x, c1) has one use
1298 SDNodeFlags NewFlags;
1299 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1300 Flags.hasNoUnsignedWrap())
1301 NewFlags.setNoUnsignedWrap(true);
1302 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1303 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1304 }
1305 }
1306
1307 // Check for repeated operand logic simplifications.
1308 if (Opc == ISD::AND || Opc == ISD::OR) {
1309 // (N00 & N01) & N00 --> N00 & N01
1310 // (N00 & N01) & N01 --> N00 & N01
1311 // (N00 | N01) | N00 --> N00 | N01
1312 // (N00 | N01) | N01 --> N00 | N01
1313 if (N1 == N00 || N1 == N01)
1314 return N0;
1315 }
1316 if (Opc == ISD::XOR) {
1317 // (N00 ^ N01) ^ N00 --> N01
1318 if (N1 == N00)
1319 return N01;
1320 // (N00 ^ N01) ^ N01 --> N00
1321 if (N1 == N01)
1322 return N00;
1323 }
1324
1325 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1326 if (N1 != N01) {
1327 // Reassociate if (op N00, N1) already exist
1328 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1329 // if Op (Op N00, N1), N01 already exist
1330 // we need to stop reassciate to avoid dead loop
1331 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1332 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1333 }
1334 }
1335
1336 if (N1 != N00) {
1337 // Reassociate if (op N01, N1) already exist
1338 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1339 // if Op (Op N01, N1), N00 already exist
1340 // we need to stop reassciate to avoid dead loop
1341 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1342 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1343 }
1344 }
1345
1346 // Reassociate the operands from (OR/AND (OR/AND(N00, N001)), N1) to (OR/AND
1347 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1348 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1349 // comparisons with the same predicate. This enables optimizations as the
1350 // following one:
1351 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1352 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1353 if (Opc == ISD::AND || Opc == ISD::OR) {
1354 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1355 N01->getOpcode() == ISD::SETCC) {
1356 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1357 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1358 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1359 if (CC1 == CC00 && CC1 != CC01) {
1360 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1361 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1362 }
1363 if (CC1 == CC01 && CC1 != CC00) {
1364 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1365 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1366 }
1367 }
1368 }
1369 }
1370
1371 return SDValue();
1372}
1373
1374// Try to reassociate commutative binops.
1375SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1376 SDValue N1, SDNodeFlags Flags) {
1377 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1378
1379 // Floating-point reassociation is not allowed without loose FP math.
1380 if (N0.getValueType().isFloatingPoint() ||
1382 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1383 return SDValue();
1384
1385 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1386 return Combined;
1387 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1388 return Combined;
1389 return SDValue();
1390}
1391
// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
// Note that we only expect Flags to be passed from FP operations. For integer
// operations they need to be dropped.
SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
                                          const SDLoc &DL, EVT VT, SDValue N0,
                                          SDValue N1, SDNodeFlags Flags) {
  // Both operands must be the same reduction opcode over same-typed vector
  // inputs, each with a single use, and the target must consider the
  // reassociation profitable.
  // NOTE(review): the extraction's line numbering skips one line inside this
  // condition (doxygen 1401) — a conjunct may have been dropped here; verify
  // against upstream LLVM before relying on this text.
  if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
      N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
      N0->hasOneUse() && N1->hasOneUse() &&
      TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
    // Propagate the incoming flags onto both newly created nodes.
    SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
    return DAG.getNode(RedOpc, DL, VT,
                       DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
                                   N0.getOperand(0), N1.getOperand(0)));
  }
  return SDValue();
}
1410
/// Replace all \p NumTo results of \p N with the values in \p To, update
/// statistics, optionally requeue the replacement nodes and their users,
/// and delete \p N if it ends up dead. Returns SDValue(N, 0) for caller
/// convenience.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  // Each non-null replacement must have the same type as the result it
  // replaces (null entries are tolerated here and skipped below).
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // The listener drops any nodes deleted during RAUW from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode())
        AddToWorklistWithUsers(To[i].getNode());
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1440
1441void DAGCombiner::
1442CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1443 // Replace the old value with the new one.
1444 ++NodesCombined;
1445 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1446 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1447
1448 // Replace all uses.
1449 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1450
1451 // Push the new node and any (possibly new) users onto the worklist.
1452 AddToWorklistWithUsers(TLO.New.getNode());
1453
1454 // Finally, if the node is now dead, remove it from the graph.
1455 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1456}
1457
1458/// Check the specified integer node value to see if it can be simplified or if
1459/// things it uses can be simplified by bit propagation. If so, return true.
1460bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1461 const APInt &DemandedElts,
1462 bool AssumeSingleUse) {
1463 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1464 KnownBits Known;
1465 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1466 AssumeSingleUse))
1467 return false;
1468
1469 // Revisit the node.
1470 AddToWorklist(Op.getNode());
1471
1472 CommitTargetLoweringOpt(TLO);
1473 return true;
1474}
1475
1476/// Check the specified vector node value to see if it can be simplified or
1477/// if things it uses can be simplified as it only uses some of the elements.
1478/// If so, return true.
1479bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1480 const APInt &DemandedElts,
1481 bool AssumeSingleUse) {
1482 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1483 APInt KnownUndef, KnownZero;
1484 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1485 TLO, 0, AssumeSingleUse))
1486 return false;
1487
1488 // Revisit the node.
1489 AddToWorklist(Op.getNode());
1490
1491 CommitTargetLoweringOpt(TLO);
1492 return true;
1493}
1494
/// Replace all uses of \p Load with the promoted load \p ExtLoad: value uses
/// get a truncate of ExtLoad's value back to the original type, chain uses
/// get ExtLoad's chain, and the original load is then recursively deleted.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.dump(&DAG); dbgs() << '\n');

  // Result 0 is the loaded value, result 1 is the chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));

  AddToWorklist(Trunc.getNode());
  recursivelyDeleteUnusedNodes(Load);
}
1509
1510SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1511 Replace = false;
1512 SDLoc DL(Op);
1513 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1514 LoadSDNode *LD = cast<LoadSDNode>(Op);
1515 EVT MemVT = LD->getMemoryVT();
1517 : LD->getExtensionType();
1518 Replace = true;
1519 return DAG.getExtLoad(ExtType, DL, PVT,
1520 LD->getChain(), LD->getBasePtr(),
1521 MemVT, LD->getMemOperand());
1522 }
1523
1524 unsigned Opc = Op.getOpcode();
1525 switch (Opc) {
1526 default: break;
1527 case ISD::AssertSext:
1528 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1529 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1530 break;
1531 case ISD::AssertZext:
1532 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1533 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1534 break;
1535 case ISD::Constant: {
1536 unsigned ExtOpc =
1537 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1538 return DAG.getNode(ExtOpc, DL, PVT, Op);
1539 }
1540 }
1541
1542 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1543 return SDValue();
1544 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1545}
1546
1547SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1549 return SDValue();
1550 EVT OldVT = Op.getValueType();
1551 SDLoc DL(Op);
1552 bool Replace = false;
1553 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1554 if (!NewOp.getNode())
1555 return SDValue();
1556 AddToWorklist(NewOp.getNode());
1557
1558 if (Replace)
1559 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1560 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1561 DAG.getValueType(OldVT));
1562}
1563
1564SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1565 EVT OldVT = Op.getValueType();
1566 SDLoc DL(Op);
1567 bool Replace = false;
1568 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1569 if (!NewOp.getNode())
1570 return SDValue();
1571 AddToWorklist(NewOp.getNode());
1572
1573 if (Replace)
1574 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1575 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1576}
1577
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are promoted here.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));

    // Promote both operands to PVT; Replace0/Replace1 record whether a load
    // was turned into a new extending load that must be RAUW'd.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Do the operation in the wider type and truncate back to VT.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need additional
    // replacements if there are additional uses.
    // Note: We are checking uses of the *nodes* (SDNode) rather than values
    // (SDValue) here because the node may reference multiple values
    // (for example, the chain value of a load node).
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}
1645
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer shifts are promoted here.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));

    // Only the shifted value is promoted; the extension kind must match the
    // shift: SRA needs sign bits (sext), SRL needs zero high bits (zext),
    // other shifts can take an any-extend.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    // The shift amount operand is reused unchanged.
    SDValue N1 = Op.getOperand(1);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1697
1698SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1699 if (!LegalOperations)
1700 return SDValue();
1701
1702 EVT VT = Op.getValueType();
1703 if (VT.isVector() || !VT.isInteger())
1704 return SDValue();
1705
1706 // If operation type is 'undesirable', e.g. i16 on x86, consider
1707 // promoting it.
1708 unsigned Opc = Op.getOpcode();
1709 if (TLI.isTypeDesirableForOp(Opc, VT))
1710 return SDValue();
1711
1712 EVT PVT = VT;
1713 // Consult target whether it is a good idea to promote this operation and
1714 // what's the right type to promote it to.
1715 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1716 assert(PVT != VT && "Don't know what type to promote to!");
1717 // fold (aext (aext x)) -> (aext x)
1718 // fold (aext (zext x)) -> (zext x)
1719 // fold (aext (sext x)) -> (sext x)
1720 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1721 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1722 }
1723 return SDValue();
1724}
1725
1726bool DAGCombiner::PromoteLoad(SDValue Op) {
1727 if (!LegalOperations)
1728 return false;
1729
1730 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1731 return false;
1732
1733 EVT VT = Op.getValueType();
1734 if (VT.isVector() || !VT.isInteger())
1735 return false;
1736
1737 // If operation type is 'undesirable', e.g. i16 on x86, consider
1738 // promoting it.
1739 unsigned Opc = Op.getOpcode();
1740 if (TLI.isTypeDesirableForOp(Opc, VT))
1741 return false;
1742
1743 EVT PVT = VT;
1744 // Consult target whether it is a good idea to promote this operation and
1745 // what's the right type to promote it to.
1746 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1747 assert(PVT != VT && "Don't know what type to promote to!");
1748
1749 SDLoc DL(Op);
1750 SDNode *N = Op.getNode();
1751 LoadSDNode *LD = cast<LoadSDNode>(N);
1752 EVT MemVT = LD->getMemoryVT();
1754 : LD->getExtensionType();
1755 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1756 LD->getChain(), LD->getBasePtr(),
1757 MemVT, LD->getMemOperand());
1758 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1759
1760 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1761 Result.dump(&DAG); dbgs() << '\n');
1762
1764 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1765
1766 AddToWorklist(Result.getNode());
1767 recursivelyDeleteUnusedNodes(N);
1768 return true;
1769 }
1770
1771 return false;
1772}
1773
1774/// Recursively delete a node which has no uses and any operands for
1775/// which it is the only use.
1776///
1777/// Note that this both deletes the nodes and removes them from the worklist.
1778/// It also adds any nodes who have had a user deleted to the worklist as they
1779/// may now have only one use and subject to other combines.
1780bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1781 if (!N->use_empty())
1782 return false;
1783
1785 Nodes.insert(N);
1786 do {
1787 N = Nodes.pop_back_val();
1788 if (!N)
1789 continue;
1790
1791 if (N->use_empty()) {
1792 for (const SDValue &ChildN : N->op_values())
1793 Nodes.insert(ChildN.getNode());
1794
1795 removeFromWorklist(N);
1796 DAG.DeleteNode(N);
1797 } else {
1798 AddToWorklist(N);
1799 }
1800 } while (!Nodes.empty());
1801 return true;
1802}
1803
1804//===----------------------------------------------------------------------===//
1805// Main DAG Combiner implementation
1806//===----------------------------------------------------------------------===//
1807
/// Top-level combiner driver: seed the worklist with every node in the DAG,
/// then iterate to a fixpoint, combining, (re-)legalizing and deleting nodes
/// at the given legalization level.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // RAII hook: while alive, newly-created DAG nodes are inserted into the
  // worklist automatically.
  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  //
  // Note: All nodes are not added to PruningList here, this is because the only
  // nodes which can be deleted are those which have no uses and all other nodes
  // which would otherwise be added to the worklist by the first call to
  // getNextWorklistEntry are already present in it.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // Legalization may have replaced N entirely; if so there is nothing
      // left to combine here.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    CombinedNodes.insert(N);
    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));

    if (N->getNumValues() == RV->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // The replacement provides fewer results; this is only valid for a
      // single-result node whose one value matches.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist.  Omit this if the
    // new node is the EntryToken (e.g. if a store managed to get optimized
    // out), because re-visiting the EntryToken and its users will not uncover
    // any additional opportunities, but there may be a large number of such
    // users, potentially causing compile time explosion.
    if (RV.getOpcode() != ISD::EntryToken)
      AddToWorklistWithUsers(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.  This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1911
1912SDValue DAGCombiner::visit(SDNode *N) {
1913 switch (N->getOpcode()) {
1914 default: break;
1915 case ISD::TokenFactor: return visitTokenFactor(N);
1916 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1917 case ISD::ADD: return visitADD(N);
1918 case ISD::SUB: return visitSUB(N);
1919 case ISD::SADDSAT:
1920 case ISD::UADDSAT: return visitADDSAT(N);
1921 case ISD::SSUBSAT:
1922 case ISD::USUBSAT: return visitSUBSAT(N);
1923 case ISD::ADDC: return visitADDC(N);
1924 case ISD::SADDO:
1925 case ISD::UADDO: return visitADDO(N);
1926 case ISD::SUBC: return visitSUBC(N);
1927 case ISD::SSUBO:
1928 case ISD::USUBO: return visitSUBO(N);
1929 case ISD::ADDE: return visitADDE(N);
1930 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1931 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1932 case ISD::SUBE: return visitSUBE(N);
1933 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1934 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1935 case ISD::SMULFIX:
1936 case ISD::SMULFIXSAT:
1937 case ISD::UMULFIX:
1938 case ISD::UMULFIXSAT: return visitMULFIX(N);
1939 case ISD::MUL: return visitMUL(N);
1940 case ISD::SDIV: return visitSDIV(N);
1941 case ISD::UDIV: return visitUDIV(N);
1942 case ISD::SREM:
1943 case ISD::UREM: return visitREM(N);
1944 case ISD::MULHU: return visitMULHU(N);
1945 case ISD::MULHS: return visitMULHS(N);
1946 case ISD::AVGFLOORS:
1947 case ISD::AVGFLOORU:
1948 case ISD::AVGCEILS:
1949 case ISD::AVGCEILU: return visitAVG(N);
1950 case ISD::ABDS:
1951 case ISD::ABDU: return visitABD(N);
1952 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1953 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1954 case ISD::SMULO:
1955 case ISD::UMULO: return visitMULO(N);
1956 case ISD::SMIN:
1957 case ISD::SMAX:
1958 case ISD::UMIN:
1959 case ISD::UMAX: return visitIMINMAX(N);
1960 case ISD::AND: return visitAND(N);
1961 case ISD::OR: return visitOR(N);
1962 case ISD::XOR: return visitXOR(N);
1963 case ISD::SHL: return visitSHL(N);
1964 case ISD::SRA: return visitSRA(N);
1965 case ISD::SRL: return visitSRL(N);
1966 case ISD::ROTR:
1967 case ISD::ROTL: return visitRotate(N);
1968 case ISD::FSHL:
1969 case ISD::FSHR: return visitFunnelShift(N);
1970 case ISD::SSHLSAT:
1971 case ISD::USHLSAT: return visitSHLSAT(N);
1972 case ISD::ABS: return visitABS(N);
1973 case ISD::BSWAP: return visitBSWAP(N);
1974 case ISD::BITREVERSE: return visitBITREVERSE(N);
1975 case ISD::CTLZ: return visitCTLZ(N);
1976 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1977 case ISD::CTTZ: return visitCTTZ(N);
1978 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1979 case ISD::CTPOP: return visitCTPOP(N);
1980 case ISD::SELECT: return visitSELECT(N);
1981 case ISD::VSELECT: return visitVSELECT(N);
1982 case ISD::SELECT_CC: return visitSELECT_CC(N);
1983 case ISD::SETCC: return visitSETCC(N);
1984 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1985 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1986 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1987 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1988 case ISD::AssertSext:
1989 case ISD::AssertZext: return visitAssertExt(N);
1990 case ISD::AssertAlign: return visitAssertAlign(N);
1991 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1994 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1995 case ISD::TRUNCATE: return visitTRUNCATE(N);
1996 case ISD::BITCAST: return visitBITCAST(N);
1997 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1998 case ISD::FADD: return visitFADD(N);
1999 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
2000 case ISD::FSUB: return visitFSUB(N);
2001 case ISD::FMUL: return visitFMUL(N);
2002 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
2003 case ISD::FDIV: return visitFDIV(N);
2004 case ISD::FREM: return visitFREM(N);
2005 case ISD::FSQRT: return visitFSQRT(N);
2006 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
2007 case ISD::FPOW: return visitFPOW(N);
2008 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
2009 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
2010 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
2011 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
2012 case ISD::FP_ROUND: return visitFP_ROUND(N);
2013 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
2014 case ISD::FNEG: return visitFNEG(N);
2015 case ISD::FABS: return visitFABS(N);
2016 case ISD::FFLOOR: return visitFFLOOR(N);
2017 case ISD::FMINNUM:
2018 case ISD::FMAXNUM:
2019 case ISD::FMINIMUM:
2020 case ISD::FMAXIMUM: return visitFMinMax(N);
2021 case ISD::FCEIL: return visitFCEIL(N);
2022 case ISD::FTRUNC: return visitFTRUNC(N);
2023 case ISD::FFREXP: return visitFFREXP(N);
2024 case ISD::BRCOND: return visitBRCOND(N);
2025 case ISD::BR_CC: return visitBR_CC(N);
2026 case ISD::LOAD: return visitLOAD(N);
2027 case ISD::STORE: return visitSTORE(N);
2028 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
2029 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2030 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
2031 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2032 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
2033 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
2034 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
2035 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
2036 case ISD::MGATHER: return visitMGATHER(N);
2037 case ISD::MLOAD: return visitMLOAD(N);
2038 case ISD::MSCATTER: return visitMSCATTER(N);
2039 case ISD::MSTORE: return visitMSTORE(N);
2040 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
2041 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
2042 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
2043 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
2044 case ISD::FREEZE: return visitFREEZE(N);
2045 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
2046 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
2049 case ISD::VECREDUCE_ADD:
2050 case ISD::VECREDUCE_MUL:
2051 case ISD::VECREDUCE_AND:
2052 case ISD::VECREDUCE_OR:
2053 case ISD::VECREDUCE_XOR:
2061 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2062#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2063#include "llvm/IR/VPIntrinsics.def"
2064 return visitVPOp(N);
2065 }
2066 return SDValue();
2067}
2068
2069SDValue DAGCombiner::combine(SDNode *N) {
2070 SDValue RV;
2071 if (!DisableGenericCombines)
2072 RV = visit(N);
2073
2074 // If nothing happened, try a target-specific DAG combine.
2075 if (!RV.getNode()) {
2076 assert(N->getOpcode() != ISD::DELETED_NODE &&
2077 "Node was deleted but visit returned NULL!");
2078
2079 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2080 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2081
2082 // Expose the DAG combiner to the target combiner impls.
2084 DagCombineInfo(DAG, Level, false, this);
2085
2086 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2087 }
2088 }
2089
2090 // If nothing happened still, try promoting the operation.
2091 if (!RV.getNode()) {
2092 switch (N->getOpcode()) {
2093 default: break;
2094 case ISD::ADD:
2095 case ISD::SUB:
2096 case ISD::MUL:
2097 case ISD::AND:
2098 case ISD::OR:
2099 case ISD::XOR:
2100 RV = PromoteIntBinOp(SDValue(N, 0));
2101 break;
2102 case ISD::SHL:
2103 case ISD::SRA:
2104 case ISD::SRL:
2105 RV = PromoteIntShiftOp(SDValue(N, 0));
2106 break;
2107 case ISD::SIGN_EXTEND:
2108 case ISD::ZERO_EXTEND:
2109 case ISD::ANY_EXTEND:
2110 RV = PromoteExtend(SDValue(N, 0));
2111 break;
2112 case ISD::LOAD:
2113 if (PromoteLoad(SDValue(N, 0)))
2114 RV = SDValue(N, 0);
2115 break;
2116 }
2117 }
2118
2119 // If N is a commutative binary node, try to eliminate it if the commuted
2120 // version is already present in the DAG.
2121 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2122 SDValue N0 = N->getOperand(0);
2123 SDValue N1 = N->getOperand(1);
2124
2125 // Constant operands are canonicalized to RHS.
2126 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2127 SDValue Ops[] = {N1, N0};
2128 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2129 N->getFlags());
2130 if (CSENode)
2131 return SDValue(CSENode, 0);
2132 }
2133 }
2134
2135 return RV;
2136}
2137
2138/// Given a node, return its input chain if it has one, otherwise return a null
2139/// sd operand.
2141 if (unsigned NumOps = N->getNumOperands()) {
2142 if (N->getOperand(0).getValueType() == MVT::Other)
2143 return N->getOperand(0);
2144 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2145 return N->getOperand(NumOps-1);
2146 for (unsigned i = 1; i < NumOps-1; ++i)
2147 if (N->getOperand(i).getValueType() == MVT::Other)
2148 return N->getOperand(i);
2149 }
2150 return SDValue();
2151}
2152
2153SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2154 // If N has two operands, where one has an input chain equal to the other,
2155 // the 'other' chain is redundant.
2156 if (N->getNumOperands() == 2) {
2157 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2158 return N->getOperand(0);
2159 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2160 return N->getOperand(1);
2161 }
2162
2163 // Don't simplify token factors if optnone.
2164 if (OptLevel == CodeGenOptLevel::None)
2165 return SDValue();
2166
2167 // Don't simplify the token factor if the node itself has too many operands.
2168 if (N->getNumOperands() > TokenFactorInlineLimit)
2169 return SDValue();
2170
2171 // If the sole user is a token factor, we should make sure we have a
2172 // chance to merge them together. This prevents TF chains from inhibiting
2173 // optimizations.
2174 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2175 AddToWorklist(*(N->use_begin()));
2176
2177 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2178 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2180 bool Changed = false; // If we should replace this token factor.
2181
2182 // Start out with this token factor.
2183 TFs.push_back(N);
2184
2185 // Iterate through token factors. The TFs grows when new token factors are
2186 // encountered.
2187 for (unsigned i = 0; i < TFs.size(); ++i) {
2188 // Limit number of nodes to inline, to avoid quadratic compile times.
2189 // We have to add the outstanding Token Factors to Ops, otherwise we might
2190 // drop Ops from the resulting Token Factors.
2191 if (Ops.size() > TokenFactorInlineLimit) {
2192 for (unsigned j = i; j < TFs.size(); j++)
2193 Ops.emplace_back(TFs[j], 0);
2194 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2195 // combiner worklist later.
2196 TFs.resize(i);
2197 break;
2198 }
2199
2200 SDNode *TF = TFs[i];
2201 // Check each of the operands.
2202 for (const SDValue &Op : TF->op_values()) {
2203 switch (Op.getOpcode()) {
2204 case ISD::EntryToken:
2205 // Entry tokens don't need to be added to the list. They are
2206 // redundant.
2207 Changed = true;
2208 break;
2209
2210 case ISD::TokenFactor:
2211 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2212 // Queue up for processing.
2213 TFs.push_back(Op.getNode());
2214 Changed = true;
2215 break;
2216 }
2217 [[fallthrough]];
2218
2219 default:
2220 // Only add if it isn't already in the list.
2221 if (SeenOps.insert(Op.getNode()).second)
2222 Ops.push_back(Op);
2223 else
2224 Changed = true;
2225 break;
2226 }
2227 }
2228 }
2229
2230 // Re-visit inlined Token Factors, to clean them up in case they have been
2231 // removed. Skip the first Token Factor, as this is the current node.
2232 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2233 AddToWorklist(TFs[i]);
2234
2235 // Remove Nodes that are chained to another node in the list. Do so
2236 // by walking up chains breath-first stopping when we've seen
2237 // another operand. In general we must climb to the EntryNode, but we can exit
2238 // early if we find all remaining work is associated with just one operand as
2239 // no further pruning is possible.
2240
2241 // List of nodes to search through and original Ops from which they originate.
2243 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2244 SmallPtrSet<SDNode *, 16> SeenChains;
2245 bool DidPruneOps = false;
2246
2247 unsigned NumLeftToConsider = 0;
2248 for (const SDValue &Op : Ops) {
2249 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2250 OpWorkCount.push_back(1);
2251 }
2252
2253 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2254 // If this is an Op, we can remove the op from the list. Remark any
2255 // search associated with it as from the current OpNumber.
2256 if (SeenOps.contains(Op)) {
2257 Changed = true;
2258 DidPruneOps = true;
2259 unsigned OrigOpNumber = 0;
2260 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2261 OrigOpNumber++;
2262 assert((OrigOpNumber != Ops.size()) &&
2263 "expected to find TokenFactor Operand");
2264 // Re-mark worklist from OrigOpNumber to OpNumber
2265 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2266 if (Worklist[i].second == OrigOpNumber) {
2267 Worklist[i].second = OpNumber;
2268 }
2269 }
2270 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2271 OpWorkCount[OrigOpNumber] = 0;
2272 NumLeftToConsider--;
2273 }
2274 // Add if it's a new chain
2275 if (SeenChains.insert(Op).second) {
2276 OpWorkCount[OpNumber]++;
2277 Worklist.push_back(std::make_pair(Op, OpNumber));
2278 }
2279 };
2280
2281 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2282 // We need at least be consider at least 2 Ops to prune.
2283 if (NumLeftToConsider <= 1)
2284 break;
2285 auto CurNode = Worklist[i].first;
2286 auto CurOpNumber = Worklist[i].second;
2287 assert((OpWorkCount[CurOpNumber] > 0) &&
2288 "Node should not appear in worklist");
2289 switch (CurNode->getOpcode()) {
2290 case ISD::EntryToken:
2291 // Hitting EntryToken is the only way for the search to terminate without
2292 // hitting
2293 // another operand's search. Prevent us from marking this operand
2294 // considered.
2295 NumLeftToConsider++;
2296 break;
2297 case ISD::TokenFactor:
2298 for (const SDValue &Op : CurNode->op_values())
2299 AddToWorklist(i, Op.getNode(), CurOpNumber);
2300 break;
2302 case ISD::LIFETIME_END:
2303 case ISD::CopyFromReg:
2304 case ISD::CopyToReg:
2305 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2306 break;
2307 default:
2308 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2309 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2310 break;
2311 }
2312 OpWorkCount[CurOpNumber]--;
2313 if (OpWorkCount[CurOpNumber] == 0)
2314 NumLeftToConsider--;
2315 }
2316
2317 // If we've changed things around then replace token factor.
2318 if (Changed) {
2320 if (Ops.empty()) {
2321 // The entry token is the only possible outcome.
2322 Result = DAG.getEntryNode();
2323 } else {
2324 if (DidPruneOps) {
2325 SmallVector<SDValue, 8> PrunedOps;
2326 //
2327 for (const SDValue &Op : Ops) {
2328 if (SeenChains.count(Op.getNode()) == 0)
2329 PrunedOps.push_back(Op);
2330 }
2331 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2332 } else {
2333 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2334 }
2335 }
2336 return Result;
2337 }
2338 return SDValue();
2339}
2340
2341/// MERGE_VALUES can always be eliminated.
2342SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2343 WorklistRemover DeadNodes(*this);
2344 // Replacing results may cause a different MERGE_VALUES to suddenly
2345 // be CSE'd with N, and carry its uses with it. Iterate until no
2346 // uses remain, to ensure that the node can be safely deleted.
2347 // First add the users of this node to the work list so that they
2348 // can be tried again once they have new operands.
2349 AddUsersToWorklist(N);
2350 do {
2351 // Do as a single replacement to avoid rewalking use lists.
2353 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2354 Ops.push_back(N->getOperand(i));
2355 DAG.ReplaceAllUsesWith(N, Ops.data());
2356 } while (!N->use_empty());
2357 deleteAndRecombine(N);
2358 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2359}
2360
2361/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2362/// ConstantSDNode pointer else nullptr.
2364 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2365 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2366}
2367
2368// isTruncateOf - If N is a truncate of some other value, return true, record
2369// the value being truncated in Op and which of Op's bits are zero/one in Known.
2370// This function computes KnownBits to avoid a duplicated call to
2371// computeKnownBits in the caller.
2373 KnownBits &Known) {
2374 if (N->getOpcode() == ISD::TRUNCATE) {
2375 Op = N->getOperand(0);
2376 Known = DAG.computeKnownBits(Op);
2377 return true;
2378 }
2379
2380 if (N.getOpcode() != ISD::SETCC ||
2381 N.getValueType().getScalarType() != MVT::i1 ||
2382 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2383 return false;
2384
2385 SDValue Op0 = N->getOperand(0);
2386 SDValue Op1 = N->getOperand(1);
2387 assert(Op0.getValueType() == Op1.getValueType());
2388
2389 if (isNullOrNullSplat(Op0))
2390 Op = Op1;
2391 else if (isNullOrNullSplat(Op1))
2392 Op = Op0;
2393 else
2394 return false;
2395
2396 Known = DAG.computeKnownBits(Op);
2397
2398 return (Known.Zero | 1).isAllOnes();
2399}
2400
2401/// Return true if 'Use' is a load or a store that uses N as its base pointer
2402/// and that N may be folded in the load / store addressing mode.
2404 const TargetLowering &TLI) {
2405 EVT VT;
2406 unsigned AS;
2407
2408 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2409 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2410 return false;
2411 VT = LD->getMemoryVT();
2412 AS = LD->getAddressSpace();
2413 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2414 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2415 return false;
2416 VT = ST->getMemoryVT();
2417 AS = ST->getAddressSpace();
2418 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2419 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2420 return false;
2421 VT = LD->getMemoryVT();
2422 AS = LD->getAddressSpace();
2423 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2424 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2425 return false;
2426 VT = ST->getMemoryVT();
2427 AS = ST->getAddressSpace();
2428 } else {
2429 return false;
2430 }
2431
2433 if (N->getOpcode() == ISD::ADD) {
2434 AM.HasBaseReg = true;
2435 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2436 if (Offset)
2437 // [reg +/- imm]
2438 AM.BaseOffs = Offset->getSExtValue();
2439 else
2440 // [reg +/- reg]
2441 AM.Scale = 1;
2442 } else if (N->getOpcode() == ISD::SUB) {
2443 AM.HasBaseReg = true;
2444 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2445 if (Offset)
2446 // [reg +/- imm]
2447 AM.BaseOffs = -Offset->getSExtValue();
2448 else
2449 // [reg +/- reg]
2450 AM.Scale = 1;
2451 } else {
2452 return false;
2453 }
2454
2455 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2456 VT.getTypeForEVT(*DAG.getContext()), AS);
2457}
2458
2459/// This inverts a canonicalization in IR that replaces a variable select arm
2460/// with an identity constant. Codegen improves if we re-use the variable
2461/// operand rather than load a constant. This can also be converted into a
2462/// masked vector operation if the target supports it.
2464 bool ShouldCommuteOperands) {
2465 // Match a select as operand 1. The identity constant that we are looking for
2466 // is only valid as operand 1 of a non-commutative binop.
2467 SDValue N0 = N->getOperand(0);
2468 SDValue N1 = N->getOperand(1);
2469 if (ShouldCommuteOperands)
2470 std::swap(N0, N1);
2471
2472 // TODO: Should this apply to scalar select too?
2473 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2474 return SDValue();
2475
2476 // We can't hoist all instructions because of immediate UB (not speculatable).
2477 // For example div/rem by zero.
2479 return SDValue();
2480
2481 unsigned Opcode = N->getOpcode();
2482 EVT VT = N->getValueType(0);
2483 SDValue Cond = N1.getOperand(0);
2484 SDValue TVal = N1.getOperand(1);
2485 SDValue FVal = N1.getOperand(2);
2486
2487 // This transform increases uses of N0, so freeze it to be safe.
2488 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2489 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2490 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2491 SDValue F0 = DAG.getFreeze(N0);
2492 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2493 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2494 }
2495 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2496 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2497 SDValue F0 = DAG.getFreeze(N0);
2498 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2499 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2500 }
2501
2502 return SDValue();
2503}
2504
2505SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2506 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2507 "Unexpected binary operator");
2508
2509 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2510 auto BinOpcode = BO->getOpcode();
2511 EVT VT = BO->getValueType(0);
2512 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2513 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2514 return Sel;
2515
2516 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2517 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2518 return Sel;
2519 }
2520
2521 // Don't do this unless the old select is going away. We want to eliminate the
2522 // binary operator, not replace a binop with a select.
2523 // TODO: Handle ISD::SELECT_CC.
2524 unsigned SelOpNo = 0;
2525 SDValue Sel = BO->getOperand(0);
2526 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2527 SelOpNo = 1;
2528 Sel = BO->getOperand(1);
2529
2530 // Peek through trunc to shift amount type.
2531 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2532 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2533 // This is valid when the truncated bits of x are already zero.
2534 SDValue Op;
2535 KnownBits Known;
2536 if (isTruncateOf(DAG, Sel, Op, Known) &&
2538 Sel = Op;
2539 }
2540 }
2541
2542 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2543 return SDValue();
2544
2545 SDValue CT = Sel.getOperand(1);
2546 if (!isConstantOrConstantVector(CT, true) &&
2548 return SDValue();
2549
2550 SDValue CF = Sel.getOperand(2);
2551 if (!isConstantOrConstantVector(CF, true) &&
2553 return SDValue();
2554
2555 // Bail out if any constants are opaque because we can't constant fold those.
2556 // The exception is "and" and "or" with either 0 or -1 in which case we can
2557 // propagate non constant operands into select. I.e.:
2558 // and (select Cond, 0, -1), X --> select Cond, 0, X
2559 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2560 bool CanFoldNonConst =
2561 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2564
2565 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2566 if (!CanFoldNonConst &&
2567 !isConstantOrConstantVector(CBO, true) &&
2569 return SDValue();
2570
2571 SDLoc DL(Sel);
2572 SDValue NewCT, NewCF;
2573
2574 if (CanFoldNonConst) {
2575 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2576 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2577 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2578 NewCT = CT;
2579 else
2580 NewCT = CBO;
2581
2582 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2583 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2584 NewCF = CF;
2585 else
2586 NewCF = CBO;
2587 } else {
2588 // We have a select-of-constants followed by a binary operator with a
2589 // constant. Eliminate the binop by pulling the constant math into the
2590 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2591 // CBO, CF + CBO
2592 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2593 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2594 if (!NewCT)
2595 return SDValue();
2596
2597 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2598 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2599 if (!NewCF)
2600 return SDValue();
2601 }
2602
2603 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2604 SelectOp->setFlags(BO->getFlags());
2605 return SelectOp;
2606}
2607
// Fold an add/sub of a zero-extended, inverted low-bit test into a sub/add of
// the low bit itself with an adjusted constant (see the transform comment at
// the bottom of the function).
// NOTE(review): the signature line was dropped by the doc extraction; per the
// call site in visitADD below this is foldAddSubBoolOfMaskedVal(SDNode *N,
// SelectionDAG &DAG) — confirm against upstream.
2609 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2610 "Expecting add or sub");
2611
2612 // Match a constant operand and a zext operand for the math instruction:
2613 // add Z, C
2614 // sub C, Z
2615 bool IsAdd = N->getOpcode() == ISD::ADD;
2616 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2617 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2618 auto *CN = dyn_cast<ConstantSDNode>(C);
2619 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2620 return SDValue();
2621
2622 // Match the zext operand as a setcc of a boolean.
2623 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2624 Z.getOperand(0).getValueType() != MVT::i1)
2625 return SDValue();
2626
2627 // Match the compare as: setcc (X & 1), 0, eq.
2628 SDValue SetCC = Z.getOperand(0);
2629 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2630 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2631 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2632 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2633 return SDValue();
2634
2635 // We are adding/subtracting a constant and an inverted low bit. Turn that
2636 // into a subtract/add of the low bit with incremented/decremented constant:
2637 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2638 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2639 EVT VT = C.getValueType();
2640 SDLoc DL(N);
// LowBit is (X & 1) widened/narrowed to the result type; the inversion
// performed by the seteq is absorbed into the +/-1 on the constant below.
2641 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2642 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2643 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2644 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2645 }
2646
2647 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2648 /// a shift and add with a different constant.
// NOTE(review): the signature line was dropped by the doc extraction; per the
// call site in visitADD below this is foldAddSubOfSignBit(SDNode *N,
// SelectionDAG &DAG) — confirm against upstream.
2650 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2651 "Expecting add or sub");
2652
2653 // We need a constant operand for the add/sub, and the other operand is a
2654 // logical shift right: add (srl), C or sub C, (srl).
2655 bool IsAdd = N->getOpcode() == ISD::ADD;
2656 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2657 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2658 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2659 ShiftOp.getOpcode() != ISD::SRL)
2660 return SDValue();
2661
2662 // The shift must be of a 'not' value.
2663 SDValue Not = ShiftOp.getOperand(0);
2664 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2665 return SDValue();
2666
2667 // The shift must be moving the sign bit to the least-significant-bit.
2668 EVT VT = ShiftOp.getValueType();
2669 SDValue ShAmt = ShiftOp.getOperand(1);
2670 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2671 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2672 return SDValue();
2673
2674 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2675 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2676 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2677 SDLoc DL(N);
// For the add case we fold C+1; for the sub case the opcode below is ISD::SUB,
// which computes C-1 from the same +1 constant.
2678 if (SDValue NewC = DAG.FoldConstantArithmetic(
2679 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2680 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2681 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2682 Not.getOperand(0), ShAmt);
2683 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2684 }
2685
2686 return SDValue();
2687 }
2688
// Return true if one operand is the bitwise-not (xor with all-ones) of the
// other, in either order.
// NOTE(review): the name/parameter line was dropped by the doc extraction;
// per the call site in visitADDLike this is
// areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) — confirm upstream.
2689 static bool
2691 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2692 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2693 }
2694
2695 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2696 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2697 /// are no common bits set in the operands).
// NOTE(review): several condition lines in this listing were dropped by the
// doc extraction (e.g. the canonicalize-constant guard after line 2714, the
// vector add-zero check after 2728, the guard before 2891, and the condition
// continuation before 2933) — verify against the upstream source before
// relying on this copy.
2698 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2699 SDValue N0 = N->getOperand(0);
2700 SDValue N1 = N->getOperand(1);
2701 EVT VT = N0.getValueType();
2702 SDLoc DL(N);
2703
2704 // fold (add x, undef) -> undef
2705 if (N0.isUndef())
2706 return N0;
2707 if (N1.isUndef())
2708 return N1;
2709
2710 // fold (add c1, c2) -> c1+c2
2711 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2712 return C;
2713
2714 // canonicalize constant to RHS
2717 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2718
// x + ~x: handled via areBitwiseNotOfEachother; the returned constant line
// was dropped in extraction (presumably the all-ones constant — confirm
// against upstream).
2719 if (areBitwiseNotOfEachother(N0, N1))
2721 SDLoc(N), VT);
2722
2723 // fold vector ops
2724 if (VT.isVector()) {
2725 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2726 return FoldedVOp;
2727
2728 // fold (add x, 0) -> x, vector edition
2730 return N0;
2731 }
2732
2733 // fold (add x, 0) -> x
2734 if (isNullConstant(N1))
2735 return N0;
2736
2737 if (N0.getOpcode() == ISD::SUB) {
2738 SDValue N00 = N0.getOperand(0);
2739 SDValue N01 = N0.getOperand(1);
2740
2741 // fold ((A-c1)+c2) -> (A+(c2-c1))
2742 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2743 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2744
2745 // fold ((c1-A)+c2) -> (c1+c2)-A
2746 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2747 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2748 }
2749
2750 // add (sext i1 X), 1 -> zext (not i1 X)
2751 // We don't transform this pattern:
2752 // add (zext i1 X), -1 -> sext (not i1 X)
2753 // because most (?) targets generate better code for the zext form.
2754 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2755 isOneOrOneSplat(N1)) {
2756 SDValue X = N0.getOperand(0);
2757 if ((!LegalOperations ||
2758 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2760 X.getScalarValueSizeInBits() == 1) {
2761 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2762 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2763 }
2764 }
2765
2766 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2767 // iff (or x, c0) is equivalent to (add x, c0).
2768 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2769 // iff (xor x, c0) is equivalent to (add x, c0).
2770 if (DAG.isADDLike(N0)) {
2771 SDValue N01 = N0.getOperand(1);
2772 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2773 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2774 }
2775
2776 if (SDValue NewSel = foldBinOpIntoSelect(N))
2777 return NewSel;
2778
2779 // reassociate add
2780 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2781 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2782 return RADD;
2783
2784 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2785 // equivalent to (add x, c).
2786 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2787 // equivalent to (add x, c).
2788 // Do this optimization only when adding c does not introduce instructions
2789 // for adding carries.
2790 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2791 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2792 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2793 // If N0's type does not split or is a sign mask, it does not introduce
2794 // add carry.
2795 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2796 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2799 if (NoAddCarry)
2800 return DAG.getNode(
2801 ISD::ADD, DL, VT,
2802 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2803 N0.getOperand(1));
2804 }
2805 return SDValue();
2806 };
// The lambda is tried with both operand orders since the pattern is
// commutative.
2807 if (SDValue Add = ReassociateAddOr(N0, N1))
2808 return Add;
2809 if (SDValue Add = ReassociateAddOr(N1, N0))
2810 return Add;
2811
2812 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2813 if (SDValue SD =
2814 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2815 return SD;
2816 }
2817 // fold ((0-A) + B) -> B-A
2818 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2819 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2820
2821 // fold (A + (0-B)) -> A-B
2822 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2823 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2824
2825 // fold (A+(B-A)) -> B
2826 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2827 return N1.getOperand(0);
2828
2829 // fold ((B-A)+A) -> B
2830 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2831 return N0.getOperand(0);
2832
2833 // fold ((A-B)+(C-A)) -> (C-B)
2834 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2835 N0.getOperand(0) == N1.getOperand(1))
2836 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2837 N0.getOperand(1));
2838
2839 // fold ((A-B)+(B-C)) -> (A-C)
2840 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2841 N0.getOperand(1) == N1.getOperand(0))
2842 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2843 N1.getOperand(1));
2844
2845 // fold (A+(B-(A+C))) to (B-C)
2846 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2847 N0 == N1.getOperand(1).getOperand(0))
2848 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2849 N1.getOperand(1).getOperand(1));
2850
2851 // fold (A+(B-(C+A))) to (B-C)
2852 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2853 N0 == N1.getOperand(1).getOperand(1))
2854 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2855 N1.getOperand(1).getOperand(0));
2856
2857 // fold (A+((B-A)+or-C)) to (B+or-C)
2858 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2859 N1.getOperand(0).getOpcode() == ISD::SUB &&
2860 N0 == N1.getOperand(0).getOperand(1))
2861 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2862 N1.getOperand(1));
2863
2864 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2865 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2866 N0->hasOneUse() && N1->hasOneUse()) {
2867 SDValue N00 = N0.getOperand(0);
2868 SDValue N01 = N0.getOperand(1);
2869 SDValue N10 = N1.getOperand(0);
2870 SDValue N11 = N1.getOperand(1);
2871
2873 return DAG.getNode(ISD::SUB, DL, VT,
2874 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2875 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2876 }
2877
2878 // fold (add (umax X, C), -C) --> (usubsat X, C)
2879 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
// Matches per-element: umax constant must equal the negated add constant.
2880 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2881 return (!Max && !Op) ||
2882 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2883 };
2884 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2885 /*AllowUndefs*/ true))
2886 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2887 N0.getOperand(1));
2888 }
2889
2891 return SDValue(N, 0);
2892
// All folds in this group require N1 == 1 (scalar or splat).
2893 if (isOneOrOneSplat(N1)) {
2894 // fold (add (xor a, -1), 1) -> (sub 0, a)
2895 if (isBitwiseNot(N0))
2896 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2897 N0.getOperand(0));
2898
2899 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2900 if (N0.getOpcode() == ISD::ADD) {
2901 SDValue A, Xor;
2902
2903 if (isBitwiseNot(N0.getOperand(0))) {
2904 A = N0.getOperand(1);
2905 Xor = N0.getOperand(0);
2906 } else if (isBitwiseNot(N0.getOperand(1))) {
2907 A = N0.getOperand(0);
2908 Xor = N0.getOperand(1);
2909 }
2910
2911 if (Xor)
2912 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2913 }
2914
2915 // Look for:
2916 // add (add x, 1), y
2917 // And if the target does not like this form then turn into:
2918 // sub y, (xor x, -1)
2919 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2920 N0.hasOneUse() &&
2921 // Limit this to after legalization if the add has wrap flags
2922 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2923 !N->getFlags().hasNoSignedWrap()))) {
2924 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2925 DAG.getAllOnesConstant(DL, VT));
2926 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2927 }
2928 }
2929
2930 // (x - y) + -1 -> add (xor y, -1), x
2931 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2933 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2934 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2935 }
2936
// Try the commutative-pair folds with both operand orders.
2937 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2938 return Combined;
2939
2940 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2941 return Combined;
2942
2943 return SDValue();
2944 }
2945
// Combine an ISD::ADD node: run the generic ADD-like folds first, then
// ADD-specific patterns (bool/sign-bit folds, no-common-bits -> OR, and
// merging of vscale / step_vector constants).
2946 SDValue DAGCombiner::visitADD(SDNode *N) {
2947 SDValue N0 = N->getOperand(0);
2948 SDValue N1 = N->getOperand(1);
2949 EVT VT = N0.getValueType();
2950 SDLoc DL(N);
2951
2952 if (SDValue Combined = visitADDLike(N))
2953 return Combined;
2954
2955 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2956 return V;
2957
2958 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2959 return V;
2960
2961 // fold (a+b) -> (a|b) iff a and b share no bits.
2962 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2963 DAG.haveNoCommonBitsSet(N0, N1))
2964 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2965
2966 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2967 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2968 const APInt &C0 = N0->getConstantOperandAPInt(0);
2969 const APInt &C1 = N1->getConstantOperandAPInt(0);
2970 return DAG.getVScale(DL, VT, C0 + C1);
2971 }
2972
2973 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2974 if (N0.getOpcode() == ISD::ADD &&
2975 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2976 N1.getOpcode() == ISD::VSCALE) {
2977 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2978 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2979 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2980 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2981 }
2982
2983 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2984 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2985 N1.getOpcode() == ISD::STEP_VECTOR) {
2986 const APInt &C0 = N0->getConstantOperandAPInt(0);
2987 const APInt &C1 = N1->getConstantOperandAPInt(0);
2988 APInt NewStep = C0 + C1;
2989 return DAG.getStepVector(DL, VT, NewStep);
2990 }
2991
2992 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
// NOTE(review): the second condition line of this `if` (presumably checking
// that N0's RHS operand is a STEP_VECTOR) was dropped by the doc extraction —
// confirm against upstream.
2993 if (N0.getOpcode() == ISD::ADD &&
2995 N1.getOpcode() == ISD::STEP_VECTOR) {
2996 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2997 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2998 APInt NewStep = SV0 + SV1;
2999 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3000 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3001 }
3002
3003 return SDValue();
3004 }
3005
// Combine a saturating add (ISD::SADDSAT / ISD::UADDSAT): constant-fold,
// canonicalize, strip identity operands, and degrade to a plain ADD when
// overflow is provably impossible.
// NOTE(review): the condition lines after 3022 (canonicalize guard) and 3032
// (vector add-zero check) were dropped by the doc extraction — confirm
// against upstream.
3006 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3007 unsigned Opcode = N->getOpcode();
3008 SDValue N0 = N->getOperand(0);
3009 SDValue N1 = N->getOperand(1);
3010 EVT VT = N0.getValueType();
3011 bool IsSigned = Opcode == ISD::SADDSAT;
3012 SDLoc DL(N);
3013
3014 // fold (add_sat x, undef) -> -1
3015 if (N0.isUndef() || N1.isUndef())
3016 return DAG.getAllOnesConstant(DL, VT);
3017
3018 // fold (add_sat c1, c2) -> c3
3019 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3020 return C;
3021
3022 // canonicalize constant to RHS
3025 return DAG.getNode(Opcode, DL, VT, N1, N0);
3026
3027 // fold vector ops
3028 if (VT.isVector()) {
3029 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3030 return FoldedVOp;
3031
3032 // fold (add_sat x, 0) -> x, vector edition
3034 return N0;
3035 }
3036
3037 // fold (add_sat x, 0) -> x
3038 if (isNullConstant(N1))
3039 return N0;
3040
3041 // If it cannot overflow, transform into an add.
3042 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3043 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3044
3045 return SDValue();
3046 }
3047
// Look through legalization wrappers (truncate/zext/and-1) to find a value
// that is the carry result (result #1) of a UADDO/USUBO/UADDO_CARRY/
// USUBO_CARRY node; returns an empty SDValue otherwise.
// NOTE(review): the first signature line was dropped by the doc extraction;
// per the call sites this is getAsCarry(const TargetLowering &TLI, SDValue V,
// bool ForceCarryReconstruction = false) — confirm against upstream.
3049 bool ForceCarryReconstruction = false) {
3050 bool Masked = false;
3051
3052 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3053 while (true) {
3054 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3055 V = V.getOperand(0);
3056 continue;
3057 }
3058
3059 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
// When forcing carry reconstruction, an explicit (and x, 1) already is an
// acceptable carry representation, so return it unchanged.
3060 if (ForceCarryReconstruction)
3061 return V;
3062
3063 Masked = true;
3064 V = V.getOperand(0);
3065 continue;
3066 }
3067
3068 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3069 return V;
3070
3071 break;
3072 }
3073
3074 // If this is not a carry, return.
3075 if (V.getResNo() != 1)
3076 return SDValue();
3077
3078 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3079 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3080 return SDValue();
3081
3082 EVT VT = V->getValueType(0);
3083 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3084 return SDValue();
3085
3086 // If the result is masked, then no matter what kind of bool it is we can
3087 // return. If it isn't, then we need to make sure the bool type is either 0 or
3088 // 1 and not other values.
// NOTE(review): the enum constant compared against here (presumably
// TargetLowering::ZeroOrOneBooleanContent) was dropped by the doc
// extraction — confirm against upstream.
3089 if (Masked ||
3090 TLI.getBooleanContents(V.getValueType()) ==
3092 return V;
3093
3094 return SDValue();
3095 }
3096
3097/// Given the operands of an add/sub operation, see if the 2nd operand is a
3098/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3099/// the opcode and bypass the mask operation.
3100static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3101 SelectionDAG &DAG, const SDLoc &DL) {
3102 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3103 N1 = N1.getOperand(0);
3104
3105 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3106 return SDValue();
3107
3108 EVT VT = N0.getValueType();
3109 SDValue N10 = N1.getOperand(0);
3110 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3111 N10 = N10.getOperand(0);
3112
3113 if (N10.getValueType() != VT)
3114 return SDValue();
3115
3116 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3117 return SDValue();
3118
3119 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3120 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3121 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3122}
3123
3124 /// Helper for doing combines based on N0 and N1 being added to each other.
// Called twice by visitADDLike with the operands swapped, so each pattern here
// only needs to match one orientation.
// NOTE(review): the condition continuation lines after 3131, 3184, and 3206
// were dropped by the doc extraction — confirm against upstream.
3125 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3126 SDNode *LocReference) {
3127 EVT VT = N0.getValueType();
3128 SDLoc DL(LocReference);
3129
3130 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3131 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
3133 return DAG.getNode(ISD::SUB, DL, VT, N0,
3134 DAG.getNode(ISD::SHL, DL, VT,
3135 N1.getOperand(0).getOperand(1),
3136 N1.getOperand(1)));
3137
3138 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3139 return V;
3140
3141 // Look for:
3142 // add (add x, 1), y
3143 // And if the target does not like this form then turn into:
3144 // sub y, (xor x, -1)
3145 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3146 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3147 // Limit this to after legalization if the add has wrap flags
3148 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3149 !N0->getFlags().hasNoSignedWrap()))) {
3150 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
3151 DAG.getAllOnesConstant(DL, VT));
3152 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3153 }
3154
3155 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3156 // Hoist one-use subtraction by non-opaque constant:
3157 // (x - C) + y -> (x + y) - C
3158 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3159 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3160 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3161 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3162 }
3163 // Hoist one-use subtraction from non-opaque constant:
3164 // (C - x) + y -> (y - x) + C
3165 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3166 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3167 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3168 }
3169 }
3170
3171 // add (mul x, C), x -> mul x, C+1
3172 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3173 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3174 N0.hasOneUse()) {
3175 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3176 DAG.getConstant(1, DL, VT));
3177 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3178 }
3179
3180 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3181 // rather than 'add 0/-1' (the zext should get folded).
3182 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3183 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3184 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3186 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3187 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3188 }
3189
3190 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3191 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3192 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3193 if (TN->getVT() == MVT::i1) {
3194 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3195 DAG.getConstant(1, DL, VT));
3196 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3197 }
3198 }
3199
3200 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3201 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3202 N1.getResNo() == 0)
3203 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3204 N0, N1.getOperand(0), N1.getOperand(2));
3205
3206 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3208 if (SDValue Carry = getAsCarry(TLI, N1))
3209 return DAG.getNode(ISD::UADDO_CARRY, DL,
3210 DAG.getVTList(VT, Carry.getValueType()), N0,
3211 DAG.getConstant(0, DL, VT), Carry);
3212
3213 return SDValue();
3214 }
3215
// Combine the legacy glue-based ISD::ADDC node (value #0 = sum, value #1 =
// glued carry-out flag).
3216 SDValue DAGCombiner::visitADDC(SDNode *N) {
3217 SDValue N0 = N->getOperand(0);
3218 SDValue N1 = N->getOperand(1);
3219 EVT VT = N0.getValueType();
3220 SDLoc DL(N);
3221
3222 // If the flag result is dead, turn this into an ADD.
3223 if (!N->hasAnyUseOfValue(1))
3224 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3225 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3226
3227 // canonicalize constant to RHS.
3228 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3229 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3230 if (N0C && !N1C)
3231 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3232
3233 // fold (addc x, 0) -> x + no carry out
3234 if (isNullConstant(N1))
3235 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3236 DL, MVT::Glue));
3237
3238 // If it cannot overflow, transform into an add.
// NOTE(review): the guard condition for this fold (the line following 3238)
// was dropped by the doc extraction — confirm against upstream.
3240 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3241 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3242
3243 return SDValue();
3244 }
3245
3246/**
3247 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
3248 * then the flip also occurs if computing the inverse is the same cost.
3249 * This function returns an empty SDValue in case it cannot flip the boolean
3250 * without increasing the cost of the computation. If you want to flip a boolean
3251 * no matter what, use DAG.getLogicalNOT.
3252 */
3254 const TargetLowering &TLI,
3255 bool Force) {
3256 if (Force && isa<ConstantSDNode>(V))
3257 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3258
3259 if (V.getOpcode() != ISD::XOR)
3260 return SDValue();
3261
3262 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3263 if (!Const)
3264 return SDValue();
3265
3266 EVT VT = V.getValueType();
3267
3268 bool IsFlip = false;
3269 switch(TLI.getBooleanContents(VT)) {
3271 IsFlip = Const->isOne();
3272 break;
3274 IsFlip = Const->isAllOnes();
3275 break;
3277 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3278 break;
3279 }
3280
3281 if (IsFlip)
3282 return V.getOperand(0);
3283 if (Force)
3284 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3285 return SDValue();
3286}
3287
// Combine an overflow-reporting add (ISD::SADDO / ISD::UADDO); value #0 is the
// sum and value #1 is the overflow/carry flag of type CarryVT.
// NOTE(review): the guard condition for the canonicalization after 3302 was
// dropped by the doc extraction — confirm against upstream.
3288 SDValue DAGCombiner::visitADDO(SDNode *N) {
3289 SDValue N0 = N->getOperand(0);
3290 SDValue N1 = N->getOperand(1);
3291 EVT VT = N0.getValueType();
3292 bool IsSigned = (ISD::SADDO == N->getOpcode());
3293
3294 EVT CarryVT = N->getValueType(1);
3295 SDLoc DL(N);
3296
3297 // If the flag result is dead, turn this into an ADD.
3298 if (!N->hasAnyUseOfValue(1))
3299 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3300 DAG.getUNDEF(CarryVT));
3301
3302 // canonicalize constant to RHS.
3305 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3306
3307 // fold (addo x, 0) -> x + no carry out
3308 if (isNullOrNullSplat(N1))
3309 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3310
3311 // If it cannot overflow, transform into an add.
3312 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3313 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3314 DAG.getConstant(0, DL, CarryVT))
3315
3316 if (IsSigned) {
3317 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3318 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3319 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3320 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3321 } else {
3322 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3323 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3324 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3325 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3326 return CombineTo(
3327 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3328 }
3329
// Try the unsigned carry folds with both operand orders.
3330 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3331 return Combined;
3332
3333 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3334 return Combined;
3335 }
3336
3337 return SDValue();
3338 }
3339
// Helper for visitADDO (unsigned case); tried with both operand orders by the
// caller. Scalar types only.
// NOTE(review): the guard lines after 3349 (presumably the Y+1 no-overflow
// check) and after 3355 (presumably a legality check) were dropped by the doc
// extraction — confirm against upstream.
3340 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3341 EVT VT = N0.getValueType();
3342 if (VT.isVector())
3343 return SDValue();
3344
3345 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3346 // If Y + 1 cannot overflow.
3347 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3348 SDValue Y = N1.getOperand(0);
3349 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3351 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3352 N1.getOperand(2));
3353 }
3354
3355 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3357 if (SDValue Carry = getAsCarry(TLI, N1))
3358 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3359 DAG.getConstant(0, SDLoc(N), VT), Carry);
3360
3361 return SDValue();
3362 }
3363
3364SDValue DAGCombiner::visitADDE(SDNode *N) {
3365 SDValue N0 = N->getOperand(0);
3366 SDValue N1 = N->getOperand(1);
3367 SDValue CarryIn = N->getOperand(2);
3368
3369 // canonicalize constant to RHS
3370 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3371 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3372 if (N0C && !N1C)
3373 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3374 N1, N0, CarryIn);
3375
3376 // fold (adde x, y, false) -> (addc x, y)
3377 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3378 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3379
3380 return SDValue();
3381}
3382
3383SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3384 SDValue N0 = N->getOperand(0);
3385 SDValue N1 = N->getOperand(1);
3386 SDValue CarryIn = N->getOperand(2);
3387 SDLoc DL(N);
3388
3389 // canonicalize constant to RHS
3390 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3391 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3392 if (N0C && !N1C)
3393 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3394
3395 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3396 if (isNullConstant(CarryIn)) {
3397 if (!LegalOperations ||
3398 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3399 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3400 }
3401
3402 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3403 if (isNullConstant(N0) && isNullConstant(N1)) {
3404 EVT VT = N0.getValueType();
3405 EVT CarryVT = CarryIn.getValueType();
3406 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3407 AddToWorklist(CarryExt.getNode());
3408 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3409 DAG.getConstant(1, DL, VT)),
3410 DAG.getConstant(0, DL, CarryVT));
3411 }
3412
3413 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3414 return Combined;
3415
3416 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3417 return Combined;
3418
3419 // We want to avoid useless duplication.
3420 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3421 // not a binary operation, this is not really possible to leverage this
3422 // existing mechanism for it. However, if more operations require the same
3423 // deduplication logic, then it may be worth generalize.
3424 SDValue Ops[] = {N1, N0, CarryIn};
3425 SDNode *CSENode =
3426 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3427 if (CSENode)
3428 return SDValue(CSENode, 0);
3429
3430 return SDValue();
3431}
3432
3433/**
3434 * If we are facing some sort of diamond carry propagation pattern try to
3435 * break it up to generate something like:
3436 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3437 *
3438 * The end result is usually an increase in operation required, but because the
3439 * carry is now linearized, other transforms can kick in and optimize the DAG.
3440 *
3441 * Patterns typically look something like
3442 * (uaddo A, B)
3443 * / \
3444 * Carry Sum
3445 * | \
3446 * | (uaddo_carry *, 0, Z)
3447 * | /
3448 * \ Carry
3449 * | /
3450 * (uaddo_carry X, *, *)
3451 *
3452 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3453 * produce a combine with a single path for carry propagation.
3454 */
// Break up a diamond carry-propagation pattern into a linear uaddo_carry
// chain (see the block comment above for the matched DAG shapes).
// NOTE(review): the first signature line was dropped by the doc extraction;
// per the parameter list below this is
// combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, ...) —
// confirm against upstream.
3456 SelectionDAG &DAG, SDValue X,
3457 SDValue Carry0, SDValue Carry1,
3458 SDNode *N) {
// Both inputs must be the carry result (result #1) of their nodes.
3459 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3460 return SDValue();
3461 if (Carry1.getOpcode() != ISD::UADDO)
3462 return SDValue();
3463
3464 SDValue Z;
3465
3466 /**
3467 * First look for a suitable Z. It will present itself in the form of
3468 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3469 */
3470 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3471 isNullConstant(Carry0.getOperand(1))) {
3472 Z = Carry0.getOperand(2);
3473 } else if (Carry0.getOpcode() == ISD::UADDO &&
3474 isOneConstant(Carry0.getOperand(1))) {
// (uaddo Y, 1) is equivalent to a carry-in of constant true; materialize it
// in the setcc result type.
3475 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3476 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3477 } else {
3478 // We couldn't find a suitable Z.
3479 return SDValue();
3480 }
3481
3482
// Rebuild the diamond as a linear carry chain:
// (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry).
3483 auto cancelDiamond = [&](SDValue A,SDValue B) {
3484 SDLoc DL(N);
3485 SDValue NewY =
3486 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3487 Combiner.AddToWorklist(NewY.getNode());
3488 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3489 DAG.getConstant(0, DL, X.getValueType()),
3490 NewY.getValue(1));
3491 };
3492
3493 /**
3494 * (uaddo A, B)
3495 * |
3496 * Sum
3497 * |
3498 * (uaddo_carry *, 0, Z)
3499 */
3500 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3501 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3502 }
3503
3504 /**
3505 * (uaddo_carry A, 0, Z)
3506 * |
3507 * Sum
3508 * |
3509 * (uaddo *, B)
3510 */
3511 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3512 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3513 }
3514
3515 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3516 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3517 }
3518
3519 return SDValue();
3520 }
3521
3522// If we are facing some sort of diamond carry/borrow in/out pattern try to
3523// match patterns like:
3524//
3525// (uaddo A, B) CarryIn
3526// | \ |
3527// | \ |
3528// PartialSum PartialCarryOutX /
3529// | | /
3530// | ____|____________/
3531// | / |
3532// (uaddo *, *) \________
3533// | \ \
3534// | \ |
3535// | PartialCarryOutY |
3536// | \ |
3537// | \ /
3538// AddCarrySum | ______/
3539// | /
3540// CarryOut = (or *, *)
3541//
3542// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3543//
3544// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3545//
3546// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3547// with a single path for carry/borrow out propagation.
3549 SDValue N0, SDValue N1, SDNode *N) {
3550 SDValue Carry0 = getAsCarry(TLI, N0);
3551 if (!Carry0)
3552 return SDValue();
3553 SDValue Carry1 = getAsCarry(TLI, N1);
3554 if (!Carry1)
3555 return SDValue();
3556
3557 unsigned Opcode = Carry0.getOpcode();
3558 if (Opcode != Carry1.getOpcode())
3559 return SDValue();
3560 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3561 return SDValue();
3562
3563 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3564 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3565 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3566 std::swap(Carry0, Carry1);
3567
3568 // Check if nodes are connected in expected way.
3569 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3570 Carry1.getOperand(1) != Carry0.getValue(0))
3571 return SDValue();
3572
3573 // The carry in value must be on the righthand side for subtraction.
3574 unsigned CarryInOperandNum =
3575 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3576 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3577 return SDValue();
3578 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3579
3580 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3581 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3582 return SDValue();
3583
3584 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3585 CarryIn = getAsCarry(TLI, CarryIn, true);
3586 if (!CarryIn)
3587 return SDValue();
3588
3589 SDLoc DL(N);
3590 SDValue Merged =
3591 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3592 Carry0.getOperand(1), CarryIn);
3593
3594 // Please note that because we have proven that the result of the UADDO/USUBO
3595 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3596 // therefore prove that if the first UADDO/USUBO overflows, the second
3597 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3598 // maximum value.
3599 //
3600 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3601 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3602 //
3603 // This is important because it means that OR and XOR can be used to merge
3604 // carry flags; and that AND can return a constant zero.
3605 //
3606 // TODO: match other operations that can merge flags (ADD, etc)
3607 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3608 if (N->getOpcode() == ISD::AND)
3609 return DAG.getConstant(0, DL, MVT::i1);
3610 return Merged.getValue(1);
3611}
3612
3613SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3614 SDValue CarryIn, SDNode *N) {
3615 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3616 // carry.
3617 if (isBitwiseNot(N0))
3618 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3619 SDLoc DL(N);
3620 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3621 N0.getOperand(0), NotC);
3622 return CombineTo(
3623 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3624 }
3625
3626 // Iff the flag result is dead:
3627 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3628 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3629 // or the dependency between the instructions.
3630 if ((N0.getOpcode() == ISD::ADD ||
3631 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3632 N0.getValue(1) != CarryIn)) &&
3633 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3634 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3635 N0.getOperand(0), N0.getOperand(1), CarryIn);
3636
3637 /**
3638 * When one of the uaddo_carry argument is itself a carry, we may be facing
3639 * a diamond carry propagation. In which case we try to transform the DAG
3640 * to ensure linear carry propagation if that is possible.
3641 */
3642 if (auto Y = getAsCarry(TLI, N1)) {
3643 // Because both are carries, Y and Z can be swapped.
3644 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3645 return R;
3646 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3647 return R;
3648 }
3649
3650 return SDValue();
3651}
3652
3653SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3654 SDValue CarryIn, SDNode *N) {
3655 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3656 if (isBitwiseNot(N0)) {
3657 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3658 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3659 N0.getOperand(0), NotC);
3660 }
3661
3662 return SDValue();
3663}
3664
3665SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3666 SDValue N0 = N->getOperand(0);
3667 SDValue N1 = N->getOperand(1);
3668 SDValue CarryIn = N->getOperand(2);
3669 SDLoc DL(N);
3670
3671 // canonicalize constant to RHS
3672 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3673 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3674 if (N0C && !N1C)
3675 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3676
3677 // fold (saddo_carry x, y, false) -> (saddo x, y)
3678 if (isNullConstant(CarryIn)) {
3679 if (!LegalOperations ||
3680 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3681 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3682 }
3683
3684 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3685 return Combined;
3686
3687 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3688 return Combined;
3689
3690 return SDValue();
3691}
3692
3693// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3694// clamp/truncation if necessary.
3695static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3696 SDValue RHS, SelectionDAG &DAG,
3697 const SDLoc &DL) {
3698 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3699 "Illegal truncation");
3700
3701 if (DstVT == SrcVT)
3702 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3703
3704 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3705 // clamping RHS.
3707 DstVT.getScalarSizeInBits());
3708 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3709 return SDValue();
3710
3711 SDValue SatLimit =
3713 DstVT.getScalarSizeInBits()),
3714 DL, SrcVT);
3715 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3716 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3717 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3718 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3719}
3720
3721// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3722// usubsat(a,b), optionally as a truncated type.
3723SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3724 if (N->getOpcode() != ISD::SUB ||
3725 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3726 return SDValue();
3727
3728 EVT SubVT = N->getValueType(0);
3729 SDValue Op0 = N->getOperand(0);
3730 SDValue Op1 = N->getOperand(1);
3731
3732 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3733 // they may be converted to usubsat(a,b).
3734 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3735 SDValue MaxLHS = Op0.getOperand(0);
3736 SDValue MaxRHS = Op0.getOperand(1);
3737 if (MaxLHS == Op1)
3738 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3739 if (MaxRHS == Op1)
3740 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3741 }
3742
3743 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3744 SDValue MinLHS = Op1.getOperand(0);
3745 SDValue MinRHS = Op1.getOperand(1);
3746 if (MinLHS == Op0)
3747 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3748 if (MinRHS == Op0)
3749 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3750 }
3751
3752 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3753 if (Op1.getOpcode() == ISD::TRUNCATE &&
3754 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3755 Op1.getOperand(0).hasOneUse()) {
3756 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3757 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3758 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3759 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3760 DAG, SDLoc(N));
3761 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3762 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3763 DAG, SDLoc(N));
3764 }
3765
3766 return SDValue();
3767}
3768
3769// Since it may not be valid to emit a fold to zero for vector initializers
3770// check if we can before folding.
3771static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3772 SelectionDAG &DAG, bool LegalOperations) {
3773 if (!VT.isVector())
3774 return DAG.getConstant(0, DL, VT);
3775 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3776 return DAG.getConstant(0, DL, VT);
3777 return SDValue();
3778}
3779
3780SDValue DAGCombiner::visitSUB(SDNode *N) {
3781 SDValue N0 = N->getOperand(0);
3782 SDValue N1 = N->getOperand(1);
3783 EVT VT = N0.getValueType();
3784 SDLoc DL(N);
3785
3786 auto PeekThroughFreeze = [](SDValue N) {
3787 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3788 return N->getOperand(0);
3789 return N;
3790 };
3791
3792 // fold (sub x, x) -> 0
3793 // FIXME: Refactor this and xor and other similar operations together.
3794 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3795 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3796
3797 // fold (sub c1, c2) -> c3
3798 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3799 return C;
3800
3801 // fold vector ops
3802 if (VT.isVector()) {
3803 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3804 return FoldedVOp;
3805
3806 // fold (sub x, 0) -> x, vector edition
3808 return N0;
3809 }
3810
3811 if (SDValue NewSel = foldBinOpIntoSelect(N))
3812 return NewSel;
3813
3815
3816 // fold (sub x, c) -> (add x, -c)
3817 if (N1C) {
3818 return DAG.getNode(ISD::ADD, DL, VT, N0,
3819 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3820 }
3821
3822 if (isNullOrNullSplat(N0)) {
3823 unsigned BitWidth = VT.getScalarSizeInBits();
3824 // Right-shifting everything out but the sign bit followed by negation is
3825 // the same as flipping arithmetic/logical shift type without the negation:
3826 // -(X >>u 31) -> (X >>s 31)
3827 // -(X >>s 31) -> (X >>u 31)
3828 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3830 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3831 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3832 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3833 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3834 }
3835 }
3836
3837 // 0 - X --> 0 if the sub is NUW.
3838 if (N->getFlags().hasNoUnsignedWrap())
3839 return N0;
3840
3842 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3843 // N1 must be 0 because negating the minimum signed value is undefined.
3844 if (N->getFlags().hasNoSignedWrap())
3845 return N0;
3846
3847 // 0 - X --> X if X is 0 or the minimum signed value.
3848 return N1;
3849 }
3850
3851 // Convert 0 - abs(x).
3852 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3854 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3855 return Result;
3856
3857 // Fold neg(splat(neg(x)) -> splat(x)
3858 if (VT.isVector()) {
3859 SDValue N1S = DAG.getSplatValue(N1, true);
3860 if (N1S && N1S.getOpcode() == ISD::SUB &&
3861 isNullConstant(N1S.getOperand(0)))
3862 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3863 }
3864 }
3865
3866 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3868 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3869
3870 // fold (A - (0-B)) -> A+B
3871 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3872 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3873
3874 // fold A-(A-B) -> B
3875 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3876 return N1.getOperand(1);
3877
3878 // fold (A+B)-A -> B
3879 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3880 return N0.getOperand(1);
3881
3882 // fold (A+B)-B -> A
3883 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3884 return N0.getOperand(0);
3885
3886 // fold (A+C1)-C2 -> A+(C1-C2)
3887 if (N0.getOpcode() == ISD::ADD) {
3888 SDValue N01 = N0.getOperand(1);
3889 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3890 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3891 }
3892
3893 // fold C2-(A+C1) -> (C2-C1)-A
3894 if (N1.getOpcode() == ISD::ADD) {
3895 SDValue N11 = N1.getOperand(1);
3896 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3897 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3898 }
3899
3900 // fold (A-C1)-C2 -> A-(C1+C2)
3901 if (N0.getOpcode() == ISD::SUB) {
3902 SDValue N01 = N0.getOperand(1);
3903 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3904 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3905 }
3906
3907 // fold (c1-A)-c2 -> (c1-c2)-A
3908 if (N0.getOpcode() == ISD::SUB) {
3909 SDValue N00 = N0.getOperand(0);
3910 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3911 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3912 }
3913
3914 // fold ((A+(B+or-C))-B) -> A+or-C
3915 if (N0.getOpcode() == ISD::ADD &&
3916 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3917 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3918 N0.getOperand(1).getOperand(0) == N1)
3919 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3920 N0.getOperand(1).getOperand(1));
3921
3922 // fold ((A+(C+B))-B) -> A+C
3923 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3924 N0.getOperand(1).getOperand(1) == N1)
3925 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3926 N0.getOperand(1).getOperand(0));
3927
3928 // fold ((A-(B-C))-C) -> A-B
3929 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3930 N0.getOperand(1).getOperand(1) == N1)
3931 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3932 N0.getOperand(1).getOperand(0));
3933
3934 // fold (A-(B-C)) -> A+(C-B)
3935 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3936 return DAG.getNode(ISD::ADD, DL, VT, N0,
3937 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3938 N1.getOperand(0)));
3939
3940 // A - (A & B) -> A & (~B)
3941 if (N1.getOpcode() == ISD::AND) {
3942 SDValue A = N1.getOperand(0);
3943 SDValue B = N1.getOperand(1);
3944 if (A != N0)
3945 std::swap(A, B);
3946 if (A == N0 &&
3947 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3948 SDValue InvB =
3949 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3950 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3951 }
3952 }
3953
3954 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3955 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3956 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3958 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3959 N1.getOperand(0).getOperand(1),
3960 N1.getOperand(1));
3961 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3962 }
3963 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3965 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3966 N1.getOperand(0),
3967 N1.getOperand(1).getOperand(1));
3968 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3969 }
3970 }
3971
3972 // If either operand of a sub is undef, the result is undef
3973 if (N0.isUndef())
3974 return N0;
3975 if (N1.isUndef())
3976 return N1;
3977
3978 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3979 return V;
3980
3981 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3982 return V;
3983
3984 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3985 return V;
3986
3987 if (SDValue V = foldSubToUSubSat(VT, N))
3988 return V;
3989
3990 // (x - y) - 1 -> add (xor y, -1), x
3991 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
3992 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3993 DAG.getAllOnesConstant(DL, VT));
3994 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3995 }
3996
3997 // Look for:
3998 // sub y, (xor x, -1)
3999 // And if the target does not like this form then turn into:
4000 // add (add x, y), 1
4001 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4002 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4003 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4004 }
4005
4006 // Hoist one-use addition by non-opaque constant:
4007 // (x + C) - y -> (x - y) + C
4008 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4009 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4010 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4011 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4012 }
4013 // y - (x + C) -> (y - x) - C
4014 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4015 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4016 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4017 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4018 }
4019 // (x - C) - y -> (x - y) - C
4020 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4021 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4022 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4023 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4024 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4025 }
4026 // (C - x) - y -> C - (x + y)
4027 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4028 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4029 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4030 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4031 }
4032
4033 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4034 // rather than 'sub 0/1' (the sext should get folded).
4035 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4036 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4037 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4038 TLI.getBooleanContents(VT) ==
4040 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4041 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4042 }
4043
4044 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
4045 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
4046 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
4047 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
4048 SDValue S0 = N1.getOperand(0);
4049 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
4051 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
4052 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
4053 }
4054 }
4055
4056 // If the relocation model supports it, consider symbol offsets.
4057 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4058 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4059 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4060 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4061 if (GA->getGlobal() == GB->getGlobal())
4062 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4063 DL, VT);
4064 }
4065
4066 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4067 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4068 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4069 if (TN->getVT() == MVT::i1) {
4070 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4071 DAG.getConstant(1, DL, VT));
4072 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4073 }
4074 }
4075
4076 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4077 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4078 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4079 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4080 }
4081
4082 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4083 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4084 APInt NewStep = -N1.getConstantOperandAPInt(0);
4085 return DAG.getNode(ISD::ADD, DL, VT, N0,
4086 DAG.getStepVector(DL, VT, NewStep));
4087 }
4088
4089 // Prefer an add for more folding potential and possibly better codegen:
4090 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4091 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4092 SDValue ShAmt = N1.getOperand(1);
4093 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4094 if (ShAmtC &&
4095 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
4096 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4097 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4098 }
4099 }
4100
4101 // As with the previous fold, prefer add for more folding potential.
4102 // Subtracting SMIN/0 is the same as adding SMIN/0:
4103 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4104 if (N1.getOpcode() == ISD::SHL) {
4106 if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
4107 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4108 }
4109
4110 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4111 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4112 N0.getResNo() == 0 && N0.hasOneUse())
4113 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4114 N0.getOperand(0), N1, N0.getOperand(2));
4115
4117 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4118 if (SDValue Carry = getAsCarry(TLI, N0)) {
4119 SDValue X = N1;
4120 SDValue Zero = DAG.getConstant(0, DL, VT);
4121 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4122 return DAG.getNode(ISD::UADDO_CARRY, DL,
4123 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4124 Carry);
4125 }
4126 }
4127
4128 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4129 // sub C0, X --> xor X, C0
4130 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4131 if (!C0->isOpaque()) {
4132 const APInt &C0Val = C0->getAPIntValue();
4133 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4134 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4135 return DAG.