LLVM 23.0.0git
DAGCombiner.cpp
Go to the documentation of this file.
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/APSInt.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SetVector.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
52#include "llvm/IR/Attributes.h"
53#include "llvm/IR/Constant.h"
54#include "llvm/IR/DataLayout.h"
56#include "llvm/IR/Function.h"
57#include "llvm/IR/Metadata.h"
62#include "llvm/Support/Debug.h"
70#include <algorithm>
71#include <cassert>
72#include <cstdint>
73#include <functional>
74#include <iterator>
75#include <optional>
76#include <string>
77#include <tuple>
78#include <utility>
79#include <variant>
80
81#include "MatchContext.h"
82
83using namespace llvm;
84using namespace llvm::SDPatternMatch;
85
86#define DEBUG_TYPE "dagcombine"
87
88STATISTIC(NodesCombined , "Number of dag nodes combined");
89STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
90STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
91STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
92STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
93STATISTIC(SlicedLoads, "Number of load sliced");
94STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
95
96DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
97 "Controls whether a DAG combine is performed for a node");
98
99static cl::opt<bool>
100CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
101 cl::desc("Enable DAG combiner's use of IR alias analysis"));
102
103static cl::opt<bool>
104UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
105 cl::desc("Enable DAG combiner's use of TBAA"));
106
107#ifndef NDEBUG
109CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
110 cl::desc("Only use DAG-combiner alias analysis in this"
111 " function"));
112#endif
113
114/// Hidden option to stress test load slicing, i.e., when this option
115/// is enabled, load slicing bypasses most of its profitability guards.
116static cl::opt<bool>
117StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
118 cl::desc("Bypass the profitability model of load slicing"),
119 cl::init(false));
120
121static cl::opt<bool>
122 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
123 cl::desc("DAG combiner may split indexing from loads"));
124
125static cl::opt<bool>
126 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
127 cl::desc("DAG combiner enable merging multiple stores "
128 "into a wider store"));
129
131 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
132 cl::desc("Limit the number of operands to inline for Token Factors"));
133
135 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
136 cl::desc("Limit the number of times for the same StoreNode and RootNode "
137 "to bail out in store merging dependence check"));
138
140 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
141 cl::desc("DAG combiner enable reducing the width of load/op/store "
142 "sequence"));
144 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
145 cl::Hidden, cl::init(false),
146 cl::desc("DAG combiner force override the narrowing profitable check when "
147 "reducing the width of load/op/store sequences"));
148
150 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
151 cl::desc("DAG combiner enable load/<replace bytes>/store with "
152 "a narrower store"));
153
154static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
155 cl::init(false),
156 cl::desc("Disable the DAG combiner"));
157
158namespace {
159
160 class DAGCombiner {
161 SelectionDAG &DAG;
162 const TargetLowering &TLI;
163 const SelectionDAGTargetInfo *STI;
165 CodeGenOptLevel OptLevel;
166 bool LegalDAG = false;
167 bool LegalOperations = false;
168 bool LegalTypes = false;
169 bool ForCodeSize;
170 bool DisableGenericCombines;
171
172 /// Worklist of all of the nodes that need to be simplified.
173 ///
174 /// This must behave as a stack -- new nodes to process are pushed onto the
175 /// back and when processing we pop off of the back.
176 ///
177 /// The worklist will not contain duplicates but may contain null entries
178 /// due to nodes being deleted from the underlying DAG. For fast lookup and
179 /// deduplication, the index of the node in this vector is stored in the
180 /// node in SDNode::CombinerWorklistIndex.
182
183 /// This records all nodes attempted to be added to the worklist since we
184 /// considered a new worklist entry. As we keep do not add duplicate nodes
185 /// in the worklist, this is different from the tail of the worklist.
187
188 /// Map from candidate StoreNode to the pair of RootNode and count.
189 /// The count is used to track how many times we have seen the StoreNode
190 /// with the same RootNode bail out in dependence check. If we have seen
191 /// the bail out for the same pair many times over a limit, we won't
192 /// consider the StoreNode with the same RootNode as store merging
193 /// candidate again.
195
196 // BatchAA - Used for DAG load/store alias analysis.
197 BatchAAResults *BatchAA;
198
199 /// This caches all chains that have already been processed in
200 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
201 /// stores candidates.
202 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
203
204 /// When an instruction is simplified, add all users of the instruction to
205 /// the work lists because they might get more simplified now.
206 void AddUsersToWorklist(SDNode *N) {
207 for (SDNode *Node : N->users())
208 AddToWorklist(Node);
209 }
210
211 /// Convenient shorthand to add a node and all of its user to the worklist.
212 void AddToWorklistWithUsers(SDNode *N) {
213 AddUsersToWorklist(N);
214 AddToWorklist(N);
215 }
216
217 // Prune potentially dangling nodes. This is called after
218 // any visit to a node, but should also be called during a visit after any
219 // failed combine which may have created a DAG node.
220 void clearAddedDanglingWorklistEntries() {
221 // Check any nodes added to the worklist to see if they are prunable.
222 while (!PruningList.empty()) {
223 auto *N = PruningList.pop_back_val();
224 if (N->use_empty())
225 recursivelyDeleteUnusedNodes(N);
226 }
227 }
228
229 SDNode *getNextWorklistEntry() {
230 // Before we do any work, remove nodes that are not in use.
231 clearAddedDanglingWorklistEntries();
232 SDNode *N = nullptr;
233 // The Worklist holds the SDNodes in order, but it may contain null
234 // entries.
235 while (!N && !Worklist.empty()) {
236 N = Worklist.pop_back_val();
237 }
238
239 if (N) {
240 assert(N->getCombinerWorklistIndex() >= 0 &&
241 "Found a worklist entry without a corresponding map entry!");
242 // Set to -2 to indicate that we combined the node.
243 N->setCombinerWorklistIndex(-2);
244 }
245 return N;
246 }
247
248 /// Call the node-specific routine that folds each particular type of node.
249 SDValue visit(SDNode *N);
250
251 public:
252 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
253 : DAG(D), TLI(D.getTargetLoweringInfo()),
254 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
255 BatchAA(BatchAA) {
256 ForCodeSize = DAG.shouldOptForSize();
257 DisableGenericCombines =
258 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
259
260 MaximumLegalStoreInBits = 0;
261 // We use the minimum store size here, since that's all we can guarantee
262 // for the scalable vector types.
263 for (MVT VT : MVT::all_valuetypes())
264 if (EVT(VT).isSimple() && VT != MVT::Other &&
265 TLI.isTypeLegal(EVT(VT)) &&
266 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
267 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
268 }
269
270 void ConsiderForPruning(SDNode *N) {
271 // Mark this for potential pruning.
272 PruningList.insert(N);
273 }
274
275 /// Add to the worklist making sure its instance is at the back (next to be
276 /// processed.)
277 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
278 bool SkipIfCombinedBefore = false) {
279 assert(N->getOpcode() != ISD::DELETED_NODE &&
280 "Deleted Node added to Worklist");
281
282 // Skip handle nodes as they can't usefully be combined and confuse the
283 // zero-use deletion strategy.
284 if (N->getOpcode() == ISD::HANDLENODE)
285 return;
286
287 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
288 return;
289
290 if (IsCandidateForPruning)
291 ConsiderForPruning(N);
292
293 if (N->getCombinerWorklistIndex() < 0) {
294 N->setCombinerWorklistIndex(Worklist.size());
295 Worklist.push_back(N);
296 }
297 }
298
299 /// Remove all instances of N from the worklist.
300 void removeFromWorklist(SDNode *N) {
301 PruningList.remove(N);
302 StoreRootCountMap.erase(N);
303
304 int WorklistIndex = N->getCombinerWorklistIndex();
305 // If not in the worklist, the index might be -1 or -2 (was combined
306 // before). As the node gets deleted anyway, there's no need to update
307 // the index.
308 if (WorklistIndex < 0)
309 return; // Not in the worklist.
310
311 // Null out the entry rather than erasing it to avoid a linear operation.
312 Worklist[WorklistIndex] = nullptr;
313 N->setCombinerWorklistIndex(-1);
314 }
315
316 void deleteAndRecombine(SDNode *N);
317 bool recursivelyDeleteUnusedNodes(SDNode *N);
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
321 bool AddTo = true);
322
323 /// Replaces all uses of the results of one DAG node with new values.
324 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
325 return CombineTo(N, &Res, 1, AddTo);
326 }
327
328 /// Replaces all uses of the results of one DAG node with new values.
329 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
330 bool AddTo = true) {
331 SDValue To[] = { Res0, Res1 };
332 return CombineTo(N, To, 2, AddTo);
333 }
334
335 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
336 bool AddTo = true) {
337 return CombineTo(N, To->data(), To->size(), AddTo);
338 }
339
340 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
341
342 private:
343 unsigned MaximumLegalStoreInBits;
344
345 /// Check the specified integer node value to see if it can be simplified or
346 /// if things it uses can be simplified by bit propagation.
347 /// If so, return true.
348 bool SimplifyDemandedBits(SDValue Op) {
349 unsigned BitWidth = Op.getScalarValueSizeInBits();
350 APInt DemandedBits = APInt::getAllOnes(BitWidth);
351 return SimplifyDemandedBits(Op, DemandedBits);
352 }
353
354 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
355 EVT VT = Op.getValueType();
356 APInt DemandedElts = VT.isFixedLengthVector()
358 : APInt(1, 1);
359 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
360 }
361
362 /// Check the specified vector node value to see if it can be simplified or
363 /// if things it uses can be simplified as it only uses some of the
364 /// elements. If so, return true.
365 bool SimplifyDemandedVectorElts(SDValue Op) {
366 // TODO: For now just pretend it cannot be simplified.
367 if (Op.getValueType().isScalableVector())
368 return false;
369
370 unsigned NumElts = Op.getValueType().getVectorNumElements();
371 APInt DemandedElts = APInt::getAllOnes(NumElts);
372 return SimplifyDemandedVectorElts(Op, DemandedElts);
373 }
374
375 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
376 const APInt &DemandedElts,
377 bool AssumeSingleUse = false);
378 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
379 bool AssumeSingleUse = false);
380
381 bool CombineToPreIndexedLoadStore(SDNode *N);
382 bool CombineToPostIndexedLoadStore(SDNode *N);
383 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
384 bool SliceUpLoad(SDNode *N);
385
386 // Looks up the chain to find a unique (unaliased) store feeding the passed
387 // load. If no such store is found, returns a nullptr.
388 // Note: This will look past a CALLSEQ_START if the load is chained to it so
389 // so that it can find stack stores for byval params.
390 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
391 // Scalars have size 0 to distinguish from singleton vectors.
392 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
393 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
394 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
395
396 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
397 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
398 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
399 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
400 SDValue PromoteIntBinOp(SDValue Op);
401 SDValue PromoteIntShiftOp(SDValue Op);
402 SDValue PromoteExtend(SDValue Op);
403 bool PromoteLoad(SDValue Op);
404
405 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
406 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
407 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
408
409 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
410 SDValue RHS, SDValue True, SDValue False,
411 ISD::CondCode CC);
412
413 /// Call the node-specific routine that knows how to fold each
414 /// particular type of node. If that doesn't do anything, try the
415 /// target-specific DAG combines.
416 SDValue combine(SDNode *N);
417
418 // Visitation implementation - Implement dag node combining for different
419 // node types. The semantics are as follows:
420 // Return Value:
421 // SDValue.getNode() == 0 - No change was made
422 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
423 // otherwise - N should be replaced by the returned Operand.
424 //
425 SDValue visitTokenFactor(SDNode *N);
426 SDValue visitMERGE_VALUES(SDNode *N);
427 SDValue visitADD(SDNode *N);
428 SDValue visitADDLike(SDNode *N);
429 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
430 SDNode *LocReference);
431 SDValue visitPTRADD(SDNode *N);
432 SDValue visitSUB(SDNode *N);
433 SDValue visitADDSAT(SDNode *N);
434 SDValue visitSUBSAT(SDNode *N);
435 SDValue visitADDC(SDNode *N);
436 SDValue visitADDO(SDNode *N);
437 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
438 SDValue visitSUBC(SDNode *N);
439 SDValue visitSUBO(SDNode *N);
440 SDValue visitADDE(SDNode *N);
441 SDValue visitUADDO_CARRY(SDNode *N);
442 SDValue visitSADDO_CARRY(SDNode *N);
443 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
444 SDNode *N);
445 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
446 SDNode *N);
447 SDValue visitSUBE(SDNode *N);
448 SDValue visitUSUBO_CARRY(SDNode *N);
449 SDValue visitSSUBO_CARRY(SDNode *N);
450 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
451 SDValue visitMULFIX(SDNode *N);
452 SDValue useDivRem(SDNode *N);
453 SDValue visitSDIV(SDNode *N);
454 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
455 SDValue visitUDIV(SDNode *N);
456 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
457 SDValue visitREM(SDNode *N);
458 SDValue visitMULHU(SDNode *N);
459 SDValue visitMULHS(SDNode *N);
460 SDValue visitAVG(SDNode *N);
461 SDValue visitABD(SDNode *N);
462 SDValue visitSMUL_LOHI(SDNode *N);
463 SDValue visitUMUL_LOHI(SDNode *N);
464 SDValue visitMULO(SDNode *N);
465 SDValue visitIMINMAX(SDNode *N);
466 SDValue visitAND(SDNode *N);
467 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
468 SDValue visitOR(SDNode *N);
469 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
470 SDValue visitXOR(SDNode *N);
471 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
472 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
473 SDValue visitSHL(SDNode *N);
474 SDValue visitSRA(SDNode *N);
475 SDValue visitSRL(SDNode *N);
476 SDValue visitFunnelShift(SDNode *N);
477 SDValue visitSHLSAT(SDNode *N);
478 SDValue visitRotate(SDNode *N);
479 SDValue visitABS(SDNode *N);
480 SDValue visitBSWAP(SDNode *N);
481 SDValue visitBITREVERSE(SDNode *N);
482 SDValue visitCTLZ(SDNode *N);
483 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
484 SDValue visitCTTZ(SDNode *N);
485 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
486 SDValue visitCTPOP(SDNode *N);
487 SDValue visitSELECT(SDNode *N);
488 SDValue visitVSELECT(SDNode *N);
489 SDValue visitVP_SELECT(SDNode *N);
490 SDValue visitSELECT_CC(SDNode *N);
491 SDValue visitSETCC(SDNode *N);
492 SDValue visitSETCCCARRY(SDNode *N);
493 SDValue visitSIGN_EXTEND(SDNode *N);
494 SDValue visitZERO_EXTEND(SDNode *N);
495 SDValue visitANY_EXTEND(SDNode *N);
496 SDValue visitAssertExt(SDNode *N);
497 SDValue visitAssertAlign(SDNode *N);
498 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
499 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
500 SDValue visitTRUNCATE(SDNode *N);
501 SDValue visitTRUNCATE_USAT_U(SDNode *N);
502 SDValue visitBITCAST(SDNode *N);
503 SDValue visitFREEZE(SDNode *N);
504 SDValue visitBUILD_PAIR(SDNode *N);
505 SDValue visitFADD(SDNode *N);
506 SDValue visitVP_FADD(SDNode *N);
507 SDValue visitVP_FSUB(SDNode *N);
508 SDValue visitSTRICT_FADD(SDNode *N);
509 SDValue visitFSUB(SDNode *N);
510 SDValue visitFMUL(SDNode *N);
511 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
512 SDValue visitFMAD(SDNode *N);
513 SDValue visitFMULADD(SDNode *N);
514 SDValue visitFDIV(SDNode *N);
515 SDValue visitFREM(SDNode *N);
516 SDValue visitFSQRT(SDNode *N);
517 SDValue visitFCOPYSIGN(SDNode *N);
518 SDValue visitFPOW(SDNode *N);
519 SDValue visitFCANONICALIZE(SDNode *N);
520 SDValue visitSINT_TO_FP(SDNode *N);
521 SDValue visitUINT_TO_FP(SDNode *N);
522 SDValue visitFP_TO_SINT(SDNode *N);
523 SDValue visitFP_TO_UINT(SDNode *N);
524 SDValue visitXROUND(SDNode *N);
525 SDValue visitFP_ROUND(SDNode *N);
526 SDValue visitFP_EXTEND(SDNode *N);
527 SDValue visitFNEG(SDNode *N);
528 SDValue visitFABS(SDNode *N);
529 SDValue visitFCEIL(SDNode *N);
530 SDValue visitFTRUNC(SDNode *N);
531 SDValue visitFFREXP(SDNode *N);
532 SDValue visitFFLOOR(SDNode *N);
533 SDValue visitFMinMax(SDNode *N);
534 SDValue visitBRCOND(SDNode *N);
535 SDValue visitBR_CC(SDNode *N);
536 SDValue visitLOAD(SDNode *N);
537
538 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
539 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
540 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
541
542 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
543
544 SDValue visitSTORE(SDNode *N);
545 SDValue visitATOMIC_STORE(SDNode *N);
546 SDValue visitLIFETIME_END(SDNode *N);
547 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
548 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
549 SDValue visitBUILD_VECTOR(SDNode *N);
550 SDValue visitCONCAT_VECTORS(SDNode *N);
551 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
552 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
553 SDValue visitVECTOR_SHUFFLE(SDNode *N);
554 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
555 SDValue visitINSERT_SUBVECTOR(SDNode *N);
556 SDValue visitVECTOR_COMPRESS(SDNode *N);
557 SDValue visitMLOAD(SDNode *N);
558 SDValue visitMSTORE(SDNode *N);
559 SDValue visitMGATHER(SDNode *N);
560 SDValue visitMSCATTER(SDNode *N);
561 SDValue visitMHISTOGRAM(SDNode *N);
562 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
563 SDValue visitVPGATHER(SDNode *N);
564 SDValue visitVPSCATTER(SDNode *N);
565 SDValue visitVP_STRIDED_LOAD(SDNode *N);
566 SDValue visitVP_STRIDED_STORE(SDNode *N);
567 SDValue visitFP_TO_FP16(SDNode *N);
568 SDValue visitFP16_TO_FP(SDNode *N);
569 SDValue visitFP_TO_BF16(SDNode *N);
570 SDValue visitBF16_TO_FP(SDNode *N);
571 SDValue visitVECREDUCE(SDNode *N);
572 SDValue visitVPOp(SDNode *N);
573 SDValue visitGET_FPENV_MEM(SDNode *N);
574 SDValue visitSET_FPENV_MEM(SDNode *N);
575
576 template <class MatchContextClass>
577 SDValue visitFADDForFMACombine(SDNode *N);
578 template <class MatchContextClass>
579 SDValue visitFSUBForFMACombine(SDNode *N);
580 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
581
582 SDValue XformToShuffleWithZero(SDNode *N);
583 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
584 const SDLoc &DL,
585 SDNode *N,
586 SDValue N0,
587 SDValue N1);
588 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
589 SDValue N1, SDNodeFlags Flags);
590 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
591 SDValue N1, SDNodeFlags Flags);
592 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
593 EVT VT, SDValue N0, SDValue N1,
594 SDNodeFlags Flags = SDNodeFlags());
595
596 SDValue visitShiftByConstant(SDNode *N);
597
598 SDValue foldSelectOfConstants(SDNode *N);
599 SDValue foldVSelectOfConstants(SDNode *N);
600 SDValue foldBinOpIntoSelect(SDNode *BO);
601 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
602 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
603 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
604 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
605 SDValue N2, SDValue N3, ISD::CondCode CC,
606 bool NotExtCompare = false);
607 SDValue convertSelectOfFPConstantsToLoadOffset(
608 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
609 ISD::CondCode CC);
610 SDValue foldSignChangeInBitcast(SDNode *N);
611 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
612 SDValue N2, SDValue N3, ISD::CondCode CC);
613 SDValue foldSelectOfBinops(SDNode *N);
614 SDValue foldSextSetcc(SDNode *N);
615 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
616 const SDLoc &DL);
617 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
618 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
619 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
620 SDValue False, ISD::CondCode CC, const SDLoc &DL);
621 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
622 SDValue False, ISD::CondCode CC, const SDLoc &DL);
623 SDValue unfoldMaskedMerge(SDNode *N);
624 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
625 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
626 const SDLoc &DL, bool foldBooleans);
627 SDValue rebuildSetCC(SDValue N);
628
629 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
630 SDValue &CC, bool MatchStrict = false) const;
631 bool isOneUseSetCC(SDValue N) const;
632
633 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
634 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
635
636 SDValue foldCTLZToCTLS(SDValue Src, const SDLoc &DL);
637
638 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
639 unsigned HiOp);
640 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
641 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
642 const TargetLowering &TLI);
643 SDValue foldPartialReduceMLAMulOp(SDNode *N);
644 SDValue foldPartialReduceAdd(SDNode *N);
645
646 SDValue CombineExtLoad(SDNode *N);
647 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
648 SDValue combineRepeatedFPDivisors(SDNode *N);
649 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
650 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
651 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
652 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
653 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
654 SDValue BuildSDIV(SDNode *N);
655 SDValue BuildSDIVPow2(SDNode *N);
656 SDValue BuildUDIV(SDNode *N);
657 SDValue BuildSREMPow2(SDNode *N);
658 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
659 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
660 bool KnownNeverZero = false,
661 bool InexpensiveOnly = false,
662 std::optional<EVT> OutVT = std::nullopt);
663 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
664 SDValue buildRsqrtEstimate(SDValue Op);
665 SDValue buildSqrtEstimate(SDValue Op);
666 SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
667 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
668 bool Reciprocal);
669 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
670 bool Reciprocal);
671 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
672 bool DemandHighBits = true);
673 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
674 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
675 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
676 bool HasPos, unsigned PosOpcode,
677 unsigned NegOpcode, const SDLoc &DL);
678 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
679 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
680 bool HasPos, unsigned PosOpcode,
681 unsigned NegOpcode, const SDLoc &DL);
682 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
683 bool FromAdd);
684 SDValue MatchLoadCombine(SDNode *N);
685 SDValue mergeTruncStores(StoreSDNode *N);
686 SDValue reduceLoadWidth(SDNode *N);
687 SDValue ReduceLoadOpStoreWidth(SDNode *N);
688 SDValue splitMergedValStore(StoreSDNode *ST);
689 SDValue TransformFPLoadStorePair(SDNode *N);
690 SDValue convertBuildVecZextToZext(SDNode *N);
691 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
692 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
693 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
694 SDValue reduceBuildVecToShuffle(SDNode *N);
695 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
696 ArrayRef<int> VectorMask, SDValue VecIn1,
697 SDValue VecIn2, unsigned LeftIdx,
698 bool DidSplitVec);
699 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
700
701 /// Walk up chain skipping non-aliasing memory nodes,
702 /// looking for aliasing nodes and adding them to the Aliases vector.
703 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
704 SmallVectorImpl<SDValue> &Aliases);
705
706 /// Return true if there is any possibility that the two addresses overlap.
707 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
708
709 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
710 /// chain (aliasing node.)
711 SDValue FindBetterChain(SDNode *N, SDValue Chain);
712
713 /// Try to replace a store and any possibly adjacent stores on
714 /// consecutive chains with better chains. Return true only if St is
715 /// replaced.
716 ///
717 /// Notice that other chains may still be replaced even if the function
718 /// returns false.
719 bool findBetterNeighborChains(StoreSDNode *St);
720
721 // Helper for findBetterNeighborChains. Walk up store chain add additional
722 // chained stores that do not overlap and can be parallelized.
723 bool parallelizeChainedStores(StoreSDNode *St);
724
725 /// Holds a pointer to an LSBaseSDNode as well as information on where it
726 /// is located in a sequence of memory operations connected by a chain.
727 struct MemOpLink {
728 // Ptr to the mem node.
729 LSBaseSDNode *MemNode;
730
731 // Offset from the base ptr.
732 int64_t OffsetFromBase;
733
734 MemOpLink(LSBaseSDNode *N, int64_t Offset)
735 : MemNode(N), OffsetFromBase(Offset) {}
736 };
737
738 // Classify the origin of a stored value.
739 enum class StoreSource { Unknown, Constant, Extract, Load };
740 StoreSource getStoreSource(SDValue StoreVal) {
741 switch (StoreVal.getOpcode()) {
742 case ISD::Constant:
743 case ISD::ConstantFP:
744 return StoreSource::Constant;
748 return StoreSource::Constant;
749 return StoreSource::Unknown;
752 return StoreSource::Extract;
753 case ISD::LOAD:
754 return StoreSource::Load;
755 default:
756 return StoreSource::Unknown;
757 }
758 }
759
760 /// This is a helper function for visitMUL to check the profitability
761 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
762 /// MulNode is the original multiply, AddNode is (add x, c1),
763 /// and ConstNode is c2.
764 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
765 SDValue ConstNode);
766
767 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
768 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
769 /// the type of the loaded value to be extended.
770 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
771 EVT LoadResultTy, EVT &ExtVT);
772
773 /// Helper function to calculate whether the given Load/Store can have its
774 /// width reduced to ExtVT.
775 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
776 EVT &MemVT, unsigned ShAmt = 0);
777
778 /// Used by BackwardsPropagateMask to find suitable loads.
779 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
780 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
781 ConstantSDNode *Mask, SDNode *&NodeToMask);
782 /// Attempt to propagate a given AND node back to load leaves so that they
783 /// can be combined into narrow loads.
784 bool BackwardsPropagateMask(SDNode *N);
785
786 /// Helper function for mergeConsecutiveStores which merges the component
787 /// store chains.
788 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
789 unsigned NumStores);
790
791 /// Helper function for mergeConsecutiveStores which checks if all the store
792 /// nodes have the same underlying object. We can still reuse the first
793 /// store's pointer info if all the stores are from the same object.
794 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
795
796 /// This is a helper function for mergeConsecutiveStores. When the source
797 /// elements of the consecutive stores are all constants or all extracted
798 /// vector elements, try to merge them into one larger store introducing
799 /// bitcasts if necessary. \return True if a merged store was created.
800 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
801 EVT MemVT, unsigned NumStores,
802 bool IsConstantSrc, bool UseVector,
803 bool UseTrunc);
804
805 /// This is a helper function for mergeConsecutiveStores. Stores that
806 /// potentially may be merged with St are placed in StoreNodes. On success,
807 /// returns a chain predecessor to all store candidates.
808 SDNode *getStoreMergeCandidates(StoreSDNode *St,
809 SmallVectorImpl<MemOpLink> &StoreNodes);
810
811 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
812 /// have indirect dependency through their operands. RootNode is the
813 /// predecessor to all stores calculated by getStoreMergeCandidates and is
814 /// used to prune the dependency check. \return True if safe to merge.
815 bool checkMergeStoreCandidatesForDependencies(
816 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
817 SDNode *RootNode);
818
819 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
820 /// chain has a call in it. \return True if a call is found.
821 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
822
823 /// This is a helper function for mergeConsecutiveStores. Given a list of
824 /// store candidates, find the first N that are consecutive in memory.
825 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
826 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
827 int64_t ElementSizeBytes) const;
828
829 /// This is a helper function for mergeConsecutiveStores. It is used for
830 /// store chains that are composed entirely of constant values.
831 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
832 unsigned NumConsecutiveStores,
833 EVT MemVT, SDNode *Root, bool AllowVectors);
834
835 /// This is a helper function for mergeConsecutiveStores. It is used for
836 /// store chains that are composed entirely of extracted vector elements.
837 /// When extracting multiple vector elements, try to store them in one
838 /// vector store rather than a sequence of scalar stores.
839 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
840 unsigned NumConsecutiveStores, EVT MemVT,
841 SDNode *Root);
842
843 /// This is a helper function for mergeConsecutiveStores. It is used for
844 /// store chains that are composed entirely of loaded values.
845 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
846 unsigned NumConsecutiveStores, EVT MemVT,
847 SDNode *Root, bool AllowVectors,
848 bool IsNonTemporalStore, bool IsNonTemporalLoad);
849
850 /// Merge consecutive store operations into a wide store.
851 /// This optimization uses wide integers or vectors when possible.
852 /// \return true if stores were merged.
853 bool mergeConsecutiveStores(StoreSDNode *St);
854
855 /// Try to transform a truncation where C is a constant:
856 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
857 ///
858 /// \p N needs to be a truncation and its first operand an AND. Other
859 /// requirements are checked by the function (e.g. that trunc is
860 /// single-use) and if missed an empty SDValue is returned.
861 SDValue distributeTruncateThroughAnd(SDNode *N);
862
863 /// Helper function to determine whether the target supports operation
864 /// given by \p Opcode for type \p VT, that is, whether the operation
865 /// is legal or custom before legalizing operations, and whether is
866 /// legal (but not custom) after legalization.
867 bool hasOperation(unsigned Opcode, EVT VT) {
868 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
869 }
870
 // Returns true if an ISD::UMIN is available (legal or custom) for the type
 // VT legalizes/converts to.
 871 bool hasUMin(EVT VT) const {
 872 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
 873 return (LK.first == TargetLoweringBase::TypeLegal ||
 // NOTE(review): the listing drops original line 874 here (presumably another
 // LegalizeTypeAction alternative followed by ") &&") -- verify against the
 // checked-in source.
 875 TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
 876 }
877
878 public:
879 /// Runs the dag combiner on all nodes in the work list
880 void Run(CombineLevel AtLevel);
881
882 SelectionDAG &getDAG() const { return DAG; }
883
884 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
 885 EVT getShiftAmountTy(EVT LHSTy) {
 886 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout()); // Target's preferred shift-amount type for LHSTy.
 887 }
888
889 /// This method returns true if we are running before type legalization or
890 /// if the specified VT is legal.
891 bool isTypeLegal(const EVT &VT) {
892 if (!LegalTypes) return true;
893 return TLI.isTypeLegal(VT);
894 }
895
896 /// Convenience wrapper around TargetLowering::getSetCCResultType
 897 EVT getSetCCResultType(EVT VT) const {
 898 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); // Type a SETCC comparing values of VT produces on this target.
 899 }
900
901 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
902 SDValue OrigLoad, SDValue ExtLoad,
903 ISD::NodeType ExtType);
904 };
905
906/// This class is a DAGUpdateListener that removes any deleted
907/// nodes from the worklist.
908class WorklistRemover : public SelectionDAG::DAGUpdateListener {
909 DAGCombiner &DC;
910
911public:
912 explicit WorklistRemover(DAGCombiner &dc)
913 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
914
915 void NodeDeleted(SDNode *N, SDNode *E) override {
916 DC.removeFromWorklist(N);
917 }
918};
919
920class WorklistInserter : public SelectionDAG::DAGUpdateListener {
921 DAGCombiner &DC;
922
923public:
924 explicit WorklistInserter(DAGCombiner &dc)
925 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
926
927 // FIXME: Ideally we could add N to the worklist, but this causes exponential
928 // compile time costs in large DAGs, e.g. Halide.
929 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
930};
931
932} // end anonymous namespace
933
934//===----------------------------------------------------------------------===//
935// TargetLowering::DAGCombinerInfo implementation
936//===----------------------------------------------------------------------===//
937
// Thunks from the opaque TargetLowering::DAGCombinerInfo handle into the
// concrete DAGCombiner; DC is stored type-erased, hence the casts.
// NOTE(review): the doxygen extraction dropped the hyperlinked signature
// lines (original lines 938, 942, 947, 952, 957-958, 962-963); the bodies
// below are kept byte-identical to what the listing shows -- verify the
// signatures against the checked-in source.
 939 ((DAGCombiner*)DC)->AddToWorklist(N);
 940}
 941
 943CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 944 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 945}
 946
 948CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 949 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 950}
 951
 953CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 954 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 955}
 956
 959 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
 960}
 961
 964 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 965}
966
967//===----------------------------------------------------------------------===//
968// Helper Functions
969//===----------------------------------------------------------------------===//
970
971void DAGCombiner::deleteAndRecombine(SDNode *N) {
972 removeFromWorklist(N);
973
974 // If the operands of this node are only used by the node, they will now be
975 // dead. Make sure to re-visit them and recursively delete dead nodes.
976 for (const SDValue &Op : N->ops())
977 // For an operand generating multiple values, one of the values may
978 // become dead allowing further simplification (e.g. split index
979 // arithmetic from an indexed load).
980 if (Op->hasOneUse() || Op->getNumValues() > 1)
981 AddToWorklist(Op.getNode());
982
983 DAG.DeleteNode(N);
984}
985
986// APInts must be the same size for most operations, this helper
987// function zero extends the shorter of the pair so that they match.
988// We provide an Offset so that we can create bitwidths that won't overflow.
989static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
990 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
991 LHS = LHS.zext(Bits);
992 RHS = RHS.zext(Bits);
993}
994
995// Return true if this node is a setcc, or is a select_cc
996// that selects between the target values used for true and false, making it
997// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
998// the appropriate nodes based on the type of node we are checking. This
999// simplifies life a bit for the callers.
1000bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
1001 SDValue &CC, bool MatchStrict) const {
 // Plain SETCC: operands are (lhs, rhs, condcode).
1002 if (N.getOpcode() == ISD::SETCC) {
1003 LHS = N.getOperand(0);
1004 RHS = N.getOperand(1);
1005 CC = N.getOperand(2);
1006 return true;
1007 }
1008
 // Strict FP compares carry a chain in operand 0, so the compare operands
 // are shifted up by one.
1009 if (MatchStrict &&
1010 (N.getOpcode() == ISD::STRICT_FSETCC ||
1011 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1012 LHS = N.getOperand(1);
1013 RHS = N.getOperand(2);
1014 CC = N.getOperand(3);
1015 return true;
1016 }
1017
 // Otherwise only a SELECT_CC choosing between the canonical true/false
 // values behaves like a setcc.
1018 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1019 !TLI.isConstFalseVal(N.getOperand(3)))
1020 return false;
1021
1022 if (TLI.getBooleanContents(N.getValueType()) ==
 // NOTE(review): the listing drops original line 1023 (the RHS of this
 // comparison, presumably TargetLowering::UndefinedBooleanContent) --
 // verify against the checked-in source.
1024 return false;
1025
1026 LHS = N.getOperand(0);
1027 RHS = N.getOperand(1);
1028 CC = N.getOperand(4);
1029 return true;
1030}
1031
1032/// Return true if this is a SetCC-equivalent operation with only one use.
1033/// If this is true, it allows the users to invert the operation for free when
1034/// it is profitable to do so.
1035bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1036 SDValue N0, N1, N2;
1037 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1038 return true;
1039 return false;
1040}
1041
// NOTE(review): the doxygen extraction dropped the hyperlinked signature line
// (original line 1042); in upstream this is presumably
// `static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)` --
// verify against the checked-in source. The body checks whether N is a
// constant splat whose value is the all-ones mask for ScalarTy (i8/i16/i32).
1043 if (!ScalarTy.isSimple())
1044 return false;
1045
1046 uint64_t MaskForTy = 0ULL;
1047 switch (ScalarTy.getSimpleVT().SimpleTy) {
1048 case MVT::i8:
1049 MaskForTy = 0xFFULL;
1050 break;
1051 case MVT::i16:
1052 MaskForTy = 0xFFFFULL;
1053 break;
1054 case MVT::i32:
1055 MaskForTy = 0xFFFFFFFFULL;
1056 break;
1057 default:
1058 return false;
 // (break after return is unreachable)
1059 break;
1060 }
1061
1062 APInt Val;
1063 if (ISD::isConstantSplatVector(N, Val))
1064 return Val.getLimitedValue() == MaskForTy;
1065
1066 return false;
1067}
1068
1069// Determines if it is a constant integer or a splat/build vector of constant
1070// integers (and undefs).
1071// Do not permit build vector implicit truncation unless AllowTruncation is set.
1072static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
1073 bool AllowTruncation = false) {
 // NOTE(review): the listing drops original line 1074 here (presumably the
 // `if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))` scalar case
 // that this `return` belongs to) -- verify against the checked-in source.
1075 return !(Const->isOpaque() && NoOpaques);
1076 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1077 return false;
1078 unsigned BitWidth = N.getScalarValueSizeInBits();
 // Every non-undef element must be a (non-opaque, if requested) constant.
1079 for (const SDValue &Op : N->op_values()) {
1080 if (Op.isUndef())
1081 continue;
 // NOTE(review): the listing drops original line 1082 (presumably
 // `ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);`) -- verify.
1083 if (!Const || (Const->isOpaque() && NoOpaques))
1084 return false;
1085 // When AllowTruncation is true, allow constants that have been promoted
1086 // during type legalization as long as the value fits in the target type.
1087 if ((AllowTruncation &&
1088 Const->getAPIntValue().getActiveBits() > BitWidth) ||
1089 (!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth))
1090 return false;
1091 }
1092 return true;
1093}
1094
1095// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1096// undef's.
1097static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1098 if (V.getOpcode() != ISD::BUILD_VECTOR)
1099 return false;
1100 return isConstantOrConstantVector(V, NoOpaques) ||
 // NOTE(review): the listing drops original line 1101 (the second operand of
 // this `||`, presumably a check for an all-constant-FP build vector) --
 // verify against the checked-in source.
1102}
1103
1104// Determine whether the index of this indexed load may be split off, i.e.
1104// index splitting is enabled and the index is not an opaque target constant.
1105static bool canSplitIdx(LoadSDNode *LD) {
1106 return MaySplitLoadIndex &&
1107 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1108 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1109}
1110
// Returns true when reassociating (Opc N0, N1) under node N would destroy an
// addressing-mode pattern that a load/store user of N currently folds.
1111bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1112 const SDLoc &DL,
1113 SDNode *N,
1114 SDValue N0,
1115 SDValue N1) {
1116 // Currently this only tries to ensure we don't undo the GEP splits done by
1117 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1118 // we check if the following transformation would be problematic:
1119 // (load/store (add, (add, x, offset1), offset2)) ->
1120 // (load/store (add, x, offset1+offset2)).
1121
1122 // (load/store (add, (add, x, y), offset2)) ->
1123 // (load/store (add, (add, x, offset2), y)).
1124
1125 if (!N0.isAnyAdd())
1126 return false;
1127
1128 // Check for vscale addressing modes.
1129 // (load/store (add/sub (add x, y), vscale))
1130 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1131 // (load/store (add/sub (add x, y), (mul vscale, C)))
1132 if ((N1.getOpcode() == ISD::VSCALE ||
1133 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1134 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
 // NOTE(review): the listing drops original line 1135 here (presumably a
 // constant check on N1.getOperand(1) plus closing parens) -- verify against
 // the checked-in source.
1136 N1.getValueType().getFixedSizeInBits() <= 64) {
 // Fold the shift/multiply into a single signed scalable byte offset.
1137 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1138 ? N1.getConstantOperandVal(0)
1139 : (N1.getOperand(0).getConstantOperandVal(0) *
1140 (N1.getOpcode() == ISD::SHL
1141 ? (1LL << N1.getConstantOperandVal(1))
1142 : N1.getConstantOperandVal(1)));
1143 if (Opc == ISD::SUB)
1144 ScalableOffset = -ScalableOffset;
 // Breaking the pattern only matters if every user is a memory op that
 // folds N as its base pointer with this scalable offset.
1145 if (all_of(N->users(), [&](SDNode *Node) {
1146 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1147 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1148 TargetLoweringBase::AddrMode AM;
1149 AM.HasBaseReg = true;
1150 AM.ScalableOffset = ScalableOffset;
1151 EVT VT = LoadStore->getMemoryVT();
1152 unsigned AS = LoadStore->getAddressSpace();
1153 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1154 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1155 AS);
1156 }
1157 return false;
1158 }))
1159 return true;
1160 }
1161
1162 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1163 return false;
1164
 // From here on, only the constant-offset form (add (add x, c1), c2) (or a
 // non-constant inner RHS) is analyzed; N1 must be a small constant.
1165 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1166 if (!C2)
1167 return false;
1168
1169 const APInt &C2APIntVal = C2->getAPIntValue();
1170 if (C2APIntVal.getSignificantBits() > 64)
1171 return false;
1172
1173 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1174 if (N0.hasOneUse())
1175 return false;
1176
1177 const APInt &C1APIntVal = C1->getAPIntValue();
1178 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1179 if (CombinedValueIntVal.getSignificantBits() > 64)
1180 return false;
1181 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1182
1183 for (SDNode *Node : N->users()) {
1184 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1185 // Is x[offset2] already not a legal addressing mode? If so then
1186 // reassociating the constants breaks nothing (we test offset2 because
1187 // that's the one we hope to fold into the load or store).
1188 TargetLoweringBase::AddrMode AM;
1189 AM.HasBaseReg = true;
1190 AM.BaseOffs = C2APIntVal.getSExtValue();
1191 EVT VT = LoadStore->getMemoryVT();
1192 unsigned AS = LoadStore->getAddressSpace();
1193 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1194 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1195 continue;
1196
1197 // Would x[offset1+offset2] still be a legal addressing mode?
1198 AM.BaseOffs = CombinedValue;
1199 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1200 return true;
1201 }
1202 }
1203 } else {
 // Inner RHS is not a constant; a foldable global-address offset means the
 // reassociation is harmless.
1204 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1205 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1206 return false;
1207
1208 for (SDNode *Node : N->users()) {
1209 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1210 if (!LoadStore)
1211 return false;
1212
1213 // Is x[offset2] a legal addressing mode? If so then
1214 // reassociating the constants breaks address pattern
1215 TargetLoweringBase::AddrMode AM;
1216 AM.HasBaseReg = true;
1217 AM.BaseOffs = C2APIntVal.getSExtValue();
1218 EVT VT = LoadStore->getMemoryVT();
1219 unsigned AS = LoadStore->getAddressSpace();
1220 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1221 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1222 return false;
1223 }
1224 return true;
1225 }
1226
1227 return false;
1228}
1229
1230/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1231/// \p N0 is the same kind of operation as \p Opc.
1232SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1233 SDValue N0, SDValue N1,
1234 SDNodeFlags Flags) {
1235 EVT VT = N0.getValueType();
1236
1237 if (N0.getOpcode() != Opc)
1238 return SDValue();
1239
1240 SDValue N00 = N0.getOperand(0);
1241 SDValue N01 = N0.getOperand(1);
1242
 // NOTE(review): the listing drops original line 1243 here (presumably the
 // `if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {` guard opening the
 // constant-RHS path below) -- verify against the checked-in source.
 // NUW can only be preserved when both the inner add and the outer op carry
 // it.
1244 SDNodeFlags NewFlags;
1245 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1246 Flags.hasNoUnsignedWrap())
1247 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1248
 // NOTE(review): the listing also drops original line 1249 (presumably the
 // matching constant check on N01) -- verify against the checked-in source.
1250 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1251 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1252 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1253 N0->getFlags().hasDisjoint());
1254 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1255 }
1256 return SDValue();
1257 }
1258 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1259 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1260 // iff (op x, c1) has one use
1261 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1262 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1263 }
1264 }
1265
1266 // Check for repeated operand logic simplifications.
1267 if (Opc == ISD::AND || Opc == ISD::OR) {
1268 // (N00 & N01) & N00 --> N00 & N01
1269 // (N00 & N01) & N01 --> N00 & N01
1270 // (N00 | N01) | N00 --> N00 | N01
1271 // (N00 | N01) | N01 --> N00 | N01
1272 if (N1 == N00 || N1 == N01)
1273 return N0;
1274 }
1275 if (Opc == ISD::XOR) {
1276 // (N00 ^ N01) ^ N00 --> N01
1277 if (N1 == N00)
1278 return N01;
1279 // (N00 ^ N01) ^ N01 --> N00
1280 if (N1 == N01)
1281 return N00;
1282 }
1283
1284 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1285 if (N1 != N01) {
1286 // Reassociate if (op N00, N1) already exist
1287 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1288 // if Op (Op N00, N1), N01 already exist
1289 // we need to stop reassociating to avoid an infinite loop
1290 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1291 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1292 }
1293 }
1294
1295 if (N1 != N00) {
1296 // Reassociate if (op N01, N1) already exist
1297 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1298 // if Op (Op N01, N1), N00 already exist
1299 // we need to stop reassociating to avoid an infinite loop
1300 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1301 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1302 }
1303 }
1304
1305 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1306 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1307 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1308 // comparisons with the same predicate. This enables optimizations as the
1309 // following one:
1310 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1311 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1312 if (Opc == ISD::AND || Opc == ISD::OR) {
1313 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1314 N01->getOpcode() == ISD::SETCC) {
1315 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1316 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1317 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1318 if (CC1 == CC00 && CC1 != CC01) {
1319 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1320 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1321 }
1322 if (CC1 == CC01 && CC1 != CC00) {
1323 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1324 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1325 }
1326 }
1327 }
1328 }
1329
1330 return SDValue();
1331}
1332
1333/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1334/// same kind of operation as \p Opc.
1335SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1336 SDValue N1, SDNodeFlags Flags) {
1337 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1338
1339 // Floating-point reassociation is not allowed without loose FP math.
1340 if (N0.getValueType().isFloatingPoint() ||
 // NOTE(review): the listing drops original line 1341 (presumably
 // `N1.getValueType().isFloatingPoint())` closing this condition) -- verify
 // against the checked-in source.
1342 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1343 return SDValue();
1344
 // Try both operand orders, since either side may be the matching inner op.
1345 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1346 return Combined;
1347 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1348 return Combined;
1349 return SDValue();
1350}
1351
1352// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1353// Note that we only expect Flags to be passed from FP operations. For integer
1354// operations they need to be dropped.
1355SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1356 const SDLoc &DL, EVT VT, SDValue N0,
1357 SDValue N1, SDNodeFlags Flags) {
 // Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y)) when both
 // reductions are single-use and reduce the same vector type.
1358 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1359 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1360 N0->hasOneUse() && N1->hasOneUse() &&
 // NOTE(review): the listing drops original line 1361 (another conjunct,
 // presumably a hasOperation check on the vector type) -- verify against
 // the checked-in source.
1362 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1363 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1364 return DAG.getNode(RedOpc, DL, VT,
1365 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1366 N0.getOperand(0), N1.getOperand(0)));
1367 }
1368
1369 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1370 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1371 // single node.
1372 SDValue A, B, C, D, RedA, RedB;
1373 if (sd_match(N0, m_OneUse(m_c_BinOp(
1374 Opc,
1375 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1376 m_Value(RedA)),
1377 m_Value(B)))) &&
 // NOTE(review): the listing drops original line 1378 (presumably
 // `sd_match(N1, m_OneUse(m_c_BinOp(` opening the mirrored match) -- verify
 // against the checked-in source.
1379 Opc,
1380 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1381 m_Value(RedB)),
1382 m_Value(D)))) &&
1383 !sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1384 !sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1385 A.getValueType() == C.getValueType() &&
1386 hasOperation(Opc, A.getValueType()) &&
1387 TLI.shouldReassociateReduction(RedOpc, VT)) {
 // FP reassociation requires the reassoc flag on every node involved.
1388 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1389 (!N0->getFlags().hasAllowReassociation() ||
 // NOTE(review): the listing drops original line 1390 (presumably the
 // matching check on N1's flags) -- verify against the checked-in source.
1391 !RedA->getFlags().hasAllowReassociation() ||
1392 !RedB->getFlags().hasAllowReassociation()))
1393 return SDValue();
 // New nodes may only carry flags common to everything they replace.
1394 SelectionDAG::FlagInserter FlagsInserter(
1395 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1396 RedB->getFlags());
1397 SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1398 SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1399 SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1400 return DAG.getNode(Opc, DL, VT, Red, Op2);
1401 }
1402 return SDValue();
1403}
1404
1405SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1406 bool AddTo) {
1407 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1408 ++NodesCombined;
1409 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1410 To[0].dump(&DAG);
1411 dbgs() << " and " << NumTo - 1 << " other values\n");
1412 for (unsigned i = 0, e = NumTo; i != e; ++i)
1413 assert((!To[i].getNode() ||
1414 N->getValueType(i) == To[i].getValueType()) &&
1415 "Cannot combine value to value of different type!");
1416
1417 WorklistRemover DeadNodes(*this);
1418 DAG.ReplaceAllUsesWith(N, To);
1419 if (AddTo) {
1420 // Push the new nodes and any users onto the worklist
1421 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1422 if (To[i].getNode())
1423 AddToWorklistWithUsers(To[i].getNode());
1424 }
1425 }
1426
1427 // Finally, if the node is now dead, remove it from the graph. The node
1428 // may not be dead if the replacement process recursively simplified to
1429 // something else needing this node.
1430 if (N->use_empty())
1431 deleteAndRecombine(N);
1432 return SDValue(N, 0);
1433}
1434
// Apply a replacement recorded by TargetLowering in a TargetLoweringOpt:
// RAUW Old with New, revisit New and its users, and clean up Old if dead.
1435void DAGCombiner::
1436CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1437 // Replace the old value with the new one.
1438 ++NodesCombined;
1439 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1440 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1441
1442 // Replace all uses.
1443 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1444
1445 // Push the new node and any (possibly new) users onto the worklist.
1446 AddToWorklistWithUsers(TLO.New.getNode());
1447
1448 // Finally, if the node is now dead, remove it from the graph.
1449 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1450}
1451
1452/// Check the specified integer node value to see if it can be simplified or if
1453/// things it uses can be simplified by bit propagation. If so, return true.
1454bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1455 const APInt &DemandedElts,
1456 bool AssumeSingleUse) {
1457 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1458 KnownBits Known;
1459 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1460 AssumeSingleUse))
1461 return false;
1462
1463 // Revisit the node.
1464 AddToWorklist(Op.getNode());
1465
1466 CommitTargetLoweringOpt(TLO);
1467 return true;
1468}
1469
1470/// Check the specified vector node value to see if it can be simplified or
1471/// if things it uses can be simplified as it only uses some of the elements.
1472/// If so, return true.
1473bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1474 const APInt &DemandedElts,
1475 bool AssumeSingleUse) {
1476 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1477 APInt KnownUndef, KnownZero;
1478 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1479 TLO, 0, AssumeSingleUse))
1480 return false;
1481
1482 // Revisit the node.
1483 AddToWorklist(Op.getNode());
1484
1485 CommitTargetLoweringOpt(TLO);
1486 return true;
1487}
1488
// Replace Load with ExtLoad (a wider, promoted load of the same location):
// value users see a truncate of the wide result, chain users are rewired to
// the new load's chain, and the old load is deleted if dead.
1489void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1490 SDLoc DL(Load);
1491 EVT VT = Load->getValueType(0);
 // Truncate the wide result back so existing users keep the original type.
1492 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1493
1494 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1495 Trunc.dump(&DAG); dbgs() << '\n');
1496
1497 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1498 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1499
1500 AddToWorklist(Trunc.getNode());
1501 recursivelyDeleteUnusedNodes(Load);
1502}
1503
// Produce a PVT-typed version of Op for use inside a promoted operation.
// Sets Replace when the caller must also rewire the original load's users
// (via ReplaceLoadWithPromotedLoad). Returns a null SDValue on failure.
1504SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1505 Replace = false;
1506 SDLoc DL(Op);
 // A simple (unindexed) load can be re-issued directly as an extending load
 // of the wider type.
1507 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1508 LoadSDNode *LD = cast<LoadSDNode>(Op);
1509 EVT MemVT = LD->getMemoryVT();
 // NOTE(review): the listing drops original line 1510 here (presumably
 // `ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD` with
 // the `:` continuation on the next line) -- verify against the checked-in
 // source.
1511 : LD->getExtensionType();
1512 Replace = true;
1513 return DAG.getExtLoad(ExtType, DL, PVT,
1514 LD->getChain(), LD->getBasePtr(),
1515 MemVT, LD->getMemOperand());
1516 }
1517
1518 unsigned Opc = Op.getOpcode();
1519 switch (Opc) {
1520 default: break;
 // Assert nodes keep their assertion but over the promoted source.
1521 case ISD::AssertSext:
1522 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1523 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1524 break;
1525 case ISD::AssertZext:
1526 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1527 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1528 break;
1529 case ISD::Constant: {
1530 unsigned ExtOpc =
1531 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1532 return DAG.getNode(ExtOpc, DL, PVT, Op);
1533 }
1534 }
1535
 // Fall back to an any-extend when the target supports it at PVT.
1536 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1537 return SDValue();
1538 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1539}
1540
// Promote Op to PVT and sign-extend-in-register from the old type so the
// wide value has sign-extension semantics. Returns null on failure.
1541SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
 // NOTE(review): the listing drops original line 1542 here (the condition
 // guarding this early return, presumably a legality check of
 // ISD::SIGN_EXTEND_INREG at PVT) -- verify against the checked-in source.
1543 return SDValue();
1544 EVT OldVT = Op.getValueType();
1545 SDLoc DL(Op);
1546 bool Replace = false;
1547 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1548 if (!NewOp.getNode())
1549 return SDValue();
1550 AddToWorklist(NewOp.getNode());
1551
1552 if (Replace)
1553 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1554 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1555 DAG.getValueType(OldVT));
1556}
1557
1558SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1559 EVT OldVT = Op.getValueType();
1560 SDLoc DL(Op);
1561 bool Replace = false;
1562 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1563 if (!NewOp.getNode())
1564 return SDValue();
1565 AddToWorklist(NewOp.getNode());
1566
1567 if (Replace)
1568 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1569 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1570}
1571
1572/// Promote the specified integer binary operation if the target indicates it is
1573/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1574/// i32 since i16 instructions are longer.
1575SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
 // Promotion only applies once operations are being legalized, and only to
 // scalar integers.
1576 if (!LegalOperations)
1577 return SDValue();
1578
1579 EVT VT = Op.getValueType();
1580 if (VT.isVector() || !VT.isInteger())
1581 return SDValue();
1582
1583 // If operation type is 'undesirable', e.g. i16 on x86, consider
1584 // promoting it.
1585 unsigned Opc = Op.getOpcode();
1586 if (TLI.isTypeDesirableForOp(Opc, VT))
1587 return SDValue();
1588
1589 EVT PVT = VT;
1590 // Consult target whether it is a good idea to promote this operation and
1591 // what's the right type to promote it to.
1592 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1593 assert(PVT != VT && "Don't know what type to promote to!");
1594
1595 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1596
 // Promote both operands to PVT; ReplaceN records whether a load was
 // re-issued and thus needs its remaining users rewired below.
1597 bool Replace0 = false;
1598 SDValue N0 = Op.getOperand(0);
1599 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1600
1601 bool Replace1 = false;
1602 SDValue N1 = Op.getOperand(1);
1603 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1604 SDLoc DL(Op);
1605
 // Perform the operation at the wide type, then truncate back to VT.
1606 SDValue RV =
1607 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1608
1609 // We are always replacing N0/N1's use in N and only need additional
1610 // replacements if there are additional uses.
1611 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1612 // (SDValue) here because the node may reference multiple values
1613 // (for example, the chain value of a load node).
1614 Replace0 &= !N0->hasOneUse();
1615 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1616
1617 // Combine Op here so it is preserved past replacements.
1618 CombineTo(Op.getNode(), RV);
1619
1620 // If operands have a use ordering, make sure we deal with
1621 // predecessor first.
1622 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1623 std::swap(N0, N1);
1624 std::swap(NN0, NN1);
1625 }
1626
1627 if (Replace0) {
1628 AddToWorklist(NN0.getNode());
1629 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1630 }
1631 if (Replace1) {
1632 AddToWorklist(NN1.getNode());
1633 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1634 }
1635 return Op;
1636 }
1637 return SDValue();
1638}
1639
1640/// Promote the specified integer shift operation if the target indicates it is
1641/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1642/// i32 since i16 instructions are longer.
1643SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
 // Promotion only applies once operations are being legalized, and only to
 // scalar integers.
1644 if (!LegalOperations)
1645 return SDValue();
1646
1647 EVT VT = Op.getValueType();
1648 if (VT.isVector() || !VT.isInteger())
1649 return SDValue();
1650
1651 // If operation type is 'undesirable', e.g. i16 on x86, consider
1652 // promoting it.
1653 unsigned Opc = Op.getOpcode();
1654 if (TLI.isTypeDesirableForOp(Opc, VT))
1655 return SDValue();
1656
1657 EVT PVT = VT;
1658 // Consult target whether it is a good idea to promote this operation and
1659 // what's the right type to promote it to.
1660 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1661 assert(PVT != VT && "Don't know what type to promote to!");
1662
1663 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1664
 // Only the shifted value (operand 0) is promoted; how it is widened must
 // match the shift's semantics: SRA needs the sign bits, SRL needs zeros,
 // and SHL picks based on the wrap flags (preserving the flag on the
 // truncate), falling back to an any-extend.
1665 SDNodeFlags TruncFlags;
1666 bool Replace = false;
1667 SDValue N0 = Op.getOperand(0);
1668 if (Opc == ISD::SRA) {
1669 N0 = SExtPromoteOperand(N0, PVT);
1670 } else if (Opc == ISD::SRL) {
1671 N0 = ZExtPromoteOperand(N0, PVT);
1672 } else {
1673 if (Op->getFlags().hasNoUnsignedWrap()) {
1674 N0 = ZExtPromoteOperand(N0, PVT);
1675 TruncFlags = SDNodeFlags::NoUnsignedWrap;
1676 } else if (Op->getFlags().hasNoSignedWrap()) {
1677 N0 = SExtPromoteOperand(N0, PVT);
1678 TruncFlags = SDNodeFlags::NoSignedWrap;
1679 } else {
1680 N0 = PromoteOperand(N0, PVT, Replace);
1681 }
1682 }
1683
1684 if (!N0.getNode())
1685 return SDValue();
1686
 // The shift amount (operand 1) is reused unchanged.
1687 SDLoc DL(Op);
1688 SDValue N1 = Op.getOperand(1);
1689 SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT,
1690 DAG.getNode(Opc, DL, PVT, N0, N1), TruncFlags);
1691
1692 if (Replace)
1693 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1694
1695 // Deal with Op being deleted.
1696 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1697 return RV;
1698 }
1699 return SDValue();
1700}
1701
1702SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1703 if (!LegalOperations)
1704 return SDValue();
1705
1706 EVT VT = Op.getValueType();
1707 if (VT.isVector() || !VT.isInteger())
1708 return SDValue();
1709
1710 // If operation type is 'undesirable', e.g. i16 on x86, consider
1711 // promoting it.
1712 unsigned Opc = Op.getOpcode();
1713 if (TLI.isTypeDesirableForOp(Opc, VT))
1714 return SDValue();
1715
1716 EVT PVT = VT;
1717 // Consult target whether it is a good idea to promote this operation and
1718 // what's the right type to promote it to.
1719 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1720 assert(PVT != VT && "Don't know what type to promote to!");
1721 // fold (aext (aext x)) -> (aext x)
1722 // fold (aext (zext x)) -> (zext x)
1723 // fold (aext (sext x)) -> (sext x)
1724 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1725 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1726 }
1727 return SDValue();
1728}
1729
1730bool DAGCombiner::PromoteLoad(SDValue Op) {
1731 if (!LegalOperations)
1732 return false;
1733
1734 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1735 return false;
1736
1737 EVT VT = Op.getValueType();
1738 if (VT.isVector() || !VT.isInteger())
1739 return false;
1740
1741 // If operation type is 'undesirable', e.g. i16 on x86, consider
1742 // promoting it.
1743 unsigned Opc = Op.getOpcode();
1744 if (TLI.isTypeDesirableForOp(Opc, VT))
1745 return false;
1746
1747 EVT PVT = VT;
1748 // Consult target whether it is a good idea to promote this operation and
1749 // what's the right type to promote it to.
1750 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1751 assert(PVT != VT && "Don't know what type to promote to!");
1752
1753 SDLoc DL(Op);
1754 SDNode *N = Op.getNode();
1755 LoadSDNode *LD = cast<LoadSDNode>(N);
1756 EVT MemVT = LD->getMemoryVT();
1758 : LD->getExtensionType();
1759 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1760 LD->getChain(), LD->getBasePtr(),
1761 MemVT, LD->getMemOperand());
1762 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1763
1764 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1765 Result.dump(&DAG); dbgs() << '\n');
1766
1767 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1768 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1769
1770 AddToWorklist(Result.getNode());
1771 recursivelyDeleteUnusedNodes(N);
1772 return true;
1773 }
1774
1775 return false;
1776}
1777
1778/// Recursively delete a node which has no uses and any operands for
1779/// which it is the only use.
1780///
1781/// Note that this both deletes the nodes and removes them from the worklist.
1782/// It also adds any nodes who have had a user deleted to the worklist as they
1783/// may now have only one use and subject to other combines.
1784bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1785 if (!N->use_empty())
1786 return false;
1787
1788 SmallSetVector<SDNode *, 16> Nodes;
1789 Nodes.insert(N);
1790 do {
1791 N = Nodes.pop_back_val();
1792 if (!N)
1793 continue;
1794
1795 if (N->use_empty()) {
1796 for (const SDValue &ChildN : N->op_values())
1797 Nodes.insert(ChildN.getNode());
1798
1799 removeFromWorklist(N);
1800 DAG.DeleteNode(N);
1801 } else {
1802 AddToWorklist(N);
1803 }
1804 } while (!Nodes.empty());
1805 return true;
1806}
1807
1808//===----------------------------------------------------------------------===//
1809// Main DAG Combiner implementation
1810//===----------------------------------------------------------------------===//
1811
/// Top-level driver: repeatedly pop nodes off the combiner worklist and try
/// to simplify each one until the worklist is exhausted.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // NOTE(review): RAII helper; presumably registers a DAG listener so nodes
  // created during combining are added to the worklist as well — confirm
  // against the WorklistInserter definition.
  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  //
  // Note: All nodes are not added to PruningList here, this is because the only
  // nodes which can be deleted are those which have no uses and all other nodes
  // which would otherwise be added to the worklist by the first call to
  // getNextWorklistEntry are already present in it.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // If legalization invalidated N itself, its replacements were queued
      // above; nothing further to do with N here.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. getNextWorklistEntry flags nodes that have been
    // combined before. Because the worklist uniques things already, this won't
    // repeatedly process the same operand.
    for (const SDValue &ChildN : N->op_values())
      AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
                    /*SkipIfCombinedBefore=*/true);

    SDValue RV = combine(N);

    // No combine fired; move on to the next worklist entry.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // Invalidate cached info.
    ChainsWithoutMergeableStores.clear();

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));

    if (N->getNumValues() == RV->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // Single-result N is replaced by RV's first result.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist. Omit this if the
    // new node is the EntryToken (e.g. if a store managed to get optimized
    // out), because re-visiting the EntryToken and its users will not uncover
    // any additional opportunities, but there may be a large number of such
    // users, potentially causing compile time explosion.
    if (RV.getOpcode() != ISD::EntryToken)
      AddToWorklistWithUsers(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1918
1919SDValue DAGCombiner::visit(SDNode *N) {
1920 // clang-format off
1921 switch (N->getOpcode()) {
1922 default: break;
1923 case ISD::TokenFactor: return visitTokenFactor(N);
1924 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1925 case ISD::ADD: return visitADD(N);
1926 case ISD::PTRADD: return visitPTRADD(N);
1927 case ISD::SUB: return visitSUB(N);
1928 case ISD::SADDSAT:
1929 case ISD::UADDSAT: return visitADDSAT(N);
1930 case ISD::SSUBSAT:
1931 case ISD::USUBSAT: return visitSUBSAT(N);
1932 case ISD::ADDC: return visitADDC(N);
1933 case ISD::SADDO:
1934 case ISD::UADDO: return visitADDO(N);
1935 case ISD::SUBC: return visitSUBC(N);
1936 case ISD::SSUBO:
1937 case ISD::USUBO: return visitSUBO(N);
1938 case ISD::ADDE: return visitADDE(N);
1939 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1940 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1941 case ISD::SUBE: return visitSUBE(N);
1942 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1943 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1944 case ISD::SMULFIX:
1945 case ISD::SMULFIXSAT:
1946 case ISD::UMULFIX:
1947 case ISD::UMULFIXSAT: return visitMULFIX(N);
1948 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1949 case ISD::SDIV: return visitSDIV(N);
1950 case ISD::UDIV: return visitUDIV(N);
1951 case ISD::SREM:
1952 case ISD::UREM: return visitREM(N);
1953 case ISD::MULHU: return visitMULHU(N);
1954 case ISD::MULHS: return visitMULHS(N);
1955 case ISD::AVGFLOORS:
1956 case ISD::AVGFLOORU:
1957 case ISD::AVGCEILS:
1958 case ISD::AVGCEILU: return visitAVG(N);
1959 case ISD::ABDS:
1960 case ISD::ABDU: return visitABD(N);
1961 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1962 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1963 case ISD::SMULO:
1964 case ISD::UMULO: return visitMULO(N);
1965 case ISD::SMIN:
1966 case ISD::SMAX:
1967 case ISD::UMIN:
1968 case ISD::UMAX: return visitIMINMAX(N);
1969 case ISD::AND: return visitAND(N);
1970 case ISD::OR: return visitOR(N);
1971 case ISD::XOR: return visitXOR(N);
1972 case ISD::SHL: return visitSHL(N);
1973 case ISD::SRA: return visitSRA(N);
1974 case ISD::SRL: return visitSRL(N);
1975 case ISD::ROTR:
1976 case ISD::ROTL: return visitRotate(N);
1977 case ISD::FSHL:
1978 case ISD::FSHR: return visitFunnelShift(N);
1979 case ISD::SSHLSAT:
1980 case ISD::USHLSAT: return visitSHLSAT(N);
1981 case ISD::ABS: return visitABS(N);
1982 case ISD::BSWAP: return visitBSWAP(N);
1983 case ISD::BITREVERSE: return visitBITREVERSE(N);
1984 case ISD::CTLZ: return visitCTLZ(N);
1985 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1986 case ISD::CTTZ: return visitCTTZ(N);
1987 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1988 case ISD::CTPOP: return visitCTPOP(N);
1989 case ISD::SELECT: return visitSELECT(N);
1990 case ISD::VSELECT: return visitVSELECT(N);
1991 case ISD::SELECT_CC: return visitSELECT_CC(N);
1992 case ISD::SETCC: return visitSETCC(N);
1993 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1994 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1995 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1996 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1997 case ISD::AssertSext:
1998 case ISD::AssertZext: return visitAssertExt(N);
1999 case ISD::AssertAlign: return visitAssertAlign(N);
2000 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
2003 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
2004 case ISD::TRUNCATE: return visitTRUNCATE(N);
2005 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
2006 case ISD::BITCAST: return visitBITCAST(N);
2007 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
2008 case ISD::FADD: return visitFADD(N);
2009 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
2010 case ISD::FSUB: return visitFSUB(N);
2011 case ISD::FMUL: return visitFMUL(N);
2012 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
2013 case ISD::FMAD: return visitFMAD(N);
2014 case ISD::FMULADD: return visitFMULADD(N);
2015 case ISD::FDIV: return visitFDIV(N);
2016 case ISD::FREM: return visitFREM(N);
2017 case ISD::FSQRT: return visitFSQRT(N);
2018 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
2019 case ISD::FPOW: return visitFPOW(N);
2020 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
2021 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
2022 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
2023 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
2024 case ISD::LROUND:
2025 case ISD::LLROUND:
2026 case ISD::LRINT:
2027 case ISD::LLRINT: return visitXROUND(N);
2028 case ISD::FP_ROUND: return visitFP_ROUND(N);
2029 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
2030 case ISD::FNEG: return visitFNEG(N);
2031 case ISD::FABS: return visitFABS(N);
2032 case ISD::FFLOOR: return visitFFLOOR(N);
2033 case ISD::FMINNUM:
2034 case ISD::FMAXNUM:
2035 case ISD::FMINIMUM:
2036 case ISD::FMAXIMUM:
2037 case ISD::FMINIMUMNUM:
2038 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
2039 case ISD::FCEIL: return visitFCEIL(N);
2040 case ISD::FTRUNC: return visitFTRUNC(N);
2041 case ISD::FFREXP: return visitFFREXP(N);
2042 case ISD::BRCOND: return visitBRCOND(N);
2043 case ISD::BR_CC: return visitBR_CC(N);
2044 case ISD::LOAD: return visitLOAD(N);
2045 case ISD::STORE: return visitSTORE(N);
2046 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
2047 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
2048 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2049 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
2050 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2051 case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
2052 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
2053 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
2054 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
2055 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
2056 case ISD::MGATHER: return visitMGATHER(N);
2057 case ISD::MLOAD: return visitMLOAD(N);
2058 case ISD::MSCATTER: return visitMSCATTER(N);
2059 case ISD::MSTORE: return visitMSTORE(N);
2060 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
2065 return visitPARTIAL_REDUCE_MLA(N);
2066 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
2067 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
2068 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
2069 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
2070 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
2071 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
2072 case ISD::FREEZE: return visitFREEZE(N);
2073 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
2074 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
2075 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
2078 case ISD::VECREDUCE_ADD:
2079 case ISD::VECREDUCE_MUL:
2080 case ISD::VECREDUCE_AND:
2081 case ISD::VECREDUCE_OR:
2082 case ISD::VECREDUCE_XOR:
2090 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2091#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2092#include "llvm/IR/VPIntrinsics.def"
2093 return visitVPOp(N);
2094 }
2095 // clang-format on
2096 return SDValue();
2097}
2098
2099SDValue DAGCombiner::combine(SDNode *N) {
2100 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2101 return SDValue();
2102
2103 SDValue RV;
2104 if (!DisableGenericCombines)
2105 RV = visit(N);
2106
2107 // If nothing happened, try a target-specific DAG combine.
2108 if (!RV.getNode()) {
2109 assert(N->getOpcode() != ISD::DELETED_NODE &&
2110 "Node was deleted but visit returned NULL!");
2111
2112 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2113 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2114
2115 // Expose the DAG combiner to the target combiner impls.
2116 TargetLowering::DAGCombinerInfo
2117 DagCombineInfo(DAG, Level, false, this);
2118
2119 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2120 }
2121 }
2122
2123 // If nothing happened still, try promoting the operation.
2124 if (!RV.getNode()) {
2125 switch (N->getOpcode()) {
2126 default: break;
2127 case ISD::ADD:
2128 case ISD::SUB:
2129 case ISD::MUL:
2130 case ISD::AND:
2131 case ISD::OR:
2132 case ISD::XOR:
2133 RV = PromoteIntBinOp(SDValue(N, 0));
2134 break;
2135 case ISD::SHL:
2136 case ISD::SRA:
2137 case ISD::SRL:
2138 RV = PromoteIntShiftOp(SDValue(N, 0));
2139 break;
2140 case ISD::SIGN_EXTEND:
2141 case ISD::ZERO_EXTEND:
2142 case ISD::ANY_EXTEND:
2143 RV = PromoteExtend(SDValue(N, 0));
2144 break;
2145 case ISD::LOAD:
2146 if (PromoteLoad(SDValue(N, 0)))
2147 RV = SDValue(N, 0);
2148 break;
2149 }
2150 }
2151
2152 // If N is a commutative binary node, try to eliminate it if the commuted
2153 // version is already present in the DAG.
2154 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2155 SDValue N0 = N->getOperand(0);
2156 SDValue N1 = N->getOperand(1);
2157
2158 // Constant operands are canonicalized to RHS.
2159 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2160 SDValue Ops[] = {N1, N0};
2161 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2162 N->getFlags());
2163 if (CSENode)
2164 return SDValue(CSENode, 0);
2165 }
2166 }
2167
2168 return RV;
2169}
2170
2171/// Given a node, return its input chain if it has one, otherwise return a null
2172/// sd operand.
2174 if (unsigned NumOps = N->getNumOperands()) {
2175 if (N->getOperand(0).getValueType() == MVT::Other)
2176 return N->getOperand(0);
2177 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2178 return N->getOperand(NumOps-1);
2179 for (unsigned i = 1; i < NumOps-1; ++i)
2180 if (N->getOperand(i).getValueType() == MVT::Other)
2181 return N->getOperand(i);
2182 }
2183 return SDValue();
2184}
2185
2186SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2187 SDValue Operand = N->getOperand(0);
2188 EVT VT = Operand.getValueType();
2189 SDLoc dl(N);
2190
2191 // Canonicalize undef to quiet NaN.
2192 if (Operand.isUndef()) {
2193 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2194 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2195 }
2196 return SDValue();
2197}
2198
2199SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2200 // If N has two operands, where one has an input chain equal to the other,
2201 // the 'other' chain is redundant.
2202 if (N->getNumOperands() == 2) {
2203 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2204 return N->getOperand(0);
2205 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2206 return N->getOperand(1);
2207 }
2208
2209 // Don't simplify token factors if optnone.
2210 if (OptLevel == CodeGenOptLevel::None)
2211 return SDValue();
2212
2213 // Don't simplify the token factor if the node itself has too many operands.
2214 if (N->getNumOperands() > TokenFactorInlineLimit)
2215 return SDValue();
2216
2217 // If the sole user is a token factor, we should make sure we have a
2218 // chance to merge them together. This prevents TF chains from inhibiting
2219 // optimizations.
2220 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2221 AddToWorklist(*(N->user_begin()));
2222
2223 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2224 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2225 SmallPtrSet<SDNode*, 16> SeenOps;
2226 bool Changed = false; // If we should replace this token factor.
2227
2228 // Start out with this token factor.
2229 TFs.push_back(N);
2230
2231 // Iterate through token factors. The TFs grows when new token factors are
2232 // encountered.
2233 for (unsigned i = 0; i < TFs.size(); ++i) {
2234 // Limit number of nodes to inline, to avoid quadratic compile times.
2235 // We have to add the outstanding Token Factors to Ops, otherwise we might
2236 // drop Ops from the resulting Token Factors.
2237 if (Ops.size() > TokenFactorInlineLimit) {
2238 for (unsigned j = i; j < TFs.size(); j++)
2239 Ops.emplace_back(TFs[j], 0);
2240 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2241 // combiner worklist later.
2242 TFs.resize(i);
2243 break;
2244 }
2245
2246 SDNode *TF = TFs[i];
2247 // Check each of the operands.
2248 for (const SDValue &Op : TF->op_values()) {
2249 switch (Op.getOpcode()) {
2250 case ISD::EntryToken:
2251 // Entry tokens don't need to be added to the list. They are
2252 // redundant.
2253 Changed = true;
2254 break;
2255
2256 case ISD::TokenFactor:
2257 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2258 // Queue up for processing.
2259 TFs.push_back(Op.getNode());
2260 Changed = true;
2261 break;
2262 }
2263 [[fallthrough]];
2264
2265 default:
2266 // Only add if it isn't already in the list.
2267 if (SeenOps.insert(Op.getNode()).second)
2268 Ops.push_back(Op);
2269 else
2270 Changed = true;
2271 break;
2272 }
2273 }
2274 }
2275
2276 // Re-visit inlined Token Factors, to clean them up in case they have been
2277 // removed. Skip the first Token Factor, as this is the current node.
2278 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2279 AddToWorklist(TFs[i]);
2280
2281 // Remove Nodes that are chained to another node in the list. Do so
2282 // by walking up chains breath-first stopping when we've seen
2283 // another operand. In general we must climb to the EntryNode, but we can exit
2284 // early if we find all remaining work is associated with just one operand as
2285 // no further pruning is possible.
2286
2287 // List of nodes to search through and original Ops from which they originate.
2289 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2290 SmallPtrSet<SDNode *, 16> SeenChains;
2291 bool DidPruneOps = false;
2292
2293 unsigned NumLeftToConsider = 0;
2294 for (const SDValue &Op : Ops) {
2295 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2296 OpWorkCount.push_back(1);
2297 }
2298
2299 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2300 // If this is an Op, we can remove the op from the list. Remark any
2301 // search associated with it as from the current OpNumber.
2302 if (SeenOps.contains(Op)) {
2303 Changed = true;
2304 DidPruneOps = true;
2305 unsigned OrigOpNumber = 0;
2306 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2307 OrigOpNumber++;
2308 assert((OrigOpNumber != Ops.size()) &&
2309 "expected to find TokenFactor Operand");
2310 // Re-mark worklist from OrigOpNumber to OpNumber
2311 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2312 if (Worklist[i].second == OrigOpNumber) {
2313 Worklist[i].second = OpNumber;
2314 }
2315 }
2316 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2317 OpWorkCount[OrigOpNumber] = 0;
2318 NumLeftToConsider--;
2319 }
2320 // Add if it's a new chain
2321 if (SeenChains.insert(Op).second) {
2322 OpWorkCount[OpNumber]++;
2323 Worklist.push_back(std::make_pair(Op, OpNumber));
2324 }
2325 };
2326
2327 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2328 // We need at least be consider at least 2 Ops to prune.
2329 if (NumLeftToConsider <= 1)
2330 break;
2331 auto CurNode = Worklist[i].first;
2332 auto CurOpNumber = Worklist[i].second;
2333 assert((OpWorkCount[CurOpNumber] > 0) &&
2334 "Node should not appear in worklist");
2335 switch (CurNode->getOpcode()) {
2336 case ISD::EntryToken:
2337 // Hitting EntryToken is the only way for the search to terminate without
2338 // hitting
2339 // another operand's search. Prevent us from marking this operand
2340 // considered.
2341 NumLeftToConsider++;
2342 break;
2343 case ISD::TokenFactor:
2344 for (const SDValue &Op : CurNode->op_values())
2345 AddToWorklist(i, Op.getNode(), CurOpNumber);
2346 break;
2348 case ISD::LIFETIME_END:
2349 case ISD::CopyFromReg:
2350 case ISD::CopyToReg:
2351 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2352 break;
2353 default:
2354 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2355 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2356 break;
2357 }
2358 OpWorkCount[CurOpNumber]--;
2359 if (OpWorkCount[CurOpNumber] == 0)
2360 NumLeftToConsider--;
2361 }
2362
2363 // If we've changed things around then replace token factor.
2364 if (Changed) {
2366 if (Ops.empty()) {
2367 // The entry token is the only possible outcome.
2368 Result = DAG.getEntryNode();
2369 } else {
2370 if (DidPruneOps) {
2371 SmallVector<SDValue, 8> PrunedOps;
2372 //
2373 for (const SDValue &Op : Ops) {
2374 if (SeenChains.count(Op.getNode()) == 0)
2375 PrunedOps.push_back(Op);
2376 }
2377 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2378 } else {
2379 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2380 }
2381 }
2382 return Result;
2383 }
2384 return SDValue();
2385}
2386
2387/// MERGE_VALUES can always be eliminated.
2388SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2389 WorklistRemover DeadNodes(*this);
2390 // Replacing results may cause a different MERGE_VALUES to suddenly
2391 // be CSE'd with N, and carry its uses with it. Iterate until no
2392 // uses remain, to ensure that the node can be safely deleted.
2393 // First add the users of this node to the work list so that they
2394 // can be tried again once they have new operands.
2395 AddUsersToWorklist(N);
2396 do {
2397 // Do as a single replacement to avoid rewalking use lists.
2399 DAG.ReplaceAllUsesWith(N, Ops.data());
2400 } while (!N->use_empty());
2401 deleteAndRecombine(N);
2402 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2403}
2404
2405/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2406/// ConstantSDNode pointer else nullptr.
2409 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2410}
2411
2412// isTruncateOf - If N is a truncate of some other value, return true, record
2413// the value being truncated in Op and which of Op's bits are zero/one in Known.
2414// This function computes KnownBits to avoid a duplicated call to
2415// computeKnownBits in the caller.
2417 KnownBits &Known) {
2418 if (N->getOpcode() == ISD::TRUNCATE) {
2419 Op = N->getOperand(0);
2420 Known = DAG.computeKnownBits(Op);
2421 if (N->getFlags().hasNoUnsignedWrap())
2422 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2423 return true;
2424 }
2425
2426 if (N.getValueType().getScalarType() != MVT::i1 ||
2427 !sd_match(
2429 return false;
2430
2431 Known = DAG.computeKnownBits(Op);
2432 return (Known.Zero | 1).isAllOnes();
2433}
2434
2435/// Return true if 'Use' is a load or a store that uses N as its base pointer
2436/// and that N may be folded in the load / store addressing mode.
2438 const TargetLowering &TLI) {
2439 EVT VT;
2440 unsigned AS;
2441
2442 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2443 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2444 return false;
2445 VT = LD->getMemoryVT();
2446 AS = LD->getAddressSpace();
2447 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2448 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2449 return false;
2450 VT = ST->getMemoryVT();
2451 AS = ST->getAddressSpace();
2453 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2454 return false;
2455 VT = LD->getMemoryVT();
2456 AS = LD->getAddressSpace();
2458 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2459 return false;
2460 VT = ST->getMemoryVT();
2461 AS = ST->getAddressSpace();
2462 } else {
2463 return false;
2464 }
2465
2467 if (N->isAnyAdd()) {
2468 AM.HasBaseReg = true;
2470 if (Offset)
2471 // [reg +/- imm]
2472 AM.BaseOffs = Offset->getSExtValue();
2473 else
2474 // [reg +/- reg]
2475 AM.Scale = 1;
2476 } else if (N->getOpcode() == ISD::SUB) {
2477 AM.HasBaseReg = true;
2479 if (Offset)
2480 // [reg +/- imm]
2481 AM.BaseOffs = -Offset->getSExtValue();
2482 else
2483 // [reg +/- reg]
2484 AM.Scale = 1;
2485 } else {
2486 return false;
2487 }
2488
2489 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2490 VT.getTypeForEVT(*DAG.getContext()), AS);
2491}
2492
2493/// This inverts a canonicalization in IR that replaces a variable select arm
2494/// with an identity constant. Codegen improves if we re-use the variable
2495/// operand rather than load a constant. This can also be converted into a
2496/// masked vector operation if the target supports it.
2498 bool ShouldCommuteOperands) {
2499 SDValue N0 = N->getOperand(0);
2500 SDValue N1 = N->getOperand(1);
2501
2502 // Match a select as operand 1. The identity constant that we are looking for
2503 // is only valid as operand 1 of a non-commutative binop.
2504 if (ShouldCommuteOperands)
2505 std::swap(N0, N1);
2506
2507 SDValue Cond, TVal, FVal;
2509 m_Value(FVal)))))
2510 return SDValue();
2511
2512 // We can't hoist all instructions because of immediate UB (not speculatable).
2513 // For example div/rem by zero.
2515 return SDValue();
2516
2517 unsigned SelOpcode = N1.getOpcode();
2518 unsigned Opcode = N->getOpcode();
2519 EVT VT = N->getValueType(0);
2520 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2521
2522 // This transform increases uses of N0, so freeze it to be safe.
2523 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2524 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2525 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo) &&
2526 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2527 FVal)) {
2528 SDValue F0 = DAG.getFreeze(N0);
2529 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2530 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2531 }
2532 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2533 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo) &&
2534 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2535 TVal)) {
2536 SDValue F0 = DAG.getFreeze(N0);
2537 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2538 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2539 }
2540
2541 return SDValue();
2542}
2543
2544SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2545 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2546 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2547 "Unexpected binary operator");
2548
2549 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2550 return Sel;
2551
2552 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2553 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2554 return Sel;
2555
2556 // Don't do this unless the old select is going away. We want to eliminate the
2557 // binary operator, not replace a binop with a select.
2558 // TODO: Handle ISD::SELECT_CC.
2559 unsigned SelOpNo = 0;
2560 SDValue Sel = BO->getOperand(0);
2561 auto BinOpcode = BO->getOpcode();
2562 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2563 SelOpNo = 1;
2564 Sel = BO->getOperand(1);
2565
2566 // Peek through trunc to shift amount type.
2567 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2568 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2569 // This is valid when the truncated bits of x are already zero.
2570 SDValue Op;
2571 KnownBits Known;
2572 if (isTruncateOf(DAG, Sel, Op, Known) &&
2574 Sel = Op;
2575 }
2576 }
2577
2578 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2579 return SDValue();
2580
2581 SDValue CT = Sel.getOperand(1);
2582 if (!isConstantOrConstantVector(CT, true) &&
2584 return SDValue();
2585
2586 SDValue CF = Sel.getOperand(2);
2587 if (!isConstantOrConstantVector(CF, true) &&
2589 return SDValue();
2590
2591 // Bail out if any constants are opaque because we can't constant fold those.
2592 // The exception is "and" and "or" with either 0 or -1 in which case we can
2593 // propagate non constant operands into select. I.e.:
2594 // and (select Cond, 0, -1), X --> select Cond, 0, X
2595 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2596 bool CanFoldNonConst =
2597 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2600
2601 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2602 if (!CanFoldNonConst &&
2603 !isConstantOrConstantVector(CBO, true) &&
2605 return SDValue();
2606
2607 SDLoc DL(Sel);
2608 SDValue NewCT, NewCF;
2609 EVT VT = BO->getValueType(0);
2610
2611 if (CanFoldNonConst) {
2612 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2613 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2614 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2615 NewCT = CT;
2616 else
2617 NewCT = CBO;
2618
2619 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2620 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2621 NewCF = CF;
2622 else
2623 NewCF = CBO;
2624 } else {
2625 // We have a select-of-constants followed by a binary operator with a
2626 // constant. Eliminate the binop by pulling the constant math into the
2627 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2628 // CBO, CF + CBO
2629 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2630 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2631 if (!NewCT)
2632 return SDValue();
2633
2634 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2635 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2636 if (!NewCF)
2637 return SDValue();
2638 }
2639
2640 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags());
2641}
2642
2644 SelectionDAG &DAG) {
2645 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2646 "Expecting add or sub");
2647
2648 // Match a constant operand and a zext operand for the math instruction:
2649 // add Z, C
2650 // sub C, Z
2651 bool IsAdd = N->getOpcode() == ISD::ADD;
2652 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2653 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
// Only a scalar ConstantSDNode is handled here (not a splat build-vector).
2654 auto *CN = dyn_cast<ConstantSDNode>(C);
2655 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2656 return SDValue();
2657
2658 // Match the zext operand as a setcc of a boolean.
// The i1 result type guarantees the zext produces exactly 0 or 1, which the
// arithmetic rewrite below relies on.
2659 if (Z.getOperand(0).getValueType() != MVT::i1)
2660 return SDValue();
2661
2662 // Match the compare as: setcc (X & 1), 0, eq.
2663 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2665 return SDValue();
2666
2667 // We are adding/subtracting a constant and an inverted low bit. Turn that
2668 // into a subtract/add of the low bit with incremented/decremented constant:
2669 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2670 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2671 EVT VT = C.getValueType();
// LowBit is the setcc's first operand, i.e. (X & 1), widened/narrowed to the
// result type of the add/sub.
2672 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2673 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2674 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2675 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2676}
2677
2678// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
// Only fires when the target has the corresponding AVGCEIL operation (or we
// are still before legalization). The logical-shift (srl) pattern gives the
// unsigned average, the arithmetic-shift (sra) pattern the signed one.
2679SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2680 SDValue N0 = N->getOperand(0);
2681 EVT VT = N0.getValueType();
2682 SDValue A, B;
2683
// Unsigned: (A | B) - ((A ^ B) >> 1) --> avgceilu(A, B)
2684 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2686 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2687 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2688 }
// Signed variant uses an arithmetic shift of the xor.
2689 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2691 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2692 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2693 }
2694 return SDValue();
2695}
2696
2697/// Try to fold a pointer arithmetic node.
2698/// This needs to be done separately from normal addition, because pointer
2699/// addition is not commutative.
2700SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2701 SDValue N0 = N->getOperand(0);
2702 SDValue N1 = N->getOperand(1);
2703 EVT PtrVT = N0.getValueType();
2704 EVT IntVT = N1.getValueType();
2705 SDLoc DL(N);
2706
2707 // This is already ensured by an assert in SelectionDAG::getNode(). Several
2708 // combines here depend on this assumption.
2709 assert(PtrVT == IntVT &&
2710 "PTRADD with different operand types is not supported");
2711
2712 // fold (ptradd x, 0) -> x
2713 if (isNullConstant(N1))
2714 return N0;
2715
2716 // fold (ptradd 0, x) -> x
2717 if (PtrVT == IntVT && isNullConstant(N0))
2718 return N1;
2719
// Reassociate nested ptradds so constant offsets can be combined, but only
// when doing so cannot break an addressing-mode match at a memory use.
2720 if (N0.getOpcode() == ISD::PTRADD &&
2721 !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2722 SDValue X = N0.getOperand(0);
2723 SDValue Y = N0.getOperand(1);
2724 SDValue Z = N1;
2725 bool N0OneUse = N0.hasOneUse();
2726 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2727 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2728
2729 // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2730 // * y is a constant and (ptradd x, y) has one use; or
2731 // * y and z are both constants.
2732 if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2733 // If both additions in the original were NUW, the new ones are as well.
2734 SDNodeFlags Flags =
2735 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2736 SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2737 AddToWorklist(Add.getNode());
2738 // We can't set InBounds even if both original ptradds were InBounds and
2739 // NUW: SDAG usually represents pointers as integers, therefore, the
2740 // matched pattern behaves as if it had implicit casts:
2741 // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds x, y))), z)
2742 // The outer inbounds ptradd might therefore rely on a provenance that x
2743 // does not have.
2744 return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2745 }
2746 }
2747
2748 // The following combines can turn in-bounds pointer arithmetic out of bounds.
2749 // That is problematic for settings like AArch64's CPA, which checks that
2750 // intermediate results of pointer arithmetic remain in bounds. The target
2751 // therefore needs to opt-in to enable them.
2753 DAG.getMachineFunction().getFunction(), PtrVT))
2754 return SDValue();
2755
2756 if (N0.getOpcode() == ISD::PTRADD && isa<ConstantSDNode>(N1)) {
2757 // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2758 // global address GA and constant c, such that c can be folded into GA.
2759 // TODO: Support constant vector splats.
2760 SDValue GAValue = N0.getOperand(0);
2761 if (const GlobalAddressSDNode *GA =
2763 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Offset folding into a global address is only queried pre-legalization.
2764 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2765 // If both additions in the original were NUW, reassociation preserves
2766 // that.
2767 SDNodeFlags Flags =
2768 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2769 // We can't set InBounds even if both original ptradds were InBounds and
2770 // NUW: SDAG usually represents pointers as integers, therefore, the
2771 // matched pattern behaves as if it had implicit casts:
2772 // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds GA, v))), c)
2773 // The outer inbounds ptradd might therefore rely on a provenance that
2774 // GA does not have.
2775 SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2776 AddToWorklist(Inner.getNode());
2777 return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2778 }
2779 }
2780 }
2781
2782 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2783 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2784 // y is not, and (add y, z) is used only once.
2785 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2786 // z is not, and (add y, z) is used only once.
2787 // The goal is to move constant offsets to the outermost ptradd, to create
2788 // more opportunities to fold offsets into memory instructions.
2789 // Together with the another combine above, this also implements
2790 // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
2791 SDValue X = N0;
2792 SDValue Y = N1.getOperand(0);
2793 SDValue Z = N1.getOperand(1);
2794 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2795 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2796
2797 // If both additions in the original were NUW, reassociation preserves that.
2798 SDNodeFlags CommonFlags = N->getFlags() & N1->getFlags();
2799 SDNodeFlags ReassocFlags = CommonFlags & SDNodeFlags::NoUnsignedWrap;
2800 if (CommonFlags.hasNoUnsignedWrap()) {
2801 // If both operations are NUW and the PTRADD is inbounds, the offests are
2802 // both non-negative, so the reassociated PTRADDs are also inbounds.
2803 ReassocFlags |= N->getFlags() & SDNodeFlags::InBounds;
2804 }
2805
// Exactly one of y/z is a constant: put the constant on the outer ptradd.
2806 if (ZIsConstant != YIsConstant) {
2807 if (YIsConstant)
2808 std::swap(Y, Z);
2809 SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2810 AddToWorklist(Inner.getNode());
2811 return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2812 }
2813 }
2814
2815 // Transform (ptradd a, b) -> (or disjoint a, b) if it is equivalent and if
2816 // that transformation can't block an offset folding at any use of the ptradd.
2817 // This should be done late, after legalization, so that it doesn't block
2818 // other ptradd combines that could enable more offset folding.
2819 if (LegalOperations && DAG.haveNoCommonBitsSet(N0, N1)) {
2820 bool TransformCannotBreakAddrMode = none_of(N->users(), [&](SDNode *User) {
2821 return canFoldInAddressingMode(N, User, DAG, TLI);
2822 });
2823
2824 if (TransformCannotBreakAddrMode)
2825 return DAG.getNode(ISD::OR, DL, PtrVT, N0, N1, SDNodeFlags::Disjoint);
2826 }
2827
2828 return SDValue();
2829}
2830
2831/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2832/// a shift and add with a different constant.
2834 SelectionDAG &DAG) {
2835 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2836 "Expecting add or sub");
2837
2838 // We need a constant operand for the add/sub, and the other operand is a
2839 // logical shift right: add (srl), C or sub C, (srl).
2840 bool IsAdd = N->getOpcode() == ISD::ADD;
2841 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2842 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2843 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2844 ShiftOp.getOpcode() != ISD::SRL)
2845 return SDValue();
2846
2847 // The shift must be of a 'not' value.
// One-use so removing the 'not' actually reduces the DAG.
2848 SDValue Not = ShiftOp.getOperand(0);
2849 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2850 return SDValue();
2851
2852 // The shift must be moving the sign bit to the least-significant-bit.
2853 EVT VT = ShiftOp.getValueType();
2854 SDValue ShAmt = ShiftOp.getOperand(1);
2855 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2856 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2857 return SDValue();
2858
2859 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2860 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2861 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
// FoldConstantArithmetic can fail (e.g. for opaque constants); bail then.
2862 if (SDValue NewC = DAG.FoldConstantArithmetic(
2863 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2864 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2865 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2866 Not.getOperand(0), ShAmt);
2867 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2868 }
2869
2870 return SDValue();
2871}
2872
/// Return true if one operand is the bitwise 'not' of the other, checking
/// both orders.
2873static bool
2875 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2876 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2877}
2878
2879/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2880/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2881/// are no common bits set in the operands).
2882SDValue DAGCombiner::visitADDLike(SDNode *N) {
2883 SDValue N0 = N->getOperand(0);
2884 SDValue N1 = N->getOperand(1);
2885 EVT VT = N0.getValueType();
2886 SDLoc DL(N);
2887
2888 // fold (add x, undef) -> undef
2889 if (N0.isUndef())
2890 return N0;
2891 if (N1.isUndef())
2892 return N1;
2893
2894 // fold (add c1, c2) -> c1+c2
2895 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2896 return C;
2897
2898 // canonicalize constant to RHS
2901 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2902
// X + ~X (in either order) is all-ones.
2903 if (areBitwiseNotOfEachother(N0, N1))
2904 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2905
2906 // fold vector ops
2907 if (VT.isVector()) {
2908 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2909 return FoldedVOp;
2910
2911 // fold (add x, 0) -> x, vector edition
2913 return N0;
2914 }
2915
2916 // fold (add x, 0) -> x
2917 if (isNullConstant(N1))
2918 return N0;
2919
2920 if (N0.getOpcode() == ISD::SUB) {
2921 SDValue N00 = N0.getOperand(0);
2922 SDValue N01 = N0.getOperand(1);
2923
2924 // fold ((A-c1)+c2) -> (A+(c2-c1))
2925 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2926 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2927
2928 // fold ((c1-A)+c2) -> (c1+c2)-A
2929 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2930 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2931 }
2932
2933 // add (sext i1 X), 1 -> zext (not i1 X)
2934 // We don't transform this pattern:
2935 // add (zext i1 X), -1 -> sext (not i1 X)
2936 // because most (?) targets generate better code for the zext form.
2937 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2938 isOneOrOneSplat(N1)) {
2939 SDValue X = N0.getOperand(0);
2940 if ((!LegalOperations ||
2941 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2943 X.getScalarValueSizeInBits() == 1) {
2944 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2945 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2946 }
2947 }
2948
2949 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2950 // iff (or x, c0) is equivalent to (add x, c0).
2951 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2952 // iff (xor x, c0) is equivalent to (add x, c0).
2953 if (DAG.isADDLike(N0)) {
2954 SDValue N01 = N0.getOperand(1);
2955 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2956 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2957 }
2958
2959 if (SDValue NewSel = foldBinOpIntoSelect(N))
2960 return NewSel;
2961
2962 // reassociate add
2963 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2964 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2965 return RADD;
2966
2967 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2968 // equivalent to (add x, c).
2969 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2970 // equivalent to (add x, c).
2971 // Do this optimization only when adding c does not introduce instructions
2972 // for adding carries.
2973 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2974 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2975 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2976 // If N0's type does not split or is a sign mask, it does not introduce
2977 // add carry.
2978 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2979 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2982 if (NoAddCarry)
2983 return DAG.getNode(
2984 ISD::ADD, DL, VT,
2985 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2986 N0.getOperand(1));
2987 }
2988 return SDValue();
2989 };
// Try the lambda with the operands in both orders.
2990 if (SDValue Add = ReassociateAddOr(N0, N1))
2991 return Add;
2992 if (SDValue Add = ReassociateAddOr(N1, N0))
2993 return Add;
2994
2995 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2996 if (SDValue SD =
2997 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2998 return SD;
2999 }
3000
// Generic algebraic folds expressed with the SDPatternMatch DSL below.
3001 SDValue A, B, C, D;
3002
3003 // fold ((0-A) + B) -> B-A
3004 if (sd_match(N0, m_Neg(m_Value(A))))
3005 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
3006
3007 // fold (A + (0-B)) -> A-B
3008 if (sd_match(N1, m_Neg(m_Value(B))))
3009 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
3010
3011 // fold (A+(B-A)) -> B
3012 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
3013 return B;
3014
3015 // fold ((B-A)+A) -> B
3016 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
3017 return B;
3018
3019 // fold ((A-B)+(C-A)) -> (C-B)
3020 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
3022 return DAG.getNode(ISD::SUB, DL, VT, C, B);
3023
3024 // fold ((A-B)+(B-C)) -> (A-C)
3025 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
3027 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3028
3029 // fold (A+(B-(A+C))) to (B-C)
3030 // fold (A+(B-(C+A))) to (B-C)
3031 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
3032 return DAG.getNode(ISD::SUB, DL, VT, B, C);
3033
3034 // fold (A+((B-A)+or-C)) to (B+or-C)
3035 if (sd_match(N1,
3037 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
3038 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
3039
3040 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
3041 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
3042 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
3044 return DAG.getNode(ISD::SUB, DL, VT,
3045 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
3046 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
3047
3048 // fold (add (umax X, C), -C) --> (usubsat X, C)
3049 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
// The constants must be exact negations of each other (per element; paired
// undefs are also accepted via AllowUndefs).
3050 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
3051 return (!Max && !Op) ||
3052 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
3053 };
3054 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
3055 /*AllowUndefs*/ true))
3056 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
3057 N0.getOperand(1));
3058 }
3059
3061 return SDValue(N, 0);
3062
// Folds that apply when the RHS is the constant 1 (scalar or splat).
3063 if (isOneOrOneSplat(N1)) {
3064 // fold (add (xor a, -1), 1) -> (sub 0, a)
3065 if (isBitwiseNot(N0))
3066 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
3067 N0.getOperand(0));
3068
3069 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
3070 if (N0.getOpcode() == ISD::ADD) {
3071 SDValue A, Xor;
3072
3073 if (isBitwiseNot(N0.getOperand(0))) {
3074 A = N0.getOperand(1);
3075 Xor = N0.getOperand(0);
3076 } else if (isBitwiseNot(N0.getOperand(1))) {
3077 A = N0.getOperand(0);
3078 Xor = N0.getOperand(1);
3079 }
3080
3081 if (Xor)
3082 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
3083 }
3084
3085 // Look for:
3086 // add (add x, y), 1
3087 // And if the target does not like this form then turn into:
3088 // sub y, (xor x, -1)
3089 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3090 N0.hasOneUse() &&
3091 // Limit this to after legalization if the add has wrap flags
3092 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3093 !N->getFlags().hasNoSignedWrap()))) {
3094 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3095 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
3096 }
3097 }
3098
3099 // (x - y) + -1 -> add (xor y, -1), x
3100 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3101 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
3102 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
3103 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
3104 }
3105
3106 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3107 // This can help if the inner add has multiple uses.
3108 APInt CM, CA;
3109 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
// Restricted to <= 64-bit scalars so getSExtValue() below is valid.
3110 if (VT.getScalarSizeInBits() <= 64) {
3112 m_ConstInt(CM)))) &&
3114 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3115 SDNodeFlags Flags;
3116 // If all the inputs are nuw, the outputs can be nuw. If all the input
3117 // are _also_ nsw the outputs can be too.
3118 if (N->getFlags().hasNoUnsignedWrap() &&
3119 N0->getFlags().hasNoUnsignedWrap() &&
3122 if (N->getFlags().hasNoSignedWrap() &&
3123 N0->getFlags().hasNoSignedWrap() &&
3126 }
3127 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3128 DAG.getConstant(CM, DL, VT), Flags);
3129 return DAG.getNode(
3130 ISD::ADD, DL, VT, Mul,
3131 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3132 }
3133 // Also look in case there is an intermediate add.
3134 if (sd_match(N0, m_OneUse(m_Add(
3136 m_ConstInt(CM))),
3137 m_Value(B)))) &&
3139 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3140 SDNodeFlags Flags;
3141 // If all the inputs are nuw, the outputs can be nuw. If all the input
3142 // are _also_ nsw the outputs can be too.
// OMul is whichever operand of the intermediate add is the mul (not B).
3143 SDValue OMul =
3144 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
3145 if (N->getFlags().hasNoUnsignedWrap() &&
3146 N0->getFlags().hasNoUnsignedWrap() &&
3147 OMul->getFlags().hasNoUnsignedWrap() &&
3148 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3150 if (N->getFlags().hasNoSignedWrap() &&
3151 N0->getFlags().hasNoSignedWrap() &&
3152 OMul->getFlags().hasNoSignedWrap() &&
3153 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
3155 }
3156 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3157 DAG.getConstant(CM, DL, VT), Flags);
3158 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
3159 return DAG.getNode(
3160 ISD::ADD, DL, VT, Add,
3161 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3162 }
3163 }
3164 }
3165
// Finally, try the shared commutative helper with both operand orders.
3166 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
3167 return Combined;
3168
3169 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
3170 return Combined;
3171
3172 return SDValue();
3173}
3174
3175// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
3176// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
// Logical shifts (srl) match the unsigned average, arithmetic shifts (sra)
// the signed one; each requires the corresponding AVGFLOOR op to be available
// (or pre-legalization).
3177SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3178 SDValue N0 = N->getOperand(0);
3179 EVT VT = N0.getValueType();
3180 SDValue A, B;
3181
3182 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3183 (sd_match(N,
3185 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3188 m_Srl(m_Deferred(A), m_One()),
3189 m_Srl(m_Deferred(B), m_One()))))) {
3190 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
3191 }
3192 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3193 (sd_match(N,
3195 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3198 m_Sra(m_Deferred(A), m_One()),
3199 m_Sra(m_Deferred(B), m_One()))))) {
3200 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
3201 }
3202
3203 return SDValue();
3204}
3205
// Main combiner entry for ISD::ADD. Shared ADD-like folds run first (they are
// also used for disjoint ORs via visitADDLike); the ADD-only folds follow.
3206SDValue DAGCombiner::visitADD(SDNode *N) {
3207 SDValue N0 = N->getOperand(0);
3208 SDValue N1 = N->getOperand(1);
3209 EVT VT = N0.getValueType();
3210 SDLoc DL(N);
3211
3212 if (SDValue Combined = visitADDLike(N))
3213 return Combined;
3214
3215 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3216 return V;
3217
3218 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3219 return V;
3220
// Rotate matching also accepts ADD as the combining operation (FromAdd).
3221 if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
3222 return V;
3223
3224 // Try to match AVGFLOOR fixedwidth pattern
3225 if (SDValue V = foldAddToAvg(N, DL))
3226 return V;
3227
3228 // fold (a+b) -> (a|b) iff a and b share no bits.
3229 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3230 DAG.haveNoCommonBitsSet(N0, N1))
3231 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3232
3233 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3234 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3235 const APInt &C0 = N0->getConstantOperandAPInt(0);
3236 const APInt &C1 = N1->getConstantOperandAPInt(0);
3237 return DAG.getVScale(DL, VT, C0 + C1);
3238 }
3239
3240 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3241 if (N0.getOpcode() == ISD::ADD &&
3242 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3243 N1.getOpcode() == ISD::VSCALE) {
3244 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3245 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3246 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3247 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3248 }
3249
3250 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3251 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3252 N1.getOpcode() == ISD::STEP_VECTOR) {
3253 const APInt &C0 = N0->getConstantOperandAPInt(0);
3254 const APInt &C1 = N1->getConstantOperandAPInt(0);
3255 APInt NewStep = C0 + C1;
3256 return DAG.getStepVector(DL, VT, NewStep);
3257 }
3258
3259 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3260 if (N0.getOpcode() == ISD::ADD &&
3262 N1.getOpcode() == ISD::STEP_VECTOR) {
3263 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3264 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3265 APInt NewStep = SV0 + SV1;
3266 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3267 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3268 }
3269
3270 return SDValue();
3271}
3272
// Combiner for the saturating adds; UADDSAT and SADDSAT share this visitor,
// with IsSigned selecting the appropriate overflow reasoning.
3273SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3274 unsigned Opcode = N->getOpcode();
3275 SDValue N0 = N->getOperand(0);
3276 SDValue N1 = N->getOperand(1);
3277 EVT VT = N0.getValueType();
3278 bool IsSigned = Opcode == ISD::SADDSAT;
3279 SDLoc DL(N);
3280
3281 // fold (add_sat x, undef) -> -1
3282 if (N0.isUndef() || N1.isUndef())
3283 return DAG.getAllOnesConstant(DL, VT);
3284
3285 // fold (add_sat c1, c2) -> c3
3286 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3287 return C;
3288
3289 // canonicalize constant to RHS
3292 return DAG.getNode(Opcode, DL, VT, N1, N0);
3293
3294 // fold vector ops
3295 if (VT.isVector()) {
3296 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3297 return FoldedVOp;
3298
3299 // fold (add_sat x, 0) -> x, vector edition
3301 return N0;
3302 }
3303
3304 // fold (add_sat x, 0) -> x
3305 if (isNullConstant(N1))
3306 return N0;
3307
3308 // If it cannot overflow, transform into an add.
3309 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3310 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3311
3312 return SDValue();
3313}
3314
// Returns the carry/overflow producing value if V — possibly wrapped in
// TRUNCATE/ZERO_EXTEND/AND-1 nodes introduced by legalization — is the flag
// result (value #1) of a UADDO/USUBO/UADDO_CARRY/USUBO_CARRY node that is
// legal or custom for the target; otherwise returns an empty SDValue.
3316 bool ForceCarryReconstruction = false) {
3317 bool Masked = false;
3318
3319 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3320 while (true) {
// When forcing carry reconstruction, stop peeling as soon as we already
// have an i1 value.
3321 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3322 return V;
3323
3324 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3325 V = V.getOperand(0);
3326 continue;
3327 }
3328
3329 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3330 if (ForceCarryReconstruction)
3331 return V;
3332
// Remember that the value was masked to a single bit; see below.
3333 Masked = true;
3334 V = V.getOperand(0);
3335 continue;
3336 }
3337
3338 break;
3339 }
3340
3341 // If this is not a carry, return.
3342 if (V.getResNo() != 1)
3343 return SDValue();
3344
3345 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3346 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3347 return SDValue();
3348
3349 EVT VT = V->getValueType(0);
3350 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3351 return SDValue();
3352
3353 // If the result is masked, then no matter what kind of bool it is we can
3354 // return. If it isn't, then we need to make sure the bool type is either 0 or
3355 // 1 and not other values.
3356 if (Masked ||
3357 TLI.getBooleanContents(V.getValueType()) ==
3359 return V;
3360
3361 return SDValue();
3362}
3363
3364/// Given the operands of an add/sub operation, see if the 2nd operand is a
3365/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3366/// the opcode and bypass the mask operation.
3367static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3368 SelectionDAG &DAG, const SDLoc &DL) {
3369 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3370 N1 = N1.getOperand(0);
3371
3372 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3373 return SDValue();
3374
3375 EVT VT = N0.getValueType();
3376 SDValue N10 = N1.getOperand(0);
3377 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3378 N10 = N10.getOperand(0);
3379
3380 if (N10.getValueType() != VT)
3381 return SDValue();
3382
3383 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3384 return SDValue();
3385
3386 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3387 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3388 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3389}
3390
3391/// Helper for doing combines based on N0 and N1 being added to each other.
// Note: visitADDLike invokes this twice with the operands swapped, so each
// pattern here only needs to consider one operand order.
3392SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3393 SDNode *LocReference) {
3394 EVT VT = N0.getValueType();
3395 SDLoc DL(LocReference);
3396
3397 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3398 SDValue Y, N;
3399 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3400 return DAG.getNode(ISD::SUB, DL, VT, N0,
3401 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3402
3403 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3404 return V;
3405
3406 // Look for:
3407 // add (add x, 1), y
3408 // And if the target does not like this form then turn into:
3409 // sub y, (xor x, -1)
3410 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3411 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3412 // Limit this to after legalization if the add has wrap flags
3413 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3414 !N0->getFlags().hasNoSignedWrap()))) {
3415 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3416 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3417 }
3418
3419 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3420 // Hoist one-use subtraction by non-opaque constant:
3421 // (x - C) + y -> (x + y) - C
3422 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3423 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3424 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3425 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3426 }
3427 // Hoist one-use subtraction from non-opaque constant:
3428 // (C - x) + y -> (y - x) + C
3429 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3430 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3431 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3432 }
3433 }
3434
3435 // add (mul x, C), x -> mul x, C+1
3436 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3437 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3438 N0.hasOneUse()) {
3439 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3440 DAG.getConstant(1, DL, VT));
3441 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3442 }
3443
3444 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3445 // rather than 'add 0/-1' (the zext should get folded).
3446 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3447 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3448 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3450 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3451 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3452 }
3453
3454 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3455 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3456 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3457 if (TN->getVT() == MVT::i1) {
3458 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3459 DAG.getConstant(1, DL, VT));
3460 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3461 }
3462 }
3463
3464 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
// Only the arithmetic result (#0) of the uaddo_carry may be the operand here.
3465 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3466 N1.getResNo() == 0)
3467 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3468 N0, N1.getOperand(0), N1.getOperand(2));
3469
3470 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3472 if (SDValue Carry = getAsCarry(TLI, N1))
3473 return DAG.getNode(ISD::UADDO_CARRY, DL,
3474 DAG.getVTList(VT, Carry.getValueType()), N0,
3475 DAG.getConstant(0, DL, VT), Carry);
3476
3477 return SDValue();
3478}
3479
// Combiner for the legacy carry-producing add (ISD::ADDC); its carry output
// (value #1) is modeled as MVT::Glue here.
3480SDValue DAGCombiner::visitADDC(SDNode *N) {
3481 SDValue N0 = N->getOperand(0);
3482 SDValue N1 = N->getOperand(1);
3483 EVT VT = N0.getValueType();
3484 SDLoc DL(N);
3485
3486 // If the flag result is dead, turn this into an ADD.
3487 if (!N->hasAnyUseOfValue(1))
3488 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3489 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3490
3491 // canonicalize constant to RHS.
3492 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3493 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3494 if (N0C && !N1C)
3495 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3496
3497 // fold (addc x, 0) -> x + no carry out
3498 if (isNullConstant(N1))
3499 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3500 DL, MVT::Glue));
3501
3502 // If it cannot overflow, transform into an add.
3504 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3505 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3506
3507 return SDValue();
3508}
3509
/**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
 * then the flip also occurs if computing the inverse is the same cost.
 * This function returns an empty SDValue in case it cannot flip the boolean
 * without increasing the cost of the computation. If you want to flip a boolean
 * no matter what, use DAG.getLogicalNOT.
 */
// NOTE(review): the first line of this function's signature (presumably taking
// the value V and SelectionDAG &DAG, per the uses below) is not visible in
// this chunk; confirm against upstream sources.
                                  const TargetLowering &TLI,
                                  bool Force) {
  // When forced, a constant boolean is inverted directly.
  if (Force && isa<ConstantSDNode>(V))
    return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());

  // Only an XOR can be "flipped" by peeling off its constant operand.
  if (V.getOpcode() != ISD::XOR)
    return SDValue();

  // (xor X, true-constant) --> X; the flip is free.
  if (DAG.isBoolConstant(V.getOperand(1)) == true)
    return V.getOperand(0);
  // When forced, a constant/splat-constant RHS makes the NOT equal-cost.
  if (Force && isConstOrConstSplat(V.getOperand(1), false))
    return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
  return SDValue();
}
3532
SDValue DAGCombiner::visitADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  // Shared visitor for SADDO and UADDO; only the overflow reasoning differs.
  bool IsSigned = (ISD::SADDO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  // NOTE(review): the guard for this canonicalization is not visible in this
  // chunk (lines appear to be missing here); as written the return below is
  // unconditional. Confirm against upstream sources.
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (addo x, 0) -> x + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // If it cannot overflow, transform into an add.
  if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getConstant(0, DL, CarryVT));

  if (IsSigned) {
    // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
      return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
                         DAG.getConstant(0, DL, VT), N0.getOperand(0));
  } else {
    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
      return CombineTo(
          N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
    }

    // Try the UADDO-specific folds with both operand orders.
    if (SDValue Combined = visitUADDOLike(N0, N1, N))
      return Combined;

    if (SDValue Combined = visitUADDOLike(N1, N0, N))
      return Combined;
  }

  return SDValue();
}
3584
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  // These folds are scalar-only.
  if (VT.isVector())
    return SDValue();

  // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    // NOTE(review): the overflow-check guard (using One) is not visible in
    // this chunk (a line appears to be missing here); as written the return
    // below is unconditional. Confirm against upstream sources.
      return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
  // NOTE(review): a legality guard for UADDO_CARRY appears to be missing here
  // (dropped line); confirm against upstream sources.
  if (SDValue Carry = getAsCarry(TLI, N1))
    return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
                       DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}
3608
3609SDValue DAGCombiner::visitADDE(SDNode *N) {
3610 SDValue N0 = N->getOperand(0);
3611 SDValue N1 = N->getOperand(1);
3612 SDValue CarryIn = N->getOperand(2);
3613
3614 // canonicalize constant to RHS
3615 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3616 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3617 if (N0C && !N1C)
3618 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3619 N1, N0, CarryIn);
3620
3621 // fold (adde x, y, false) -> (addc x, y)
3622 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3623 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3624
3625 return SDValue();
3626}
3627
3628SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3629 SDValue N0 = N->getOperand(0);
3630 SDValue N1 = N->getOperand(1);
3631 SDValue CarryIn = N->getOperand(2);
3632 SDLoc DL(N);
3633
3634 // canonicalize constant to RHS
3635 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3636 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3637 if (N0C && !N1C)
3638 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3639
3640 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3641 if (isNullConstant(CarryIn)) {
3642 if (!LegalOperations ||
3643 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3644 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3645 }
3646
3647 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3648 if (isNullConstant(N0) && isNullConstant(N1)) {
3649 EVT VT = N0.getValueType();
3650 EVT CarryVT = CarryIn.getValueType();
3651 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3652 AddToWorklist(CarryExt.getNode());
3653 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3654 DAG.getConstant(1, DL, VT)),
3655 DAG.getConstant(0, DL, CarryVT));
3656 }
3657
3658 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3659 return Combined;
3660
3661 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3662 return Combined;
3663
3664 // We want to avoid useless duplication.
3665 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3666 // not a binary operation, this is not really possible to leverage this
3667 // existing mechanism for it. However, if more operations require the same
3668 // deduplication logic, then it may be worth generalize.
3669 SDValue Ops[] = {N1, N0, CarryIn};
3670 SDNode *CSENode =
3671 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3672 if (CSENode)
3673 return SDValue(CSENode, 0);
3674
3675 return SDValue();
3676}
3677
3678/**
3679 * If we are facing some sort of diamond carry propagation pattern try to
3680 * break it up to generate something like:
3681 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3682 *
3683 * The end result is usually an increase in operation required, but because the
3684 * carry is now linearized, other transforms can kick in and optimize the DAG.
3685 *
3686 * Patterns typically look something like
3687 * (uaddo A, B)
3688 * / \
3689 * Carry Sum
3690 * | \
3691 * | (uaddo_carry *, 0, Z)
3692 * | /
3693 * \ Carry
3694 * | /
3695 * (uaddo_carry X, *, *)
3696 *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3698 * produce a combine with a single path for carry propagation.
3699 */
// NOTE(review): the first line of this function's signature (presumably taking
// DAGCombiner &Combiner, per the uses below) is not visible in this chunk;
// confirm against upstream sources.
                                           SelectionDAG &DAG, SDValue X,
                                           SDValue Carry0, SDValue Carry1,
                                           SDNode *N) {
  // Both carries must be the overflow (second) result of their defining nodes.
  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
    return SDValue();
  if (Carry1.getOpcode() != ISD::UADDO)
    return SDValue();

  SDValue Z;

  /**
   * First look for a suitable Z. It will present itself in the form of
   * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
   */
  if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
      isNullConstant(Carry0.getOperand(1))) {
    Z = Carry0.getOperand(2);
  } else if (Carry0.getOpcode() == ISD::UADDO &&
             isOneConstant(Carry0.getOperand(1))) {
    EVT VT = Carry0->getValueType(1);
    // Adding 1 is equivalent to a carry-in of constant true.
    Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
  } else {
    // We couldn't find a suitable Z.
    return SDValue();
  }


  // Rebuild the diamond as the linear chain
  // (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry).
  auto cancelDiamond = [&](SDValue A,SDValue B) {
    SDLoc DL(N);
    SDValue NewY =
        DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
    Combiner.AddToWorklist(NewY.getNode());
    return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
                       DAG.getConstant(0, DL, X.getValueType()),
                       NewY.getValue(1));
  };

  /**
   * (uaddo A, B)
   * |
   * Sum
   * |
   * (uaddo_carry *, 0, Z)
   */
  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
    return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
  }

  /**
   * (uaddo_carry A, 0, Z)
   * |
   * Sum
   * |
   * (uaddo *, B)
   */
  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
    return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
  }

  // Same as above with the uaddo's operands commuted.
  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
    return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
  }

  return SDValue();
}
3766
3767// If we are facing some sort of diamond carry/borrow in/out pattern try to
3768// match patterns like:
3769//
3770// (uaddo A, B) CarryIn
3771// | \ |
3772// | \ |
3773// PartialSum PartialCarryOutX /
3774// | | /
3775// | ____|____________/
3776// | / |
3777// (uaddo *, *) \________
3778// | \ \
3779// | \ |
3780// | PartialCarryOutY |
3781// | \ |
3782// | \ /
3783// AddCarrySum | ______/
3784// | /
3785// CarryOut = (or *, *)
3786//
3787// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3788//
3789// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3790//
3791// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3792// with a single path for carry/borrow out propagation.
// NOTE(review): the first line of this function's signature is not visible in
// this chunk; confirm against upstream sources.
                                 SDValue N0, SDValue N1, SDNode *N) {
  // Both operands of the OR/XOR/AND must themselves be carry-like values.
  SDValue Carry0 = getAsCarry(TLI, N0);
  if (!Carry0)
    return SDValue();
  SDValue Carry1 = getAsCarry(TLI, N1);
  if (!Carry1)
    return SDValue();

  // Both carries must come from the same kind of unsigned add/sub overflow.
  unsigned Opcode = Carry0.getOpcode();
  if (Opcode != Carry1.getOpcode())
    return SDValue();
  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
    return SDValue();
  // Guarantee identical type of CarryOut
  EVT CarryOutType = N->getValueType(0);
  if (CarryOutType != Carry0.getValue(1).getValueType() ||
      CarryOutType != Carry1.getValue(1).getValueType())
    return SDValue();

  // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
  // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
  if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
    std::swap(Carry0, Carry1);

  // Check if nodes are connected in expected way.
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    return SDValue();

  // The carry in value must be on the righthand side for subtraction.
  unsigned CarryInOperandNum =
      Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
    return SDValue();
  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);

  // The merged node must be legal or custom on the target.
  unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
    return SDValue();

  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
  CarryIn = getAsCarry(TLI, CarryIn, true);
  if (!CarryIn)
    return SDValue();

  SDLoc DL(N);
  // Normalize the carry-in to the carry type expected by the merged node.
  CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
                                  Carry1->getValueType(0));
  SDValue Merged =
      DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
                  Carry0.getOperand(1), CarryIn);

  // Please note that because we have proven that the result of the UADDO/USUBO
  // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
  // therefore prove that if the first UADDO/USUBO overflows, the second
  // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
  // maximum value.
  //
  // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
  // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
  //
  // This is important because it means that OR and XOR can be used to merge
  // carry flags; and that AND can return a constant zero.
  //
  // TODO: match other operations that can merge flags (ADD, etc)
  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
  if (N->getOpcode() == ISD::AND)
    return DAG.getConstant(0, DL, CarryOutType);
  return Merged.getValue(1);
}
3864
3865SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3866 SDValue CarryIn, SDNode *N) {
3867 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3868 // carry.
3869 if (isBitwiseNot(N0))
3870 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3871 SDLoc DL(N);
3872 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3873 N0.getOperand(0), NotC);
3874 return CombineTo(
3875 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3876 }
3877
3878 // Iff the flag result is dead:
3879 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3880 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3881 // or the dependency between the instructions.
3882 if ((N0.getOpcode() == ISD::ADD ||
3883 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3884 N0.getValue(1) != CarryIn)) &&
3885 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3886 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3887 N0.getOperand(0), N0.getOperand(1), CarryIn);
3888
3889 /**
3890 * When one of the uaddo_carry argument is itself a carry, we may be facing
3891 * a diamond carry propagation. In which case we try to transform the DAG
3892 * to ensure linear carry propagation if that is possible.
3893 */
3894 if (auto Y = getAsCarry(TLI, N1)) {
3895 // Because both are carries, Y and Z can be swapped.
3896 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3897 return R;
3898 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3899 return R;
3900 }
3901
3902 return SDValue();
3903}
3904
3905SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3906 SDValue CarryIn, SDNode *N) {
3907 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3908 if (isBitwiseNot(N0)) {
3909 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3910 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3911 N0.getOperand(0), NotC);
3912 }
3913
3914 return SDValue();
3915}
3916
3917SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3918 SDValue N0 = N->getOperand(0);
3919 SDValue N1 = N->getOperand(1);
3920 SDValue CarryIn = N->getOperand(2);
3921 SDLoc DL(N);
3922
3923 // canonicalize constant to RHS
3924 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3925 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3926 if (N0C && !N1C)
3927 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3928
3929 // fold (saddo_carry x, y, false) -> (saddo x, y)
3930 if (isNullConstant(CarryIn)) {
3931 if (!LegalOperations ||
3932 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3933 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3934 }
3935
3936 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3937 return Combined;
3938
3939 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3940 return Combined;
3941
3942 return SDValue();
3943}
3944
// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
// clamp/truncation if necessary.
// NOTE(review): the first line of this function's signature (presumably taking
// EVT DstVT, EVT SrcVT and SDValue LHS, per the uses below) is not visible in
// this chunk; confirm against upstream sources.
                                  SDValue RHS, SelectionDAG &DAG,
                                  const SDLoc &DL) {
  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
         "Illegal truncation");

  // No truncation needed; emit the USUBSAT directly.
  if (DstVT == SrcVT)
    return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);

  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
  // clamping RHS.
  // NOTE(review): the start of the UpperBits computation is not visible in
  // this chunk (a line appears to be missing here); confirm upstream.
                           DstVT.getScalarSizeInBits());
  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
    return SDValue();

  SDValue SatLimit =
  // NOTE(review): part of this constant expression is not visible in this
  // chunk (a line appears to be missing here); confirm upstream.
                       DstVT.getScalarSizeInBits()),
      DL, SrcVT);
  // Clamp RHS so the narrower subtraction saturates identically, then narrow
  // both operands and emit the USUBSAT at the destination type.
  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
}
3972
3973// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3974// usubsat(a,b), optionally as a truncated type.
3975SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3976 if (N->getOpcode() != ISD::SUB ||
3977 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3978 return SDValue();
3979
3980 EVT SubVT = N->getValueType(0);
3981 SDValue Op0 = N->getOperand(0);
3982 SDValue Op1 = N->getOperand(1);
3983
3984 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3985 // they may be converted to usubsat(a,b).
3986 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3987 SDValue MaxLHS = Op0.getOperand(0);
3988 SDValue MaxRHS = Op0.getOperand(1);
3989 if (MaxLHS == Op1)
3990 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3991 if (MaxRHS == Op1)
3992 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3993 }
3994
3995 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3996 SDValue MinLHS = Op1.getOperand(0);
3997 SDValue MinRHS = Op1.getOperand(1);
3998 if (MinLHS == Op0)
3999 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
4000 if (MinRHS == Op0)
4001 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
4002 }
4003
4004 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
4005 if (Op1.getOpcode() == ISD::TRUNCATE &&
4006 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
4007 Op1.getOperand(0).hasOneUse()) {
4008 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
4009 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
4010 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
4011 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
4012 DAG, DL);
4013 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
4014 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
4015 DAG, DL);
4016 }
4017
4018 return SDValue();
4019}
4020
4021// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
// counting leading ones. Broadly, it replaces the subtraction with a left
4023// shift.
4024//
4025// * DAG Legalisation Pattern:
4026//
4027// (sub (ctlz (zeroextend (not Src)))
4028// BitWidthDiff)
4029//
4030// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
4031// -->
4032//
4033// (ctlz_zero_undef (not (shl (anyextend Src)
4034// BitWidthDiff)))
4035//
4036// * Type Legalisation Pattern:
4037//
4038// (sub (ctlz (and (xor Src XorMask)
4039// AndMask))
4040// BitWidthDiff)
4041//
4042// if AndMask has only trailing ones
4043// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
4044// and XorMask has more trailing ones than AndMask
4045// -->
4046//
4047// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
template <class MatchContextClass>
// NOTE(review): the function signature line (presumably taking SDNode *N and
// SelectionDAG &DAG, per the uses below) is not visible in this chunk; confirm
// against upstream sources.
  const SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  EVT VT = N0.getValueType();
  unsigned BitWidth = VT.getScalarSizeInBits();

  MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);

  APInt AndMask;
  APInt XorMask;
  APInt BitWidthDiff;

  SDValue CtlzOp;
  SDValue Src;

  // The root must be (sub (ctlz CtlzOp), BitWidthDiff).
  if (!sd_context_match(
          N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
    return SDValue();

  if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
    // DAG Legalisation Pattern:
    // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
    if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
      return SDValue();

    Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
  } else if (sd_context_match(CtlzOp, Matcher,
                              m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
                                    m_ConstInt(AndMask)))) {
    // Type Legalisation Pattern:
    // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
    if (BitWidthDiff.getZExtValue() >= BitWidth)
      return SDValue();
    // AndMask must be a mask of exactly BitWidth - BitWidthDiff trailing ones,
    // and XorMask must invert at least all of those bits.
    unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
    if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
      return SDValue();
  } else
    return SDValue();

  // Emit (ctlz_zero_undef (not (shl Src BitWidthDiff))).
  SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
  SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
  SDValue Not =
      Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));

  return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
}
4095
// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
// NOTE(review): the first line of this function's signature (presumably taking
// SDNode *N and SelectionDAG &DAG, per the uses below) is not visible in this
// chunk; confirm against upstream sources.
                                   const SDLoc &DL) {
  assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
  SDValue Sub0 = N->getOperand(0);
  SDValue Sub1 = N->getOperand(1);

  // Returns the remainder result of DivRem when DivRem is the quotient of
  // divrem(Sub0, MaybeY); otherwise an empty SDValue.
  auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
    if ((DivRem.getOpcode() == ISD::SDIVREM ||
         DivRem.getOpcode() == ISD::UDIVREM) &&
        DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
        DivRem.getOperand(1) == MaybeY) {
      return SDValue(DivRem.getNode(), 1);
    }
    return SDValue();
  };

  if (Sub1.getOpcode() == ISD::MUL) {
    // (sub x, (mul divrem(x,y)[0], y))
    SDValue Mul0 = Sub1.getOperand(0);
    SDValue Mul1 = Sub1.getOperand(1);

    // MUL is commutative: try both operand orders.
    if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
      return Res;

    if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
      return Res;

  } else if (Sub1.getOpcode() == ISD::SHL) {
    // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
    SDValue Shl0 = Sub1.getOperand(0);
    SDValue Shl1 = Sub1.getOperand(1);
    // Check if Shl0 is divrem(x, Y)[0]
    if ((Shl0.getOpcode() == ISD::SDIVREM ||
         Shl0.getOpcode() == ISD::UDIVREM) &&
        Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {

      SDValue Divisor = Shl0.getOperand(1);

      ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
      // NOTE(review): the declaration of ShC (presumably
      // isConstOrConstSplat(Shl1)) is not visible in this chunk; a line
      // appears to be missing here. Confirm against upstream sources.
      if (!DivC || !ShC)
        return SDValue();

      // The fold only holds when the divisor is exactly 1 << shift-amount.
      if (DivC->getAPIntValue().isPowerOf2() &&
          DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
        return SDValue(Shl0.getNode(), 1);
    }
  }
  return SDValue();
}
4147
4148// Since it may not be valid to emit a fold to zero for vector initializers
4149// check if we can before folding.
4150static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4151 SelectionDAG &DAG, bool LegalOperations) {
4152 if (!VT.isVector())
4153 return DAG.getConstant(0, DL, VT);
4154 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
4155 return DAG.getConstant(0, DL, VT);
4156 return SDValue();
4157}
4158
4159SDValue DAGCombiner::visitSUB(SDNode *N) {
4160 SDValue N0 = N->getOperand(0);
4161 SDValue N1 = N->getOperand(1);
4162 EVT VT = N0.getValueType();
4163 unsigned BitWidth = VT.getScalarSizeInBits();
4164 SDLoc DL(N);
4165
4167 return V;
4168
4169 // fold (sub x, x) -> 0
4170 if (N0 == N1)
4171 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4172
4173 // fold (sub c1, c2) -> c3
4174 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
4175 return C;
4176
4177 // fold vector ops
4178 if (VT.isVector()) {
4179 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4180 return FoldedVOp;
4181
4182 // fold (sub x, 0) -> x, vector edition
4184 return N0;
4185 }
4186
4187 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4188 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4189 if (N1.hasOneUse() && hasUMin(VT)) {
4190 SDValue Y;
4191 auto MS0 = m_Specific(N0);
4192 auto MVY = m_Value(Y);
4193 auto MZ = m_Zero();
4194 auto MCC1 = m_SpecificCondCode(ISD::SETULT);
4195 auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
4196
4197 if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
4198 sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
4199 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
4200 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
4201
4202 return DAG.getNode(ISD::UMIN, DL, VT, N0,
4203 DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4204 }
4205
4206 if (SDValue NewSel = foldBinOpIntoSelect(N))
4207 return NewSel;
4208
4209 // fold (sub x, c) -> (add x, -c)
4210 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4211 return DAG.getNode(ISD::ADD, DL, VT, N0,
4212 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4213
4214 if (isNullOrNullSplat(N0)) {
4215 // Right-shifting everything out but the sign bit followed by negation is
4216 // the same as flipping arithmetic/logical shift type without the negation:
4217 // -(X >>u 31) -> (X >>s 31)
4218 // -(X >>s 31) -> (X >>u 31)
4219 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4220 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
4221 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4222 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4223 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
4224 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
4225 }
4226 }
4227
4228 // 0 - X --> 0 if the sub is NUW.
4229 if (N->getFlags().hasNoUnsignedWrap())
4230 return N0;
4231
4233 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4234 // N1 must be 0 because negating the minimum signed value is undefined.
4235 if (N->getFlags().hasNoSignedWrap())
4236 return N0;
4237
4238 // 0 - X --> X if X is 0 or the minimum signed value.
4239 return N1;
4240 }
4241
4242 // Convert 0 - abs(x).
4243 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4245 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
4246 return Result;
4247
4248 // Similar to the previous rule, but this time targeting an expanded abs.
4249 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4250 // as well as
4251 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4252 // Note that these two are applicable to both signed and unsigned min/max.
4253 SDValue X;
4254 SDValue S0;
4255 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
4256 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
4257 m_UMax(m_Value(X), NegPat),
4258 m_SMin(m_Value(X), NegPat),
4259 m_UMin(m_Value(X), NegPat))))) {
4260 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
4261 if (hasOperation(NewOpc, VT))
4262 return DAG.getNode(NewOpc, DL, VT, X, S0);
4263 }
4264
4265 // Fold neg(splat(neg(x)) -> splat(x)
4266 if (VT.isVector()) {
4267 SDValue N1S = DAG.getSplatValue(N1, true);
4268 if (N1S && N1S.getOpcode() == ISD::SUB &&
4269 isNullConstant(N1S.getOperand(0)))
4270 return DAG.getSplat(VT, DL, N1S.getOperand(1));
4271 }
4272
4273 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
4274 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4275 isOneOrOneSplat(N1->getOperand(1))) {
4276 EVT ExtVT = VT.changeElementType(*DAG.getContext(), MVT::i1);
4279 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N1->getOperand(0),
4280 DAG.getValueType(ExtVT));
4281 }
4282 }
4283 }
4284
4285 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4287 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4288
4289 // fold (A - (0-B)) -> A+B
4290 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
4291 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
4292
4293 // fold A-(A-B) -> B
4294 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
4295 return N1.getOperand(1);
4296
4297 // fold (A+B)-A -> B
4298 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
4299 return N0.getOperand(1);
4300
4301 // fold (A+B)-B -> A
4302 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
4303 return N0.getOperand(0);
4304
4305 // fold (A+C1)-C2 -> A+(C1-C2)
4306 if (N0.getOpcode() == ISD::ADD) {
4307 SDValue N01 = N0.getOperand(1);
4308 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4309 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4310 }
4311
4312 // fold C2-(A+C1) -> (C2-C1)-A
4313 if (N1.getOpcode() == ISD::ADD) {
4314 SDValue N11 = N1.getOperand(1);
4315 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4316 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4317 }
4318
4319 // fold (A-C1)-C2 -> A-(C1+C2)
4320 if (N0.getOpcode() == ISD::SUB) {
4321 SDValue N01 = N0.getOperand(1);
4322 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4323 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4324 }
4325
4326 // fold (c1-A)-c2 -> (c1-c2)-A
4327 if (N0.getOpcode() == ISD::SUB) {
4328 SDValue N00 = N0.getOperand(0);
4329 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4330 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4331 }
4332
4333 SDValue A, B, C;
4334
4335 // fold ((A+(B+C))-B) -> A+C
4336 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4337 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4338
4339 // fold ((A+(B-C))-B) -> A-C
4340 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4341 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4342
4343 // fold ((A-(B-C))-C) -> A-B
4344 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4345 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4346
4347 // fold (A-(B-C)) -> A+(C-B)
4348 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4349 return DAG.getNode(ISD::ADD, DL, VT, N0,
4350 DAG.getNode(ISD::SUB, DL, VT, C, B));
4351
4352 // A - (A & B) -> A & (~B)
4353 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4354 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4355 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4356
4357 // fold (A - (-B * C)) -> (A + (B * C))
4358 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4359 return DAG.getNode(ISD::ADD, DL, VT, N0,
4360 DAG.getNode(ISD::MUL, DL, VT, B, C));
4361
4362 // If either operand of a sub is undef, the result is undef
4363 if (N0.isUndef())
4364 return N0;
4365 if (N1.isUndef())
4366 return N1;
4367
4368 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4369 return V;
4370
4371 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4372 return V;
4373
4374 // Try to match AVGCEIL fixedwidth pattern
4375 if (SDValue V = foldSubToAvg(N, DL))
4376 return V;
4377
4378 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4379 return V;
4380
4381 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4382 return V;
4383
4384 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4385 return V;
4386
4387 // (A - B) - 1 -> add (xor B, -1), A
4389 m_One(/*AllowUndefs=*/true))))
4390 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4391
4392 // Look for:
4393 // sub y, (xor x, -1)
4394 // And if the target does not like this form then turn into:
4395 // add (add x, y), 1
4396 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4397 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4398 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4399 }
4400
4401 // Hoist one-use addition by non-opaque constant:
4402 // (x + C) - y -> (x - y) + C
4403 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4404 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4405 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4406 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4407 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4408 }
4409 // y - (x + C) -> (y - x) - C
4410 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4411 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4412 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4413 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4414 }
4415 // (x - C) - y -> (x - y) - C
4416 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4417 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4418 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4419 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4420 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4421 }
4422 // (C - x) - y -> C - (x + y)
4423 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4424 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4425 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4426 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4427 }
4428
4429 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4430 // rather than 'sub 0/1' (the sext should get folded).
4431 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4432 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4433 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4434 TLI.getBooleanContents(VT) ==
4436 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4437 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4438 }
4439
4440 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4441 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4443 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4444 return DAG.getNode(ISD::ABS, DL, VT, A);
4445
4446 // If the relocation model supports it, consider symbol offsets.
4447 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4448 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4449 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4450 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4451 if (GA->getGlobal() == GB->getGlobal())
4452 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4453 DL, VT);
4454 }
4455
4456 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4457 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4458 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4459 if (TN->getVT() == MVT::i1) {
4460 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4461 DAG.getConstant(1, DL, VT));
4462 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4463 }
4464 }
4465
4466 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4467 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4468 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4469 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4470 }
4471
4472 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4473 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4474 APInt NewStep = -N1.getConstantOperandAPInt(0);
4475 return DAG.getNode(ISD::ADD, DL, VT, N0,
4476 DAG.getStepVector(DL, VT, NewStep));
4477 }
4478
4479 // Prefer an add for more folding potential and possibly better codegen:
4480 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4481 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4482 SDValue ShAmt = N1.getOperand(1);
4483 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4484 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4485 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4486 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4487 }
4488 }
4489
4490 // As with the previous fold, prefer add for more folding potential.
4491 // Subtracting SMIN/0 is the same as adding SMIN/0:
4492 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4493 if (N1.getOpcode() == ISD::SHL) {
4494 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4495 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4496 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4497 }
4498
4499 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4500 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4501 N0.getResNo() == 0 && N0.hasOneUse())
4502 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4503 N0.getOperand(0), N1, N0.getOperand(2));
4504
4506 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4507 if (SDValue Carry = getAsCarry(TLI, N0)) {
4508 SDValue X = N1;
4509 SDValue Zero = DAG.getConstant(0, DL, VT);
4510 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4511 return DAG.getNode(ISD::UADDO_CARRY, DL,
4512 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4513 Carry);
4514 }
4515 }
4516
4517 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4518 // sub C0, X --> xor X, C0
4519 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4520 if (!C0->isOpaque()) {
4521 const APInt &C0Val = C0->getAPIntValue();
4522 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4523 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4524 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4525 }
4526 }
4527
4528 // smax(a,b) - smin(a,b) --> abds(a,b)
4529 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4530 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4532 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4533
4534 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4535 if (hasOperation(ISD::ABDS, VT) &&
4536 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4538 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4539
4540 // umax(a,b) - umin(a,b) --> abdu(a,b)
4541 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4542 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4544 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4545
4546 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4547 if (hasOperation(ISD::ABDU, VT) &&
4548 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4550 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4551
4552 return SDValue();
4553}
4554
4555SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4556 unsigned Opcode = N->getOpcode();
4557 SDValue N0 = N->getOperand(0);
4558 SDValue N1 = N->getOperand(1);
4559 EVT VT = N0.getValueType();
4560 bool IsSigned = Opcode == ISD::SSUBSAT;
4561 SDLoc DL(N);
4562
4563 // fold (sub_sat x, undef) -> 0
4564 if (N0.isUndef() || N1.isUndef())
4565 return DAG.getConstant(0, DL, VT);
4566
4567 // fold (sub_sat x, x) -> 0
4568 if (N0 == N1)
4569 return DAG.getConstant(0, DL, VT);
4570
4571 // fold (sub_sat c1, c2) -> c3
4572 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4573 return C;
4574
4575 // fold vector ops
4576 if (VT.isVector()) {
4577 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4578 return FoldedVOp;
4579
4580 // fold (sub_sat x, 0) -> x, vector edition
4582 return N0;
4583 }
4584
4585 // fold (sub_sat x, 0) -> x
4586 if (isNullConstant(N1))
4587 return N0;
4588
4589 // If it cannot overflow, transform into an sub.
4590 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4591 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4592
4593 return SDValue();
4594}
4595
4596SDValue DAGCombiner::visitSUBC(SDNode *N) {
4597 SDValue N0 = N->getOperand(0);
4598 SDValue N1 = N->getOperand(1);
4599 EVT VT = N0.getValueType();
4600 SDLoc DL(N);
4601
4602 // If the flag result is dead, turn this into an SUB.
4603 if (!N->hasAnyUseOfValue(1))
4604 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4605 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4606
4607 // fold (subc x, x) -> 0 + no borrow
4608 if (N0 == N1)
4609 return CombineTo(N, DAG.getConstant(0, DL, VT),
4610 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4611
4612 // fold (subc x, 0) -> x + no borrow
4613 if (isNullConstant(N1))
4614 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4615
4616 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4617 if (isAllOnesConstant(N0))
4618 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4619 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4620
4621 return SDValue();
4622}
4623
4624SDValue DAGCombiner::visitSUBO(SDNode *N) {
4625 SDValue N0 = N->getOperand(0);
4626 SDValue N1 = N->getOperand(1);
4627 EVT VT = N0.getValueType();
4628 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4629
4630 EVT CarryVT = N->getValueType(1);
4631 SDLoc DL(N);
4632
4633 // If the flag result is dead, turn this into an SUB.
4634 if (!N->hasAnyUseOfValue(1))
4635 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4636 DAG.getUNDEF(CarryVT));
4637
4638 // fold (subo x, x) -> 0 + no borrow
4639 if (N0 == N1)
4640 return CombineTo(N, DAG.getConstant(0, DL, VT),
4641 DAG.getConstant(0, DL, CarryVT));
4642
4643 // fold (subox, c) -> (addo x, -c)
4644 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4645 if (IsSigned && !N1C->isMinSignedValue())
4646 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4647 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4648
4649 // fold (subo x, 0) -> x + no borrow
4650 if (isNullOrNullSplat(N1))
4651 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4652
4653 // If it cannot overflow, transform into an sub.
4654 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4655 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4656 DAG.getConstant(0, DL, CarryVT));
4657
4658 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4659 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4660 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4661 DAG.getConstant(0, DL, CarryVT));
4662
4663 return SDValue();
4664}
4665
4666SDValue DAGCombiner::visitSUBE(SDNode *N) {
4667 SDValue N0 = N->getOperand(0);
4668 SDValue N1 = N->getOperand(1);
4669 SDValue CarryIn = N->getOperand(2);
4670
4671 // fold (sube x, y, false) -> (subc x, y)
4672 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4673 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4674
4675 return SDValue();
4676}
4677
4678SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4679 SDValue N0 = N->getOperand(0);
4680 SDValue N1 = N->getOperand(1);
4681 SDValue CarryIn = N->getOperand(2);
4682
4683 // fold (usubo_carry x, y, false) -> (usubo x, y)
4684 if (isNullConstant(CarryIn)) {
4685 if (!LegalOperations ||
4686 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4687 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4688 }
4689
4690 return SDValue();
4691}
4692
4693SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4694 SDValue N0 = N->getOperand(0);
4695 SDValue N1 = N->getOperand(1);
4696 SDValue CarryIn = N->getOperand(2);
4697
4698 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4699 if (isNullConstant(CarryIn)) {
4700 if (!LegalOperations ||
4701 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4702 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4703 }
4704
4705 return SDValue();
4706}
4707
4708// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4709// UMULFIXSAT here.
4710SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4711 SDValue N0 = N->getOperand(0);
4712 SDValue N1 = N->getOperand(1);
4713 SDValue Scale = N->getOperand(2);
4714 EVT VT = N0.getValueType();
4715
4716 // fold (mulfix x, undef, scale) -> 0
4717 if (N0.isUndef() || N1.isUndef())
4718 return DAG.getConstant(0, SDLoc(N), VT);
4719
4720 // Canonicalize constant to RHS (vector doesn't have to splat)
4723 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4724
4725 // fold (mulfix x, 0, scale) -> 0
4726 if (isNullConstant(N1))
4727 return DAG.getConstant(0, SDLoc(N), VT);
4728
4729 return SDValue();
4730}
4731
4732template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4733 SDValue N0 = N->getOperand(0);
4734 SDValue N1 = N->getOperand(1);
4735 EVT VT = N0.getValueType();
4736 unsigned BitWidth = VT.getScalarSizeInBits();
4737 SDLoc DL(N);
4738 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4739 MatchContextClass Matcher(DAG, TLI, N);
4740
4741 // fold (mul x, undef) -> 0
4742 if (N0.isUndef() || N1.isUndef())
4743 return DAG.getConstant(0, DL, VT);
4744
4745 // fold (mul c1, c2) -> c1*c2
4746 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4747 return C;
4748
4749 // canonicalize constant to RHS (vector doesn't have to splat)
4752 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4753
4754 bool N1IsConst = false;
4755 bool N1IsOpaqueConst = false;
4756 APInt ConstValue1;
4757
4758 // fold vector ops
4759 if (VT.isVector()) {
4760 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4761 if (!UseVP)
4762 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4763 return FoldedVOp;
4764
4765 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4766 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4767 "Splat APInt should be element width");
4768 } else {
4769 N1IsConst = isa<ConstantSDNode>(N1);
4770 if (N1IsConst) {
4771 ConstValue1 = N1->getAsAPIntVal();
4772 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4773 }
4774 }
4775
4776 // fold (mul x, 0) -> 0
4777 if (N1IsConst && ConstValue1.isZero())
4778 return N1;
4779
4780 // fold (mul x, 1) -> x
4781 if (N1IsConst && ConstValue1.isOne())
4782 return N0;
4783
4784 if (!UseVP)
4785 if (SDValue NewSel = foldBinOpIntoSelect(N))
4786 return NewSel;
4787
4788 // fold (mul x, -1) -> 0-x
4789 if (N1IsConst && ConstValue1.isAllOnes())
4790 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4791
4792 // fold (mul x, (1 << c)) -> x << c
4793 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4794 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4795 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4796 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4797 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4798 SDNodeFlags Flags;
4799 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
4800 // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
4801 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
4802 }
4803 }
4804
4805 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4806 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4807 unsigned Log2Val = (-ConstValue1).logBase2();
4808
4809 // FIXME: If the input is something that is easily negated (e.g. a
4810 // single-use add), we should put the negate there.
4811 return Matcher.getNode(
4812 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4813 Matcher.getNode(ISD::SHL, DL, VT, N0,
4814 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4815 }
4816
4817 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4818 // hi result is in use in case we hit this mid-legalization.
4819 if (!UseVP) {
4820 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4821 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4822 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4823 // TODO: Can we match commutable operands with getNodeIfExists?
4824 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4825 if (LoHi->hasAnyUseOfValue(1))
4826 return SDValue(LoHi, 0);
4827 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4828 if (LoHi->hasAnyUseOfValue(1))
4829 return SDValue(LoHi, 0);
4830 }
4831 }
4832 }
4833
4834 // Try to transform:
4835 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4836 // mul x, (2^N + 1) --> add (shl x, N), x
4837 // mul x, (2^N - 1) --> sub (shl x, N), x
4838 // Examples: x * 33 --> (x << 5) + x
4839 // x * 15 --> (x << 4) - x
4840 // x * -33 --> -((x << 5) + x)
4841 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4842 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4843 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4844 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4845 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4846 // x * 0xf800 --> (x << 16) - (x << 11)
4847 // x * -0x8800 --> -((x << 15) + (x << 11))
4848 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4849 if (!UseVP && N1IsConst &&
4850 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4851 // TODO: We could handle more general decomposition of any constant by
4852 // having the target set a limit on number of ops and making a
4853 // callback to determine that sequence (similar to sqrt expansion).
4854 unsigned MathOp = ISD::DELETED_NODE;
4855 APInt MulC = ConstValue1.abs();
4856 // The constant `2` should be treated as (2^0 + 1).
4857 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4858 MulC.lshrInPlace(TZeros);
4859 if ((MulC - 1).isPowerOf2())
4860 MathOp = ISD::ADD;
4861 else if ((MulC + 1).isPowerOf2())
4862 MathOp = ISD::SUB;
4863
4864 if (MathOp != ISD::DELETED_NODE) {
4865 unsigned ShAmt =
4866 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4867 ShAmt += TZeros;
4868 assert(ShAmt < BitWidth &&
4869 "multiply-by-constant generated out of bounds shift");
4870 SDValue Shl =
4871 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4872 SDValue R =
4873 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4874 DAG.getNode(ISD::SHL, DL, VT, N0,
4875 DAG.getConstant(TZeros, DL, VT)))
4876 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4877 if (ConstValue1.isNegative())
4878 R = DAG.getNegative(R, DL, VT);
4879 return R;
4880 }
4881 }
4882
4883 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4884 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4885 SDValue N01 = N0.getOperand(1);
4886 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4887 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4888 }
4889
4890 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4891 // use.
4892 {
4893 SDValue Sh, Y;
4894
4895 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4896 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4898 Sh = N0; Y = N1;
4899 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4901 Sh = N1; Y = N0;
4902 }
4903
4904 if (Sh.getNode()) {
4905 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4906 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4907 }
4908 }
4909
4910 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4911 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4915 return Matcher.getNode(
4916 ISD::ADD, DL, VT,
4917 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4918 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4919
4920 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4921 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4922 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4923 const APInt &C0 = N0.getConstantOperandAPInt(0);
4924 const APInt &C1 = NC1->getAPIntValue();
4925 return DAG.getVScale(DL, VT, C0 * C1);
4926 }
4927
4928 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4929 APInt MulVal;
4930 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4931 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4932 const APInt &C0 = N0.getConstantOperandAPInt(0);
4933 APInt NewStep = C0 * MulVal;
4934 return DAG.getStepVector(DL, VT, NewStep);
4935 }
4936
4937 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4938 SDValue X;
4939 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4941 N, Matcher,
4943 m_Deferred(X)))) {
4944 return Matcher.getNode(ISD::ABS, DL, VT, X);
4945 }
4946
4947 // Fold ((mul x, 0/undef) -> 0,
4948 // (mul x, 1) -> x) -> x)
4949 // -> and(x, mask)
4950 // We can replace vectors with '0' and '1' factors with a clearing mask.
4951 if (VT.isFixedLengthVector()) {
4952 unsigned NumElts = VT.getVectorNumElements();
4953 SmallBitVector ClearMask;
4954 ClearMask.reserve(NumElts);
4955 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4956 if (!V || V->isZero()) {
4957 ClearMask.push_back(true);
4958 return true;
4959 }
4960 ClearMask.push_back(false);
4961 return V->isOne();
4962 };
4963 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4964 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4965 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4966 EVT LegalSVT = N1.getOperand(0).getValueType();
4967 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4968 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4970 for (unsigned I = 0; I != NumElts; ++I)
4971 if (ClearMask[I])
4972 Mask[I] = Zero;
4973 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4974 }
4975 }
4976
4977 // reassociate mul
4978 // TODO: Change reassociateOps to support vp ops.
4979 if (!UseVP)
4980 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4981 return RMUL;
4982
4983 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4984 // TODO: Change reassociateReduction to support vp ops.
4985 if (!UseVP)
4986 if (SDValue SD =
4987 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4988 return SD;
4989
4990 // Simplify the operands using demanded-bits information.
4992 return SDValue(N, 0);
4993
4994 return SDValue();
4995}
4996
4997/// Return true if divmod libcall is available.
4998static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4999 const TargetLowering &TLI) {
5000 RTLIB::Libcall LC;
5001 EVT NodeType = Node->getValueType(0);
5002 if (!NodeType.isSimple())
5003 return false;
5004 switch (NodeType.getSimpleVT().SimpleTy) {
5005 default: return false; // No libcall for vector types.
5006 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
5007 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
5008 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
5009 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
5010 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
5011 }
5012
5013 return TLI.getLibcallImpl(LC) != RTLIB::Unsupported;
5014}
5015
5016/// Issue divrem if both quotient and remainder are needed.
5017SDValue DAGCombiner::useDivRem(SDNode *Node) {
5018 if (Node->use_empty())
5019 return SDValue(); // This is a dead node, leave it alone.
5020
5021 unsigned Opcode = Node->getOpcode();
5022 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
5023 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
5024
5025 // DivMod lib calls can still work on non-legal types if using lib-calls.
5026 EVT VT = Node->getValueType(0);
5027 if (VT.isVector() || !VT.isInteger())
5028 return SDValue();
5029
5030 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
5031 return SDValue();
5032
5033 // If DIVREM is going to get expanded into a libcall,
5034 // but there is no libcall available, then don't combine.
5035 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
5036 !isDivRemLibcallAvailable(Node, isSigned, TLI))
5037 return SDValue();
5038
5039 // If div is legal, it's better to do the normal expansion
5040 unsigned OtherOpcode = 0;
5041 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
5042 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
5043 if (TLI.isOperationLegalOrCustom(Opcode, VT))
5044 return SDValue();
5045 } else {
5046 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5047 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
5048 return SDValue();
5049 }
5050
5051 SDValue Op0 = Node->getOperand(0);
5052 SDValue Op1 = Node->getOperand(1);
5053 SDValue combined;
5054 for (SDNode *User : Op0->users()) {
5055 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
5056 User->use_empty())
5057 continue;
5058 // Convert the other matching node(s), too;
5059 // otherwise, the DIVREM may get target-legalized into something
5060 // target-specific that we won't be able to recognize.
5061 unsigned UserOpc = User->getOpcode();
5062 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
5063 User->getOperand(0) == Op0 &&
5064 User->getOperand(1) == Op1) {
5065 if (!combined) {
5066 if (UserOpc == OtherOpcode) {
5067 SDVTList VTs = DAG.getVTList(VT, VT);
5068 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
5069 } else if (UserOpc == DivRemOpc) {
5070 combined = SDValue(User, 0);
5071 } else {
5072 assert(UserOpc == Opcode);
5073 continue;
5074 }
5075 }
5076 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
5077 CombineTo(User, combined);
5078 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
5079 CombineTo(User, combined.getValue(1));
5080 }
5081 }
5082 return combined;
5083}
5084
5086 SDValue N0 = N->getOperand(0);
5087 SDValue N1 = N->getOperand(1);
5088 EVT VT = N->getValueType(0);
5089 SDLoc DL(N);
5090
5091 unsigned Opc = N->getOpcode();
5092 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5093
5094 // X / undef -> undef
5095 // X % undef -> undef
5096 // X / 0 -> undef
5097 // X % 0 -> undef
5098 // NOTE: This includes vectors where any divisor element is zero/undef.
5099 if (DAG.isUndef(Opc, {N0, N1}))
5100 return DAG.getUNDEF(VT);
5101
5102 // undef / X -> 0
5103 // undef % X -> 0
5104 if (N0.isUndef())
5105 return DAG.getConstant(0, DL, VT);
5106
5107 // 0 / X -> 0
5108 // 0 % X -> 0
5110 if (N0C && N0C->isZero())
5111 return N0;
5112
5113 // X / X -> 1
5114 // X % X -> 0
5115 if (N0 == N1)
5116 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
5117
5118 // X / 1 -> X
5119 // X % 1 -> 0
5120 // If this is a boolean op (single-bit element type), we can't have
5121 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5122 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5123 // it's a 1.
5124 if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1))
5125 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
5126
5127 return SDValue();
5128}
5129
5130SDValue DAGCombiner::visitSDIV(SDNode *N) {
5131 SDValue N0 = N->getOperand(0);
5132 SDValue N1 = N->getOperand(1);
5133 EVT VT = N->getValueType(0);
5134 EVT CCVT = getSetCCResultType(VT);
5135 SDLoc DL(N);
5136
5137 // fold (sdiv c1, c2) -> c1/c2
5138 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
5139 return C;
5140
5141 // fold vector ops
5142 if (VT.isVector())
5143 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5144 return FoldedVOp;
5145
5146 // fold (sdiv X, -1) -> 0-X
5147 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5148 if (N1C && N1C->isAllOnes())
5149 return DAG.getNegative(N0, DL, VT);
5150
5151 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
5152 if (N1C && N1C->isMinSignedValue())
5153 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5154 DAG.getConstant(1, DL, VT),
5155 DAG.getConstant(0, DL, VT));
5156
5157 if (SDValue V = simplifyDivRem(N, DAG))
5158 return V;
5159
5160 if (SDValue NewSel = foldBinOpIntoSelect(N))
5161 return NewSel;
5162
5163 // If we know the sign bits of both operands are zero, strength reduce to a
5164 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
5165 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5166 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
5167
5168 if (SDValue V = visitSDIVLike(N0, N1, N)) {
5169 // If the corresponding remainder node exists, update its users with
5170 // (Dividend - (Quotient * Divisor).
5171 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
5172 { N0, N1 })) {
5173 // If the sdiv has the exact flag we shouldn't propagate it to the
5174 // remainder node.
5175 if (!N->getFlags().hasExact()) {
5176 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5177 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5178 AddToWorklist(Mul.getNode());
5179 AddToWorklist(Sub.getNode());
5180 CombineTo(RemNode, Sub);
5181 }
5182 }
5183 return V;
5184 }
5185
5186 // sdiv, srem -> sdivrem
5187 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5188 // true. Otherwise, we break the simplification logic in visitREM().
5189 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5190 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5191 if (SDValue DivRem = useDivRem(N))
5192 return DivRem;
5193
5194 return SDValue();
5195}
5196
5197static bool isDivisorPowerOfTwo(SDValue Divisor) {
5198 // Helper for determining whether a value is a power-2 constant scalar or a
5199 // vector of such elements.
5200 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5201 if (C->isZero() || C->isOpaque())
5202 return false;
5203 if (C->getAPIntValue().isPowerOf2())
5204 return true;
5205 if (C->getAPIntValue().isNegatedPowerOf2())
5206 return true;
5207 return false;
5208 };
5209
5210 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false,
5211 /*AllowTruncation=*/true);
5212}
5213
5214SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5215 SDLoc DL(N);
5216 EVT VT = N->getValueType(0);
5217 EVT CCVT = getSetCCResultType(VT);
5218 unsigned BitWidth = VT.getScalarSizeInBits();
5219
5220 // fold (sdiv X, pow2) -> simple ops after legalize
5221 // FIXME: We check for the exact bit here because the generic lowering gives
5222 // better results in that case. The target-specific lowering should learn how
5223 // to handle exact sdivs efficiently.
5224 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
5225 // Target-specific implementation of sdiv x, pow2.
5226 if (SDValue Res = BuildSDIVPow2(N))
5227 return Res;
5228
5229 // Create constants that are functions of the shift amount value.
5230 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
5231 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
5232 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
5233 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
5234 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
5235 if (!isConstantOrConstantVector(Inexact))
5236 return SDValue();
5237
5238 // Splat the sign bit into the register
5239 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
5240 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
5241 AddToWorklist(Sign.getNode());
5242
5243 // Add (N0 < 0) ? abs2 - 1 : 0;
5244 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
5245 AddToWorklist(Srl.getNode());
5246 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
5247 AddToWorklist(Add.getNode());
5248 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
5249 AddToWorklist(Sra.getNode());
5250
5251 // Special case: (sdiv X, 1) -> X
5252 // Special Case: (sdiv X, -1) -> 0-X
5253 SDValue One = DAG.getConstant(1, DL, VT);
5255 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
5256 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
5257 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
5258 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
5259
5260 // If dividing by a positive value, we're done. Otherwise, the result must
5261 // be negated.
5262 SDValue Zero = DAG.getConstant(0, DL, VT);
5263 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
5264
5265 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5266 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
5267 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
5268 return Res;
5269 }
5270
5271 // If integer divide is expensive and we satisfy the requirements, emit an
5272 // alternate sequence. Targets may check function attributes for size/speed
5273 // trade-offs.
5274 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5275 if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5276 /*AllowTruncation=*/true) &&
5277 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5278 if (SDValue Op = BuildSDIV(N))
5279 return Op;
5280
5281 return SDValue();
5282}
5283
5284SDValue DAGCombiner::visitUDIV(SDNode *N) {
5285 SDValue N0 = N->getOperand(0);
5286 SDValue N1 = N->getOperand(1);
5287 EVT VT = N->getValueType(0);
5288 EVT CCVT = getSetCCResultType(VT);
5289 SDLoc DL(N);
5290
5291 // fold (udiv c1, c2) -> c1/c2
5292 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
5293 return C;
5294
5295 // fold vector ops
5296 if (VT.isVector())
5297 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5298 return FoldedVOp;
5299
5300 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5301 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5302 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5303 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5304 DAG.getConstant(1, DL, VT),
5305 DAG.getConstant(0, DL, VT));
5306 }
5307
5308 if (SDValue V = simplifyDivRem(N, DAG))
5309 return V;
5310
5311 if (SDValue NewSel = foldBinOpIntoSelect(N))
5312 return NewSel;
5313
5314 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5315 // If the corresponding remainder node exists, update its users with
5316 // (Dividend - (Quotient * Divisor).
5317 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5318 { N0, N1 })) {
5319 // If the udiv has the exact flag we shouldn't propagate it to the
5320 // remainder node.
5321 if (!N->getFlags().hasExact()) {
5322 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5323 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5324 AddToWorklist(Mul.getNode());
5325 AddToWorklist(Sub.getNode());
5326 CombineTo(RemNode, Sub);
5327 }
5328 }
5329 return V;
5330 }
5331
5332 // sdiv, srem -> sdivrem
5333 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5334 // true. Otherwise, we break the simplification logic in visitREM().
5335 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5336 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5337 if (SDValue DivRem = useDivRem(N))
5338 return DivRem;
5339
5340 // Simplify the operands using demanded-bits information.
5341 // We don't have demanded bits support for UDIV so this just enables constant
5342 // folding based on known bits.
5344 return SDValue(N, 0);
5345
5346 return SDValue();
5347}
5348
5349SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5350 SDLoc DL(N);
5351 EVT VT = N->getValueType(0);
5352
5353 // fold (udiv x, (1 << c)) -> x >>u c
5354 if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5355 /*AllowTruncation=*/true)) {
5356 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5357 AddToWorklist(LogBase2.getNode());
5358
5359 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5360 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5361 AddToWorklist(Trunc.getNode());
5362 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5363 }
5364 }
5365
5366 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5367 if (N1.getOpcode() == ISD::SHL) {
5368 SDValue N10 = N1.getOperand(0);
5369 if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
5370 /*AllowTruncation=*/true)) {
5371 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5372 AddToWorklist(LogBase2.getNode());
5373
5374 EVT ADDVT = N1.getOperand(1).getValueType();
5375 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5376 AddToWorklist(Trunc.getNode());
5377 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5378 AddToWorklist(Add.getNode());
5379 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5380 }
5381 }
5382 }
5383
5384 // fold (udiv x, c) -> alternate
5385 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5386 if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5387 /*AllowTruncation=*/true) &&
5388 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5389 if (SDValue Op = BuildUDIV(N))
5390 return Op;
5391
5392 return SDValue();
5393}
5394
5395SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5396 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5397 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5398 // Target-specific implementation of srem x, pow2.
5399 if (SDValue Res = BuildSREMPow2(N))
5400 return Res;
5401 }
5402 return SDValue();
5403}
5404
5405// handles ISD::SREM and ISD::UREM
5406SDValue DAGCombiner::visitREM(SDNode *N) {
5407 unsigned Opcode = N->getOpcode();
5408 SDValue N0 = N->getOperand(0);
5409 SDValue N1 = N->getOperand(1);
5410 EVT VT = N->getValueType(0);
5411 EVT CCVT = getSetCCResultType(VT);
5412
5413 bool isSigned = (Opcode == ISD::SREM);
5414 SDLoc DL(N);
5415
5416 // fold (rem c1, c2) -> c1%c2
5417 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5418 return C;
5419
5420 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5421 // Freeze the numerator to avoid a miscompile with an undefined value.
5422 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5423 CCVT.isVector() == VT.isVector()) {
5424 SDValue F0 = DAG.getFreeze(N0);
5425 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5426 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5427 }
5428
5429 if (SDValue V = simplifyDivRem(N, DAG))
5430 return V;
5431
5432 if (SDValue NewSel = foldBinOpIntoSelect(N))
5433 return NewSel;
5434
5435 if (isSigned) {
5436 // If we know the sign bits of both operands are zero, strength reduce to a
5437 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5438 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5439 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5440 } else {
5441 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5442 // fold (urem x, pow2) -> (and x, pow2-1)
5443 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5444 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5445 AddToWorklist(Add.getNode());
5446 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5447 }
5448 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5449 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5450 // TODO: We should sink the following into isKnownToBePowerOfTwo
5451 // using a OrZero parameter analogous to our handling in ValueTracking.
5452 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5454 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5455 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5456 AddToWorklist(Add.getNode());
5457 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5458 }
5459 }
5460
5461 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5462
5463 // If X/C can be simplified by the division-by-constant logic, lower
5464 // X%C to the equivalent of X-X/C*C.
5465 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5466 // speculative DIV must not cause a DIVREM conversion. We guard against this
5467 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5468 // combine will not return a DIVREM. Regardless, checking cheapness here
5469 // makes sense since the simplification results in fatter code.
5470 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5471 if (isSigned) {
5472 // check if we can build faster implementation for srem
5473 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5474 return OptimizedRem;
5475 }
5476
5477 SDValue OptimizedDiv =
5478 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5479 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5480 // If the equivalent Div node also exists, update its users.
5481 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5482 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5483 { N0, N1 }))
5484 CombineTo(DivNode, OptimizedDiv);
5485 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5486 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5487 AddToWorklist(OptimizedDiv.getNode());
5488 AddToWorklist(Mul.getNode());
5489 return Sub;
5490 }
5491 }
5492
5493 // sdiv, srem -> sdivrem
5494 if (SDValue DivRem = useDivRem(N))
5495 return DivRem.getValue(1);
5496
5497 // fold urem(urem(A, BCst), Op1Cst) -> urem(A, Op1Cst)
5498 // iff urem(BCst, Op1Cst) == 0
5499 SDValue A;
5500 APInt Op1Cst, BCst;
5501 if (sd_match(N, m_URem(m_URem(m_Value(A), m_ConstInt(BCst)),
5502 m_ConstInt(Op1Cst))) &&
5503 BCst.urem(Op1Cst).isZero()) {
5504 return DAG.getNode(ISD::UREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5505 }
5506
5507 // fold srem(srem(A, BCst), Op1Cst) -> srem(A, Op1Cst)
5508 // iff srem(BCst, Op1Cst) == 0 && Op1Cst != 1
5509 if (sd_match(N, m_SRem(m_SRem(m_Value(A), m_ConstInt(BCst)),
5510 m_ConstInt(Op1Cst))) &&
5511 BCst.srem(Op1Cst).isZero() && !Op1Cst.isAllOnes()) {
5512 return DAG.getNode(ISD::SREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5513 }
5514
5515 return SDValue();
5516}
5517
5518SDValue DAGCombiner::visitMULHS(SDNode *N) {
5519 SDValue N0 = N->getOperand(0);
5520 SDValue N1 = N->getOperand(1);
5521 EVT VT = N->getValueType(0);
5522 SDLoc DL(N);
5523
5524 // fold (mulhs c1, c2)
5525 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5526 return C;
5527
5528 // canonicalize constant to RHS.
5531 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5532
5533 if (VT.isVector()) {
5534 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5535 return FoldedVOp;
5536
5537 // fold (mulhs x, 0) -> 0
5538 // do not return N1, because undef node may exist.
5540 return DAG.getConstant(0, DL, VT);
5541 }
5542
5543 // fold (mulhs x, 0) -> 0
5544 if (isNullConstant(N1))
5545 return N1;
5546
5547 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5548 if (isOneConstant(N1))
5549 return DAG.getNode(
5550 ISD::SRA, DL, VT, N0,
5552
5553 // fold (mulhs x, undef) -> 0
5554 if (N0.isUndef() || N1.isUndef())
5555 return DAG.getConstant(0, DL, VT);
5556
5557 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5558 // plus a shift.
5559 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5560 !VT.isVector()) {
5561 MVT Simple = VT.getSimpleVT();
5562 unsigned SimpleSize = Simple.getSizeInBits();
5563 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5564 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5565 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5566 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5567 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5568 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5569 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5570 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5571 }
5572 }
5573
5574 return SDValue();
5575}
5576
5577SDValue DAGCombiner::visitMULHU(SDNode *N) {
5578 SDValue N0 = N->getOperand(0);
5579 SDValue N1 = N->getOperand(1);
5580 EVT VT = N->getValueType(0);
5581 SDLoc DL(N);
5582
5583 // fold (mulhu c1, c2)
5584 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5585 return C;
5586
5587 // canonicalize constant to RHS.
5590 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5591
5592 if (VT.isVector()) {
5593 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5594 return FoldedVOp;
5595
5596 // fold (mulhu x, 0) -> 0
5597 // do not return N1, because undef node may exist.
5599 return DAG.getConstant(0, DL, VT);
5600 }
5601
5602 // fold (mulhu x, 0) -> 0
5603 if (isNullConstant(N1))
5604 return N1;
5605
5606 // fold (mulhu x, 1) -> 0
5607 if (isOneConstant(N1))
5608 return DAG.getConstant(0, DL, VT);
5609
5610 // fold (mulhu x, undef) -> 0
5611 if (N0.isUndef() || N1.isUndef())
5612 return DAG.getConstant(0, DL, VT);
5613
5614 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5615 if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5616 /*AllowTruncation=*/true) &&
5617 hasOperation(ISD::SRL, VT)) {
5618 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5619 unsigned NumEltBits = VT.getScalarSizeInBits();
5620 SDValue SRLAmt = DAG.getNode(
5621 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5622 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5623 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5624 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5625 }
5626 }
5627
5628 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5629 // plus a shift.
5630 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5631 !VT.isVector()) {
5632 MVT Simple = VT.getSimpleVT();
5633 unsigned SimpleSize = Simple.getSizeInBits();
5634 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5635 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5636 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5637 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5638 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5639 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5640 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5641 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5642 }
5643 }
5644
5645 // Simplify the operands using demanded-bits information.
5646 // We don't have demanded bits support for MULHU so this just enables constant
5647 // folding based on known bits.
5649 return SDValue(N, 0);
5650
5651 return SDValue();
5652}
5653
5654SDValue DAGCombiner::visitAVG(SDNode *N) {
5655 unsigned Opcode = N->getOpcode();
5656 SDValue N0 = N->getOperand(0);
5657 SDValue N1 = N->getOperand(1);
5658 EVT VT = N->getValueType(0);
5659 SDLoc DL(N);
5660 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5661
5662 // fold (avg c1, c2)
5663 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5664 return C;
5665
5666 // canonicalize constant to RHS.
5669 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5670
5671 if (VT.isVector())
5672 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5673 return FoldedVOp;
5674
5675 // fold (avg x, undef) -> x
5676 if (N0.isUndef())
5677 return N1;
5678 if (N1.isUndef())
5679 return N0;
5680
5681 // fold (avg x, x) --> x
5682 if (N0 == N1 && Level >= AfterLegalizeTypes)
5683 return N0;
5684
5685 // fold (avgfloor x, 0) -> x >> 1
5686 SDValue X, Y;
5688 return DAG.getNode(ISD::SRA, DL, VT, X,
5689 DAG.getShiftAmountConstant(1, VT, DL));
5691 return DAG.getNode(ISD::SRL, DL, VT, X,
5692 DAG.getShiftAmountConstant(1, VT, DL));
5693
5694 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5695 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5696 if (!IsSigned &&
5697 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5698 X.getValueType() == Y.getValueType() &&
5699 hasOperation(Opcode, X.getValueType())) {
5700 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5701 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5702 }
5703 if (IsSigned &&
5704 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5705 X.getValueType() == Y.getValueType() &&
5706 hasOperation(Opcode, X.getValueType())) {
5707 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5708 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5709 }
5710
5711 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5712 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5713 // Check if avgflooru isn't legal/custom but avgceilu is.
5714 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5715 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5716 if (DAG.isKnownNeverZero(N1))
5717 return DAG.getNode(
5718 ISD::AVGCEILU, DL, VT, N0,
5719 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5720 if (DAG.isKnownNeverZero(N0))
5721 return DAG.getNode(
5722 ISD::AVGCEILU, DL, VT, N1,
5723 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5724 }
5725
5726 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5727 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5728 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5729 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5730 SDValue Add;
5731 if (sd_match(N,
5732 m_c_BinOp(Opcode,
5734 m_One())) ||
5735 sd_match(N, m_c_BinOp(Opcode,
5737 m_Value(Y)))) {
5738
5739 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5740 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5741
5742 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5743 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5744 }
5745 }
5746
5747 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
5748 if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT)) {
5749 if (DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5750 return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
5751 }
5752
5753 return SDValue();
5754}
5755
5756SDValue DAGCombiner::visitABD(SDNode *N) {
5757 unsigned Opcode = N->getOpcode();
5758 SDValue N0 = N->getOperand(0);
5759 SDValue N1 = N->getOperand(1);
5760 EVT VT = N->getValueType(0);
5761 SDLoc DL(N);
5762
5763 // fold (abd c1, c2)
5764 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5765 return C;
5766
5767 // canonicalize constant to RHS.
5770 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5771
5772 if (VT.isVector())
5773 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5774 return FoldedVOp;
5775
5776 // fold (abd x, undef) -> 0
5777 if (N0.isUndef() || N1.isUndef())
5778 return DAG.getConstant(0, DL, VT);
5779
5780 // fold (abd x, x) -> 0
5781 if (N0 == N1)
5782 return DAG.getConstant(0, DL, VT);
5783
5784 SDValue X, Y;
5785
5786 // fold (abds x, 0) -> abs x
5788 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5789 return DAG.getNode(ISD::ABS, DL, VT, X);
5790
5791 // fold (abdu x, 0) -> x
5793 return X;
5794
5795 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5796 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5797 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5798 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5799
5800 // fold (abd? (?ext x), (?ext y)) -> (zext (abd? x, y))
5803 EVT SmallVT = X.getScalarValueSizeInBits() > Y.getScalarValueSizeInBits()
5804 ? X.getValueType()
5805 : Y.getValueType();
5806 if (!LegalOperations || hasOperation(Opcode, SmallVT)) {
5807 SDValue ExtedX = DAG.getExtOrTrunc(X, SDLoc(X), SmallVT, N0->getOpcode());
5808 SDValue ExtedY = DAG.getExtOrTrunc(Y, SDLoc(Y), SmallVT, N0->getOpcode());
5809 SDValue SmallABD = DAG.getNode(Opcode, DL, SmallVT, {ExtedX, ExtedY});
5810 SDValue ZExted = DAG.getZExtOrTrunc(SmallABD, DL, VT);
5811 return ZExted;
5812 }
5813 }
5814
5815 return SDValue();
5816}
5817
5818/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5819/// give the opcodes for the two computations that are being performed. Return
5820/// true if a simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    // Replace both results with the single-result node; the dead high half
    // simply gets the same replacement and disappears.
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  // Exactly one of LoExists/HiExists is true past this point: speculatively
  // build the single-result node and see if combine() can fold it further.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    // Only commit when combine() produced a genuinely different node that is
    // legal (or we are pre-legalization).
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
5866
5867SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5868 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5869 return Res;
5870
5871 SDValue N0 = N->getOperand(0);
5872 SDValue N1 = N->getOperand(1);
5873 EVT VT = N->getValueType(0);
5874 SDLoc DL(N);
5875
5876 // Constant fold.
5878 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5879
5880 // canonicalize constant to RHS (vector doesn't have to splat)
5883 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5884
5885 // If the type is twice as wide is legal, transform the mulhu to a wider
5886 // multiply plus a shift.
5887 if (VT.isSimple() && !VT.isVector()) {
5888 MVT Simple = VT.getSimpleVT();
5889 unsigned SimpleSize = Simple.getSizeInBits();
5890 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5891 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5892 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5893 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5894 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5895 // Compute the high part as N1.
5896 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5897 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5898 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5899 // Compute the low part as N0.
5900 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5901 return CombineTo(N, Lo, Hi);
5902 }
5903 }
5904
5905 return SDValue();
5906}
5907
5908SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5909 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5910 return Res;
5911
5912 SDValue N0 = N->getOperand(0);
5913 SDValue N1 = N->getOperand(1);
5914 EVT VT = N->getValueType(0);
5915 SDLoc DL(N);
5916
5917 // Constant fold.
5919 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5920
5921 // canonicalize constant to RHS (vector doesn't have to splat)
5924 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5925
5926 // (umul_lohi N0, 0) -> (0, 0)
5927 if (isNullConstant(N1)) {
5928 SDValue Zero = DAG.getConstant(0, DL, VT);
5929 return CombineTo(N, Zero, Zero);
5930 }
5931
5932 // (umul_lohi N0, 1) -> (N0, 0)
5933 if (isOneConstant(N1)) {
5934 SDValue Zero = DAG.getConstant(0, DL, VT);
5935 return CombineTo(N, N0, Zero);
5936 }
5937
5938 // If the type is twice as wide is legal, transform the mulhu to a wider
5939 // multiply plus a shift.
5940 if (VT.isSimple() && !VT.isVector()) {
5941 MVT Simple = VT.getSimpleVT();
5942 unsigned SimpleSize = Simple.getSizeInBits();
5943 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5944 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5945 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5946 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5947 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5948 // Compute the high part as N1.
5949 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5950 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5951 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5952 // Compute the low part as N0.
5953 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5954 return CombineTo(N, Lo, Hi);
5955 }
5956 }
5957
5958 return SDValue();
5959}
5960
5961SDValue DAGCombiner::visitMULO(SDNode *N) {
5962 SDValue N0 = N->getOperand(0);
5963 SDValue N1 = N->getOperand(1);
5964 EVT VT = N0.getValueType();
5965 bool IsSigned = (ISD::SMULO == N->getOpcode());
5966
5967 EVT CarryVT = N->getValueType(1);
5968 SDLoc DL(N);
5969
5970 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5971 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5972
5973 // fold operation with constant operands.
5974 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5975 // multiple results.
5976 if (N0C && N1C) {
5977 bool Overflow;
5978 APInt Result =
5979 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5980 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5981 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5982 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5983 }
5984
5985 // canonicalize constant to RHS.
5988 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5989
5990 // fold (mulo x, 0) -> 0 + no carry out
5991 if (isNullOrNullSplat(N1))
5992 return CombineTo(N, DAG.getConstant(0, DL, VT),
5993 DAG.getConstant(0, DL, CarryVT));
5994
5995 // (mulo x, 2) -> (addo x, x)
5996 // FIXME: This needs a freeze.
5997 if (N1C && N1C->getAPIntValue() == 2 &&
5998 (!IsSigned || VT.getScalarSizeInBits() > 2))
5999 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
6000 N->getVTList(), N0, N0);
6001
6002 // A 1 bit SMULO overflows if both inputs are 1.
6003 if (IsSigned && VT.getScalarSizeInBits() == 1) {
6004 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
6005 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
6006 DAG.getConstant(0, DL, VT), ISD::SETNE);
6007 return CombineTo(N, And, Cmp);
6008 }
6009
6010 // If it cannot overflow, transform into a mul.
6011 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
6012 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
6013 DAG.getConstant(0, DL, CarryVT));
6014 return SDValue();
6015}
6016
6017// Function to calculate whether the Min/Max pair of SDNodes (potentially
6018// swapped around) make a signed saturate pattern, clamping to between a signed
6019// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
6020// Returns the node being clamped and the bitwidth of the clamp in BW. Should
6021// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
6022// same as SimplifySelectCC. N0<N1 ? N2 : N3.
6024 SDValue N3, ISD::CondCode CC, unsigned &BW,
6025 bool &Unsigned, SelectionDAG &DAG) {
6026 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
6027 ISD::CondCode CC) {
6028 // The compare and select operand should be the same or the select operands
6029 // should be truncated versions of the comparison.
6030 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
6031 return 0;
6032 // The constants need to be the same or a truncated version of each other.
6035 if (!N1C || !N3C)
6036 return 0;
6037 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
6038 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
6039 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
6040 return 0;
6041 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
6042 };
6043
6044 // Check the initial value is a SMIN/SMAX equivalent.
6045 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
6046 if (!Opcode0)
6047 return SDValue();
6048
6049 // We could only need one range check, if the fptosi could never produce
6050 // the upper value.
6051 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
6052 if (isNullOrNullSplat(N3)) {
6053 EVT IntVT = N0.getValueType().getScalarType();
6054 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
6055 if (FPVT.isSimple()) {
6056 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
6057 const fltSemantics &Semantics = InputTy->getFltSemantics();
6058 uint32_t MinBitWidth =
6059 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
6060 if (IntVT.getSizeInBits() >= MinBitWidth) {
6061 Unsigned = true;
6062 BW = PowerOf2Ceil(MinBitWidth);
6063 return N0;
6064 }
6065 }
6066 }
6067 }
6068
6069 SDValue N00, N01, N02, N03;
6070 ISD::CondCode N0CC;
6071 switch (N0.getOpcode()) {
6072 case ISD::SMIN:
6073 case ISD::SMAX:
6074 N00 = N02 = N0.getOperand(0);
6075 N01 = N03 = N0.getOperand(1);
6076 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
6077 break;
6078 case ISD::SELECT_CC:
6079 N00 = N0.getOperand(0);
6080 N01 = N0.getOperand(1);
6081 N02 = N0.getOperand(2);
6082 N03 = N0.getOperand(3);
6083 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
6084 break;
6085 case ISD::SELECT:
6086 case ISD::VSELECT:
6087 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
6088 return SDValue();
6089 N00 = N0.getOperand(0).getOperand(0);
6090 N01 = N0.getOperand(0).getOperand(1);
6091 N02 = N0.getOperand(1);
6092 N03 = N0.getOperand(2);
6093 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
6094 break;
6095 default:
6096 return SDValue();
6097 }
6098
6099 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
6100 if (!Opcode1 || Opcode0 == Opcode1)
6101 return SDValue();
6102
6103 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
6104 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
6105 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
6106 return SDValue();
6107
6108 const APInt &MinC = MinCOp->getAPIntValue();
6109 const APInt &MaxC = MaxCOp->getAPIntValue();
6110 APInt MinCPlus1 = MinC + 1;
6111 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
6112 BW = MinCPlus1.exactLogBase2() + 1;
6113 Unsigned = false;
6114 return N02;
6115 }
6116
6117 if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
6118 BW = MinCPlus1.exactLogBase2();
6119 Unsigned = true;
6120 return N02;
6121 }
6122
6123 return SDValue();
6124}
6125
6127 SDValue N3, ISD::CondCode CC,
6128 SelectionDAG &DAG) {
6129 unsigned BW;
6130 bool Unsigned;
6131 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6132 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6133 return SDValue();
6134 EVT FPVT = Fp.getOperand(0).getValueType();
6135 EVT NewVT = FPVT.changeElementType(*DAG.getContext(),
6136 EVT::getIntegerVT(*DAG.getContext(), BW));
6137 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6138 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
6139 return SDValue();
6140 SDLoc DL(Fp);
6141 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
6142 DAG.getValueType(NewVT.getScalarType()));
6143 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
6144}
6145
6147 SDValue N3, ISD::CondCode CC,
6148 SelectionDAG &DAG) {
6149 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6150 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
6151 // be truncated versions of the setcc (N0/N1).
6152 if ((N0 != N2 &&
6153 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
6154 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6155 return SDValue();
6158 if (!N1C || !N3C)
6159 return SDValue();
6160 const APInt &C1 = N1C->getAPIntValue();
6161 const APInt &C3 = N3C->getAPIntValue();
6162 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6163 C1 != C3.zext(C1.getBitWidth()))
6164 return SDValue();
6165
6166 unsigned BW = (C1 + 1).exactLogBase2();
6167 EVT FPVT = N0.getOperand(0).getValueType();
6168 EVT NewVT = FPVT.changeElementType(*DAG.getContext(),
6169 EVT::getIntegerVT(*DAG.getContext(), BW));
6171 FPVT, NewVT))
6172 return SDValue();
6173
6174 SDValue Sat =
6175 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
6176 DAG.getValueType(NewVT.getScalarType()));
6177 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
6178}
6179
6180SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
 // Combines for the integer min/max family: SMIN/SMAX/UMIN/UMAX.
6181 SDValue N0 = N->getOperand(0);
6182 SDValue N1 = N->getOperand(1);
6183 EVT VT = N0.getValueType();
6184 unsigned Opcode = N->getOpcode();
6185 SDLoc DL(N);
6186
6187 // fold operation with constant operands.
6188 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
6189 return C;
6190
6191 // If the operands are the same, this is a no-op.
6192 if (N0 == N1)
6193 return N0;
6194
6195 // Fold operation with vscale operands.
 // vscale is a fixed (unknown) positive scale, so comparing the constant
 // multipliers decides which VSCALE node is larger.
6196 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6197 uint64_t C0 = N0->getConstantOperandVal(0);
6198 uint64_t C1 = N1->getConstantOperandVal(0);
6199 if (Opcode == ISD::UMAX)
6200 return C0 > C1 ? N0 : N1;
6201 else if (Opcode == ISD::UMIN)
6202 return C0 > C1 ? N1 : N0;
6203 }
6204
6205 // canonicalize constant to RHS
 // NOTE(review): the constant-operand guard for this swap is on lines elided
 // from this listing (original lines 6206-6207).
6208 return DAG.getNode(Opcode, DL, VT, N1, N0);
6209
6210 // fold vector ops
6211 if (VT.isVector())
6212 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6213 return FoldedVOp;
6214
6215 // reassociate minmax
6216 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
6217 return RMINMAX;
6218
6219 // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
6220 // Only do this if:
6221 // 1. The current op isn't legal and the flipped is.
6222 // 2. The saturation pattern is broken by canonicalization in InstCombine.
6223 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
6224 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6225 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
6226 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
6227 unsigned AltOpcode;
6228 switch (Opcode) {
6229 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
6230 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
6231 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
6232 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
6233 default: llvm_unreachable("Unknown MINMAX opcode");
6234 }
6235 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
6236 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
6237 }
6238
 // Recognize signed/unsigned saturation patterns expressed as min/max.
 // NOTE(review): the call opening (PerformMinMaxFpToSatCombine) is on an
 // elided line (original line 6240).
6239 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6241 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6242 return S;
6243 if (Opcode == ISD::UMIN)
6244 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
6245 return S;
6246
6247 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6248 auto ReductionOpcode = [](unsigned Opcode) {
6249 switch (Opcode) {
6250 case ISD::SMIN:
6251 return ISD::VECREDUCE_SMIN;
6252 case ISD::SMAX:
6253 return ISD::VECREDUCE_SMAX;
6254 case ISD::UMIN:
6255 return ISD::VECREDUCE_UMIN;
6256 case ISD::UMAX:
6257 return ISD::VECREDUCE_UMAX;
6258 default:
6259 llvm_unreachable("Unexpected opcode");
6260 }
6261 };
6262 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
6263 SDLoc(N), VT, N0, N1))
6264 return SD;
6265
6266 // Simplify the operands using demanded-bits information.
 // NOTE(review): the SimplifyDemandedBits guard is on an elided line
 // (original line 6267); returning SDValue(N, 0) signals "N was updated".
6268 return SDValue(N, 0);
6269
6270 return SDValue();
6271}
6272
6273/// If this is a bitwise logic instruction and both operands have the same
6274/// opcode, try to sink the other opcode after the logic instruction.
6275SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
6276 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6277 EVT VT = N0.getValueType();
6278 unsigned LogicOpcode = N->getOpcode();
6279 unsigned HandOpcode = N0.getOpcode();
6280 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
6281 assert(HandOpcode == N1.getOpcode() && "Bad input!");
6282
6283 // Bail early if none of these transforms apply.
6284 if (N0.getNumOperands() == 0)
6285 return SDValue();
6286
6287 // FIXME: We should check number of uses of the operands to not increase
6288 // the instruction count for all transforms.
6289
6290 // Handle size-changing casts (or sign_extend_inreg).
6291 SDValue X = N0.getOperand(0);
6292 SDValue Y = N1.getOperand(0);
6293 EVT XVT = X.getValueType();
6294 SDLoc DL(N);
6295 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
6296 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
6297 N0.getOperand(1) == N1.getOperand(1))) {
6298 // If both operands have other uses, this transform would create extra
6299 // instructions without eliminating anything.
6300 if (!N0.hasOneUse() && !N1.hasOneUse())
6301 return SDValue();
6302 // We need matching integer source types.
6303 if (XVT != Y.getValueType())
6304 return SDValue();
6305 // Don't create an illegal op during or after legalization. Don't ever
6306 // create an unsupported vector op.
6307 if ((VT.isVector() || LegalOperations) &&
6308 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
6309 return SDValue();
6310 // Avoid infinite looping with PromoteIntBinOp.
6311 // TODO: Should we apply desirable/legal constraints to all opcodes?
6312 if ((HandOpcode == ISD::ANY_EXTEND ||
6313 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6314 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
6315 return SDValue();
6316 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
 // Preserve the 'disjoint' flag only for plain extends, where it stays valid
 // on the narrower logic op.
6317 SDNodeFlags LogicFlags;
6318 LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
6319 ISD::isExtOpcode(HandOpcode));
6320 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y, LogicFlags);
6321 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
6322 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6323 return DAG.getNode(HandOpcode, DL, VT, Logic);
6324 }
6325
6326 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
6327 if (HandOpcode == ISD::TRUNCATE) {
6328 // If both operands have other uses, this transform would create extra
6329 // instructions without eliminating anything.
6330 if (!N0.hasOneUse() && !N1.hasOneUse())
6331 return SDValue();
6332 // We need matching source types.
6333 if (XVT != Y.getValueType())
6334 return SDValue();
6335 // Don't create an illegal op during or after legalization.
6336 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
6337 return SDValue();
6338 // Be extra careful sinking truncate. If it's free, there's no benefit in
6339 // widening a binop. Also, don't create a logic op on an illegal type.
6340 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
6341 return SDValue();
6342 if (!TLI.isTypeLegal(XVT))
6343 return SDValue();
6344 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6345 return DAG.getNode(HandOpcode, DL, VT, Logic);
6346 }
6347
6348 // For binops SHL/SRL/SRA/AND:
6349 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
6350 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
6351 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
6352 N0.getOperand(1) == N1.getOperand(1)) {
6353 // If either operand has other uses, this transform is not an improvement.
6354 if (!N0.hasOneUse() || !N1.hasOneUse())
6355 return SDValue();
6356 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6357 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6358 }
6359
6360 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
6361 if (HandOpcode == ISD::BSWAP) {
6362 // If either operand has other uses, this transform is not an improvement.
6363 if (!N0.hasOneUse() || !N1.hasOneUse())
6364 return SDValue();
6365 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6366 return DAG.getNode(HandOpcode, DL, VT, Logic);
6367 }
6368
6369 // For funnel shifts FSHL/FSHR:
6370 // logic_op (OP x, x1, s), (OP y, y1, s) -->
6371 // --> OP (logic_op x, y), (logic_op, x1, y1), s
6372 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
6373 N0.getOperand(2) == N1.getOperand(2)) {
6374 if (!N0.hasOneUse() || !N1.hasOneUse())
6375 return SDValue();
6376 SDValue X1 = N0.getOperand(1);
6377 SDValue Y1 = N1.getOperand(1);
6378 SDValue S = N0.getOperand(2);
6379 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6380 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6381 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6382 }
6383
6384 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6385 // Only perform this optimization up until type legalization, before
6386 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
6387 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6388 // we don't want to undo this promotion.
6389 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6390 // on scalars.
6391 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6392 Level <= AfterLegalizeTypes) {
6393 // Input types must be integer and the same.
6394 if (XVT.isInteger() && XVT == Y.getValueType() &&
6395 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6396 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6397 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6398 return DAG.getNode(HandOpcode, DL, VT, Logic);
6399 }
6400 }
6401
6402 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6403 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6404 // If both shuffles use the same mask, and both shuffle within a single
6405 // vector, then it is worthwhile to move the swizzle after the operation.
6406 // The type-legalizer generates this pattern when loading illegal
6407 // vector types from memory. In many cases this allows additional shuffle
6408 // optimizations.
6409 // There are other cases where moving the shuffle after the xor/and/or
6410 // is profitable even if shuffles don't perform a swizzle.
6411 // If both shuffles use the same mask, and both shuffles have the same first
6412 // or second operand, then it might still be profitable to move the shuffle
6413 // after the xor/and/or operation.
6414 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6415 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6416 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6417 assert(X.getValueType() == Y.getValueType() &&
6418 "Inputs to shuffles are not the same type");
6419
6420 // Check that both shuffles use the same mask. The masks are known to be of
6421 // the same length because the result vector type is the same.
6422 // Check also that shuffles have only one use to avoid introducing extra
6423 // instructions.
6424 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6425 !SVN0->getMask().equals(SVN1->getMask()))
6426 return SDValue();
6427
6428 // Don't try to fold this node if it requires introducing a
6429 // build vector of all zeros that might be illegal at this stage.
 // For XOR the shared operand C appears on both sides, so the folded shuffle
 // needs (C xor C) == 0 as its second input; tryFoldToZero may fail (null).
6430 SDValue ShOp = N0.getOperand(1);
6431 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6432 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6433
6434 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6435 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6436 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6437 N0.getOperand(0), N1.getOperand(0));
6438 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6439 }
6440
6441 // Don't try to fold this node if it requires introducing a
6442 // build vector of all zeros that might be illegal at this stage.
6443 ShOp = N0.getOperand(0);
6444 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6445 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6446
6447 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6448 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6449 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6450 N1.getOperand(1));
6451 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6452 }
6453 }
6454
6455 return SDValue();
6456}
6457
6458/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6459SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6460 const SDLoc &DL) {
 // Decompose both sides into setcc-equivalent (LHS, RHS, condcode) triples.
6461 SDValue LL, LR, RL, RR, N0CC, N1CC;
6462 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6463 !isSetCCEquivalent(N1, RL, RR, N1CC))
6464 return SDValue();
6465
6466 assert(N0.getValueType() == N1.getValueType() &&
6467 "Unexpected operand types for bitwise logic op");
6468 assert(LL.getValueType() == LR.getValueType() &&
6469 RL.getValueType() == RR.getValueType() &&
6470 "Unexpected operand types for setcc");
6471
6472 // If we're here post-legalization or the logic op type is not i1, the logic
6473 // op type must match a setcc result type. Also, all folds require new
6474 // operations on the left and right operands, so those types must match.
6475 EVT VT = N0.getValueType();
6476 EVT OpVT = LL.getValueType();
6477 if (LegalOperations || VT.getScalarType() != MVT::i1)
6478 if (VT != getSetCCResultType(OpVT))
6479 return SDValue();
6480 if (OpVT != RL.getValueType())
6481 return SDValue();
6482
6483 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6484 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6485 bool IsInteger = OpVT.isInteger();
6486 if (LR == RR && CC0 == CC1 && IsInteger) {
6487 bool IsZero = isNullOrNullSplat(LR);
6488 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6489
6490 // All bits clear?
6491 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6492 // All sign bits clear?
6493 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6494 // Any bits set?
6495 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6496 // Any sign bits set?
6497 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6498
6499 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6500 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6501 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6502 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6503 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6504 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6505 AddToWorklist(Or.getNode());
6506 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6507 }
6508
6509 // All bits set?
6510 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6511 // All sign bits set?
6512 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6513 // Any bits clear?
6514 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6515 // Any sign bits clear?
6516 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6517
6518 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6519 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6520 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6521 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6522 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6523 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6524 AddToWorklist(And.getNode());
6525 return DAG.getSetCC(DL, VT, And, LR, CC1);
6526 }
6527 }
6528
6529 // TODO: What is the 'or' equivalent of this fold?
6530 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6531 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6532 IsInteger && CC0 == ISD::SETNE &&
6533 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6534 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6535 SDValue One = DAG.getConstant(1, DL, OpVT);
6536 SDValue Two = DAG.getConstant(2, DL, OpVT);
6537 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6538 AddToWorklist(Add.getNode());
6539 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6540 }
6541
6542 // Try more general transforms if the predicates match and the only user of
6543 // the compares is the 'and' or 'or'.
6544 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6545 N0.hasOneUse() && N1.hasOneUse()) {
6546 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6547 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6548 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6549 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6550 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6551 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6552 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6553 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6554 }
6555
6556 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6557 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6558 // Match a shared variable operand and 2 non-opaque constant operands.
6559 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6560 // The difference of the constants must be a single bit.
6561 const APInt &CMax =
6562 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6563 const APInt &CMin =
6564 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6565 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6566 };
6567 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6568 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6569 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6570 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6571 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6572 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6573 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6574 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6575 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6576 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6577 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6578 }
6579 }
6580 }
6581
6582 // Canonicalize equivalent operands to LL == RL.
 // NOTE(review): a line is elided here (original line 6584) — presumably the
 // condition-code adjustment that accompanies swapping RL/RR; verify against
 // the upstream source before editing this region.
6583 if (LL == RR && LR == RL) {
6585 std::swap(RL, RR);
6586 }
6587
6588 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6589 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6590 if (LL == RL && LR == RR) {
6591 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6592 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6593 if (NewCC != ISD::SETCC_INVALID &&
6594 (!LegalOperations ||
6595 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6596 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6597 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6598 }
6599
6600 return SDValue();
6601}
6602
6603static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6604 SelectionDAG &DAG) {
6605 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6606}
6607
6608static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6609 SelectionDAG &DAG) {
6610 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6611}
6612
6613/// Returns an appropriate FP min/max opcode for clamping operations.
6614static unsigned getMinMaxOpcodeForClamp(bool IsMin, SDValue Operand1,
6615 SDValue Operand2, SelectionDAG &DAG,
6616 const TargetLowering &TLI) {
6617 EVT VT = Operand1.getValueType();
6618 unsigned IEEEOp = IsMin ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
6619 if (TLI.isOperationLegalOrCustom(IEEEOp, VT) &&
6620 arebothOperandsNotNan(Operand1, Operand2, DAG))
6621 return IEEEOp;
6622 unsigned PreferredOp = IsMin ? ISD::FMINNUM : ISD::FMAXNUM;
6623 if (TLI.isOperationLegalOrCustom(PreferredOp, VT))
6624 return PreferredOp;
6625 return ISD::DELETED_NODE;
6626}
6627
6628// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
 // NOTE(review): this function's name/return-type line is elided from this
 // listing (original line 6629). It selects an FP min/max opcode (or
 // ISD::DELETED_NODE) for folding (setcc x, z) and/or (setcc y, z) into a
 // min/max-plus-setcc, given the predicate and the surrounding AND/OR.
6630 SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode,
6631 SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM) {
6632 // The optimization cannot be applied for all the predicates because
6633 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6634 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6635 // applied at all if one of the operands is a signaling NaN.
6636
6637 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6638 // are non NaN values.
 // NOTE(review): in each of the four cases below the ternary's result
 // operands are on lines elided from this listing (e.g. originals 6643-6644);
 // each selects the IEEE min/max opcode or ISD::DELETED_NODE.
6639 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6640 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6641 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6642 isFMAXNUMFMINNUM_IEEE
6645 }
6646
6647 if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6648 ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6649 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6650 isFMAXNUMFMINNUM_IEEE
6653 }
6654
6655 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6656 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6657 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6658 // that there are not any sNaNs, then the optimization is not valid
6659 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6660 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6661 // we can prove that we do not have any sNaNs, then we can do the
6662 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6663 // cases.
6664 if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6665 ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6666 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6667 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6668 isFMAXNUMFMINNUM_IEEE
6671 }
6672
6673 if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6674 ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6675 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6676 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6677 isFMAXNUMFMINNUM_IEEE
6680 }
6681
 // No predicate/logic-op combination matched: signal "no opcode".
6682 return ISD::DELETED_NODE;
6683}
6684
 // NOTE(review): this function's signature is elided from this listing
 // (original lines 6685-6686). It folds (and/or (setcc ...), (setcc ...))
 // into min/max+setcc, abs+setcc, or add/and+setcc forms per target
 // preference.
6687 assert(
6688 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6689 "Invalid Op to combine SETCC with");
6690
6691 // TODO: Search past casts/truncates.
6692 SDValue LHS = LogicOp->getOperand(0);
6693 SDValue RHS = LogicOp->getOperand(1);
6694 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6695 !LHS->hasOneUse() || !RHS->hasOneUse())
6696 return SDValue();
6697
 // NOTE(review): the TargetPreference query call opens on an elided line
 // (original line 6699).
6698 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6700 LogicOp, LHS.getNode(), RHS.getNode());
6701
6702 SDValue LHS0 = LHS->getOperand(0);
6703 SDValue RHS0 = RHS->getOperand(0);
6704 SDValue LHS1 = LHS->getOperand(1);
6705 SDValue RHS1 = RHS->getOperand(1);
6706 // TODO: We don't actually need a splat here, for vectors we just need the
6707 // invariants to hold for each element.
6708 auto *LHS1C = isConstOrConstSplat(LHS1);
6709 auto *RHS1C = isConstOrConstSplat(RHS1);
6710 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6711 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6712 EVT VT = LogicOp->getValueType(0);
6713 EVT OpVT = LHS0.getValueType();
6714 SDLoc DL(LogicOp);
6715
6716 // Check if the operands of an and/or operation are comparisons and if they
6717 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6718 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6719 // sequence will be replaced with min-cmp sequence:
6720 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6721 // and and-cmp-cmp will be replaced with max-cmp sequence:
6722 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6723 // The optimization does not work for `==` or `!=` .
6724 // The two comparisons should have either the same predicate or the
6725 // predicate of one of the comparisons is the opposite of the other one.
 // NOTE(review): the second conjunct of each legality flag is on an elided
 // line (originals 6727 and 6729).
6726 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6728 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6730 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6731 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6732 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6733 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6734 (OpVT.isFloatingPoint() &&
6735 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6737 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6738 CCL != ISD::SETTRUE &&
6739 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6740
 // Identify the value both compares share and the two distinct operands;
 // CC is declared on an elided line (original line 6742).
6741 SDValue CommonValue, Operand1, Operand2;
6743 if (CCL == CCR) {
6744 if (LHS0 == RHS0) {
6745 CommonValue = LHS0;
6746 Operand1 = LHS1;
6747 Operand2 = RHS1;
6749 } else if (LHS1 == RHS1) {
6750 CommonValue = LHS1;
6751 Operand1 = LHS0;
6752 Operand2 = RHS0;
6753 CC = CCL;
6754 }
6755 } else {
6756 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6757 if (LHS0 == RHS1) {
6758 CommonValue = LHS0;
6759 Operand1 = LHS1;
6760 Operand2 = RHS0;
6761 CC = CCR;
6762 } else if (RHS0 == LHS1) {
6763 CommonValue = LHS1;
6764 Operand1 = LHS0;
6765 Operand2 = RHS1;
6766 CC = CCL;
6767 }
6768 }
6769
6770 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6771 // handle it using OR/AND.
6772 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6773 CC = ISD::SETCC_INVALID;
6774 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6775 CC = ISD::SETCC_INVALID;
6776
6777 if (CC != ISD::SETCC_INVALID) {
6778 unsigned NewOpcode = ISD::DELETED_NODE;
6779 bool IsSigned = isSignedIntSetCC(CC);
6780 if (OpVT.isInteger()) {
 // "less" predicate under OR takes min; under AND takes max (and the
 // mirrored cases for "greater").
6781 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6782 CC == ISD::SETLT || CC == ISD::SETULT);
6783 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6784 if (IsLess == IsOr)
6785 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6786 else
6787 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
 // NOTE(review): the FP opcode-chooser call opens on an elided line
 // (original line 6789).
6788 } else if (OpVT.isFloatingPoint())
6790 Operand1, Operand2, CC, LogicOp->getOpcode(), DAG,
6791 isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6792
6793 if (NewOpcode != ISD::DELETED_NODE) {
6794 SDValue MinMaxValue =
6795 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6796 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6797 }
6798 }
6799 }
6800
 // (seto X, Y) & (seto X, Y) and (setuo X, Y) | (setuo X, Y) collapse to a
 // single ordered/unordered compare.
6801 if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6802 LHS0.getValueType() == RHS0.getValueType() &&
6803 ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6804 (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6805 return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6806
6807 if (TargetPreference == AndOrSETCCFoldKind::None)
6808 return SDValue();
6809
6810 if (CCL == CCR &&
6811 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6812 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6813 const APInt &APLhs = LHS1C->getAPIntValue();
6814 const APInt &APRhs = RHS1C->getAPIntValue();
6815
6816 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6817 // case this is just a compare).
6818 if (APLhs == (-APRhs) &&
6819 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6820 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6821 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6822 // (icmp eq A, C) | (icmp eq A, -C)
6823 // -> (icmp eq Abs(A), C)
6824 // (icmp ne A, C) & (icmp ne A, -C)
6825 // -> (icmp ne Abs(A), C)
6826 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6827 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6828 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
 // NOTE(review): the preference-mask operand of this test is on an elided
 // line (original line 6830).
6829 } else if (TargetPreference &
6831
6832 // AndOrSETCCFoldKind::AddAnd:
6833 // A == C0 | A == C1
6834 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6835 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6836 // A != C0 & A != C1
6837 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6838 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6839
6840 // AndOrSETCCFoldKind::NotAnd:
6841 // A == C0 | A == C1
6842 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6843 // -> ~A & smin(C0, C1) == 0
6844 // A != C0 & A != C1
6845 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6846 // -> ~A & smin(C0, C1) != 0
6847
6848 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6849 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6850 APInt Dif = MaxC - MinC;
6851 if (!Dif.isZero() && Dif.isPowerOf2()) {
6852 if (MaxC.isAllOnes() &&
6853 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6854 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6855 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6856 DAG.getConstant(MinC, DL, OpVT));
6857 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6858 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6859 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6860
6861 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6862 DAG.getConstant(-MinC, DL, OpVT));
6863 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6864 DAG.getConstant(~Dif, DL, OpVT));
6865 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6866 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6867 }
6868 }
6869 }
6870 }
6871
6872 return SDValue();
6873}
6874
6875// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6876// We canonicalize to the `select` form in the middle end, but the `and` form
6877// gets better codegen and all tested targets (arm, x86, riscv)
 // NOTE(review): the function's name line is elided from this listing
 // (original line 6878); the parameters Cond/T/F are the select's condition,
 // true arm, and false arm respectively.
6879 const SDLoc &DL, SelectionDAG &DAG) {
6880 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 // The false arm must be the constant zero.
6881 if (!isNullConstant(F))
6882 return SDValue();
6883
 // The boolean must be exactly 0/1 for (zext c) & X to equal the select.
 // NOTE(review): the expected-contents enumerator is on an elided line
 // (original line 6886).
6884 EVT CondVT = Cond.getValueType();
6885 if (TLI.getBooleanContents(CondVT) !=
6887 return SDValue();
6888
 // The true arm must be (X & 1).
6889 if (T.getOpcode() != ISD::AND)
6890 return SDValue();
6891
6892 if (!isOneConstant(T.getOperand(1)))
6893 return SDValue();
6894
6895 EVT OpVT = T.getValueType();
6896
 // Widen/truncate the boolean to the operand type, then mask X with it.
6897 SDValue CondMask =
6898 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6899 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6900}
6901
6902/// This contains all DAGCombine rules which reduce two values combined by
6903/// an And operation to a single value. This makes them reusable in the context
6904/// of visitSELECT(). Rules involving constants are not included as
6905/// visitSELECT() already handles those cases.
6906SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6907 EVT VT = N1.getValueType();
6908 SDLoc DL(N);
6909
6910 // fold (and x, undef) -> 0
6911 if (N0.isUndef() || N1.isUndef())
6912 return DAG.getConstant(0, DL, VT);
6913
6914 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6915 return V;
6916
6917 // Canonicalize:
6918 // and(x, add) -> and(add, x)
6919 if (N1.getOpcode() == ISD::ADD)
6920 std::swap(N0, N1);
6921
6922 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6923 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6924 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6925 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6926 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6927 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6928 // immediate for an add, but it is legal if its top c2 bits are set,
6929 // transform the ADD so the immediate doesn't need to be materialized
6930 // in a register.
6931 APInt ADDC = ADDI->getAPIntValue();
6932 APInt SRLC = SRLI->getAPIntValue();
6933 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6934 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6936 SRLC.getZExtValue());
6937 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6938 ADDC |= Mask;
6939 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6940 SDLoc DL0(N0);
6941 SDValue NewAdd =
6942 DAG.getNode(ISD::ADD, DL0, VT,
6943 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6944 CombineTo(N0.getNode(), NewAdd);
6945 // Return N so it doesn't get rechecked!
6946 return SDValue(N, 0);
6947 }
6948 }
6949 }
6950 }
6951 }
6952 }
6953
6954 return SDValue();
6955}
6956
6957bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6958 EVT LoadResultTy, EVT &ExtVT) {
6959 if (!AndC->getAPIntValue().isMask())
6960 return false;
6961
6962 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6963
6964 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6965 EVT LoadedVT = LoadN->getMemoryVT();
6966
6967 if (ExtVT == LoadedVT &&
6968 (!LegalOperations ||
6969 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6970 // ZEXTLOAD will match without needing to change the size of the value being
6971 // loaded.
6972 return true;
6973 }
6974
6975 // Do not change the width of a volatile or atomic loads.
6976 if (!LoadN->isSimple())
6977 return false;
6978
6979 // Do not generate loads of non-round integer types since these can
6980 // be expensive (and would be wrong if the type is not byte sized).
6981 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6982 return false;
6983
6984 if (LegalOperations &&
6985 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6986 return false;
6987
6988 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
6989 return false;
6990
6991 return true;
6992}
6993
6994bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6995 ISD::LoadExtType ExtType, EVT &MemVT,
6996 unsigned ShAmt) {
6997 if (!LDST)
6998 return false;
6999
7000 // Only allow byte offsets.
7001 if (ShAmt % 8)
7002 return false;
7003 const unsigned ByteShAmt = ShAmt / 8;
7004
7005 // Do not generate loads of non-round integer types since these can
7006 // be expensive (and would be wrong if the type is not byte sized).
7007 if (!MemVT.isRound())
7008 return false;
7009
7010 // Don't change the width of a volatile or atomic loads.
7011 if (!LDST->isSimple())
7012 return false;
7013
7014 EVT LdStMemVT = LDST->getMemoryVT();
7015
7016 // Bail out when changing the scalable property, since we can't be sure that
7017 // we're actually narrowing here.
7018 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
7019 return false;
7020
7021 // Verify that we are actually reducing a load width here.
7022 if (LdStMemVT.bitsLT(MemVT))
7023 return false;
7024
7025 // Ensure that this isn't going to produce an unsupported memory access.
7026 if (ShAmt) {
7027 const Align LDSTAlign = LDST->getAlign();
7028 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
7029 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7030 LDST->getAddressSpace(), NarrowAlign,
7031 LDST->getMemOperand()->getFlags()))
7032 return false;
7033 }
7034
7035 // It's not possible to generate a constant of extended or untyped type.
7036 EVT PtrType = LDST->getBasePtr().getValueType();
7037 if (PtrType == MVT::Untyped || PtrType.isExtended())
7038 return false;
7039
7040 if (isa<LoadSDNode>(LDST)) {
7041 LoadSDNode *Load = cast<LoadSDNode>(LDST);
7042 // Don't transform one with multiple uses, this would require adding a new
7043 // load.
7044 if (!SDValue(Load, 0).hasOneUse())
7045 return false;
7046
7047 if (LegalOperations &&
7048 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
7049 return false;
7050
7051 // For the transform to be legal, the load must produce only two values
7052 // (the value loaded and the chain). Don't transform a pre-increment
7053 // load, for example, which produces an extra value. Otherwise the
7054 // transformation is not equivalent, and the downstream logic to replace
7055 // uses gets things wrong.
7056 if (Load->getNumValues() > 2)
7057 return false;
7058
7059 // If the load that we're shrinking is an extload and we're not just
7060 // discarding the extension we can't simply shrink the load. Bail.
7061 // TODO: It would be possible to merge the extensions in some cases.
7062 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
7063 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7064 return false;
7065
7066 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
7067 return false;
7068 } else {
7069 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
7070 StoreSDNode *Store = cast<StoreSDNode>(LDST);
7071 // Can't write outside the original store
7072 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7073 return false;
7074
7075 if (LegalOperations &&
7076 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
7077 return false;
7078 }
7079 return true;
7080}
7081
/// Recursively walk the operand tree of the bitwise-logic node \p N, checking
/// that every leaf is either a load that can be narrowed to the width implied
/// by the low-bit mask \p Mask, a zero-extension already covered by the mask,
/// a constant (recorded in \p NodesWithConsts if it has bits outside the
/// mask), or at most ONE other node (returned via \p NodeToMask) that will
/// need an explicit AND inserted. Returns false as soon as any operand makes
/// the backwards mask propagation unsafe.
7082bool DAGCombiner::SearchForAndLoads(SDNode *N,
7083 SmallVectorImpl<LoadSDNode*> &Loads,
7084 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
7085 ConstantSDNode *Mask,
7086 SDNode *&NodeToMask) {
7087 // Recursively search for the operands, looking for loads which can be
7088 // narrowed.
7089 for (SDValue Op : N->op_values()) {
  // This transform is scalar-only.
7090 if (Op.getValueType().isVector())
7091 return false;
7092
7093 // Some constants may need fixing up later if they are too large.
7094 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
7095 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
7096 "Expected bitwise logic operation");
  // Record the PARENT node: its constant operand has bits the mask would
  // clear, so the caller must AND the constant with the mask later.
7097 if (!C->getAPIntValue().isSubsetOf(Mask->getAPIntValue()))
7098 NodesWithConsts.insert(N);
7099 continue;
7100 }
7101
  // Multi-use operands would be observed both masked and unmasked.
7102 if (!Op.hasOneUse())
7103 return false;
7104
7105 switch(Op.getOpcode()) {
7106 case ISD::LOAD: {
7107 auto *Load = cast<LoadSDNode>(Op);
7108 EVT ExtVT;
7109 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
7110 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
7111
7112 // ZEXTLOAD is already small enough.
7113 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
7114 ExtVT.bitsGE(Load->getMemoryVT()))
7115 continue;
7116
7117 // Use LE to convert equal sized loads to zext.
7118 if (ExtVT.bitsLE(Load->getMemoryVT()))
7119 Loads.push_back(Load);
7120
7121 continue;
7122 }
7123 return false;
7124 }
7125 case ISD::ZERO_EXTEND:
7126 case ISD::AssertZext: {
7127 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
7128 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  // For AssertZext the known-zero boundary is the asserted VT, not the
  // operand type.
7129 EVT VT = Op.getOpcode() == ISD::AssertZext ?
7130 cast<VTSDNode>(Op.getOperand(1))->getVT() :
7131 Op.getOperand(0).getValueType();
7132
7133 // We can accept extending nodes if the mask is wider or an equal
7134 // width to the original type.
7135 if (ExtVT.bitsGE(VT))
7136 continue;
  // Otherwise fall through to the single-NodeToMask handling below.
7137 break;
7138 }
7139 case ISD::OR:
7140 case ISD::XOR:
7141 case ISD::AND:
  // Recurse through further bitwise-logic nodes; the accumulated state
  // (Loads, NodesWithConsts, NodeToMask) is shared across the whole tree.
7142 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
7143 NodeToMask))
7144 return false;
7145 continue;
7146 }
7147
7148 // Allow one node which will masked along with any loads found.
7149 if (NodeToMask)
7150 return false;
7151
7152 // Also ensure that the node to be masked only produces one data result.
7153 NodeToMask = Op.getNode();
7154 if (NodeToMask->getNumValues() > 1) {
7155 bool HasValue = false;
7156 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
7157 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
  // Glue/chain results don't count as data results.
7158 if (VT != MVT::Glue && VT != MVT::Other) {
7159 if (HasValue) {
7160 NodeToMask = nullptr;
7161 return false;
7162 }
7163 HasValue = true;
7164 }
7165 }
7166 assert(HasValue && "Node to be masked has no data result?");
7167 }
7168 }
7169 return true;
7170}
7171
/// Try to push the constant low-bit mask of the AND node \p N backwards
/// through a tree of bitwise-logic nodes down to the loads feeding it, so the
/// loads can be narrowed (via reduceLoadWidth) and the AND itself removed.
/// Rewrites the DAG in place and returns true on success.
7172bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
7173 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
7174 if (!Mask)
7175 return false;
7176
  // Only contiguous low-bit masks can be folded into narrowed loads.
7177 if (!Mask->getAPIntValue().isMask())
7178 return false;
7179
7180 // No need to do anything if the and directly uses a load.
7181 if (isa<LoadSDNode>(N->getOperand(0)))
7182 return false;
7183
  // NOTE(review): a declaration line appears to be missing from this excerpt
  // here (presumably "SmallVector<LoadSDNode*, 8> Loads;") — confirm against
  // the upstream source.
7185 SmallPtrSet<SDNode*, 2> NodesWithConsts;
7186 SDNode *FixupNode = nullptr;
7187 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
  // Without at least one narrowable load the rewrite has no benefit.
7188 if (Loads.empty())
7189 return false;
7190
7191 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
7192 SDValue MaskOp = N->getOperand(1);
7193
7194 // If it exists, fixup the single node we allow in the tree that needs
7195 // masking.
7196 if (FixupNode) {
7197 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
7198 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
7199 FixupNode->getValueType(0),
7200 SDValue(FixupNode, 0), MaskOp);
7201 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
  // RAUW above also rewired the new AND's own operand onto itself;
  // restore it to point at the original node.
7202 if (And.getOpcode() == ISD ::AND)
7203 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
7204 }
7205
7206 // Narrow any constants that need it.
7207 for (auto *LogicN : NodesWithConsts) {
7208 SDValue Op0 = LogicN->getOperand(0);
7209 SDValue Op1 = LogicN->getOperand(1);
7210
7211 // We only need to fix AND if both inputs are constants. And we only need
7212 // to fix one of the constants.
  // NOTE(review): the second half of this condition (line 7214) is missing
  // from this excerpt — confirm against the upstream source.
7213 if (LogicN->getOpcode() == ISD::AND &&
7215 continue;
7216
7217 if (isa<ConstantSDNode>(Op0) && LogicN->getOpcode() != ISD::AND)
7218 Op0 =
7219 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
7220
7221 if (isa<ConstantSDNode>(Op1))
7222 Op1 =
7223 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
7224
  // Keep any remaining constant on the RHS (canonical operand order).
7225 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
7226 std::swap(Op0, Op1);
7227
7228 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
7229 }
7230
7231 // Create narrow loads.
7232 for (auto *Load : Loads) {
7233 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
  // Insert an explicit (and load, mask) so reduceLoadWidth can match it.
7234 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
7235 SDValue(Load, 0), MaskOp);
7236 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
  // As above: undo the self-referencing operand introduced by RAUW.
7237 if (And.getOpcode() == ISD ::AND)
7238 And = SDValue(
7239 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
7240 SDValue NewLoad = reduceLoadWidth(And.getNode());
7241 assert(NewLoad &&
7242 "Shouldn't be masking the load if it can't be narrowed");
7243 CombineTo(Load, NewLoad, NewLoad.getValue(1));
7244 }
  // The mask is now redundant; replace the AND with its un-masked operand.
7245 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
7246 return true;
7247 }
7248 return false;
7249}
7250
7251// Unfold
7252// x & (-1 'logical shift' y)
7253// To
7254// (x 'opposite logical shift' y) 'logical shift' y
7255// if it is better for performance.
7256SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
7257 assert(N->getOpcode() == ISD::AND);
7258
7259 SDValue N0 = N->getOperand(0);
7260 SDValue N1 = N->getOperand(1);
7261
7262 // Do we actually prefer shifts over mask?
  // NOTE(review): the condition line (7263, presumably a target hook query
  // such as TLI.shouldFoldMaskToVariableShiftPair) is missing from this
  // excerpt — confirm against the upstream source.
7264 return SDValue();
7265
7266 // Try to match (-1 '[outer] logical shift' y)
7267 unsigned OuterShift;
7268 unsigned InnerShift; // The opposite direction to the OuterShift.
7269 SDValue Y; // Shift amount.
  // Matches M against (shl -1, Y) or (srl -1, Y); on success records the
  // outer/inner shift opcodes and the shift amount in the captured locals.
7270 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
7271 if (!M.hasOneUse())
7272 return false;
7273 OuterShift = M->getOpcode();
7274 if (OuterShift == ISD::SHL)
7275 InnerShift = ISD::SRL;
7276 else if (OuterShift == ISD::SRL)
7277 InnerShift = ISD::SHL;
7278 else
7279 return false;
7280 if (!isAllOnesConstant(M->getOperand(0)))
7281 return false;
7282 Y = M->getOperand(1);
7283 return true;
7284 };
7285
  // AND is commutative, so try the mask on either side.
7286 SDValue X;
7287 if (matchMask(N1))
7288 X = N0;
7289 else if (matchMask(N0))
7290 X = N1;
7291 else
7292 return SDValue();
7293
7294 SDLoc DL(N);
7295 EVT VT = N->getValueType(0);
7296
7297 // tmp = x 'opposite logical shift' y
7298 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
7299 // ret = tmp 'logical shift' y
7300 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
7301
7302 return T1;
7303}
7304
7305/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
7306/// For a target with a bit test, this is expected to become test + set and save
7307/// at least 1 instruction.
// NOTE(review): the function signature line (7308, presumably
// "static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {")
// is missing from this excerpt — confirm against the upstream source.
7309 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
7310
7311 // Look through an optional extension.
7312 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
7313 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
7314 And0 = And0.getOperand(0);
  // The pattern must be (and X, 1): only the low bit is observed.
7315 if (!isOneConstant(And1) || !And0.hasOneUse())
7316 return SDValue();
7317
7318 SDValue Src = And0;
7319
7320 // Attempt to find a 'not' op.
7321 // TODO: Should we favor test+set even without the 'not' op?
7322 bool FoundNot = false;
7323 if (isBitwiseNot(Src)) {
7324 FoundNot = true;
7325 Src = Src.getOperand(0);
7326
7327 // Look though an optional truncation. The source operand may not be the
7328 // same type as the original 'and', but that is ok because we are masking
7329 // off everything but the low bit.
7330 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
7331 Src = Src.getOperand(0);
7332 }
7333
7334 // Match a shift-right by constant.
7335 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
7336 return SDValue();
7337
7338 // This is probably not worthwhile without a supported type.
7339 EVT SrcVT = Src.getValueType();
7340 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7341 if (!TLI.isTypeLegal(SrcVT))
7342 return SDValue();
7343
7344 // We might have looked through casts that make this transform invalid.
  // The shift amount must be a constant strictly inside the source width.
7345 unsigned BitWidth = SrcVT.getScalarSizeInBits();
7346 SDValue ShiftAmt = Src.getOperand(1);
7347 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
7348 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
7349 return SDValue();
7350
7351 // Set source to shift source.
7352 Src = Src.getOperand(0);
7353
7354 // Try again to find a 'not' op.
7355 // TODO: Should we favor test+set even with two 'not' ops?
  // Exactly one 'not' is required — either outside or inside the shift.
7356 if (!FoundNot) {
7357 if (!isBitwiseNot(Src))
7358 return SDValue();
7359 Src = Src.getOperand(0);
7360 }
7361
7362 if (!TLI.hasBitTest(Src, ShiftAmt))
7363 return SDValue();
7364
7365 // Turn this into a bit-test pattern using mask op + setcc:
7366 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
7367 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
7368 SDLoc DL(And);
7369 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
7370 EVT CCVT =
7371 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7372 SDValue Mask = DAG.getConstant(
7373 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
7374 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
7375 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
7376 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
  // Resize the i1-ish setcc result back to the original AND's type.
7377 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
7378}
7379
7380/// For targets that support usubsat, match a bit-hack form of that operation
7381/// that ends in 'and' and convert it.
// NOTE(review): the function signature line (7382, presumably
// "static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {")
// is missing from this excerpt — confirm against the upstream source.
7383 EVT VT = N->getValueType(0);
7384 unsigned BitWidth = VT.getScalarSizeInBits();
  // SignMask == SMIN as an unsigned bit pattern (e.g. 0x80 for i8).
7385 APInt SignMask = APInt::getSignMask(BitWidth);
7386
7387 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
7388 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
7389 // xor/add with SMIN (signmask) are logically equivalent.
7390 SDValue X;
  // NOTE(review): sd_match continuation lines (7392, 7394-7395 — the m_Sra
  // halves and the m_Add alternative) are missing from this excerpt —
  // confirm against the upstream source.
7391 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
7393 m_SpecificInt(BitWidth - 1))))) &&
7396 m_SpecificInt(BitWidth - 1))))))
7397 return SDValue();
7398
7399 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
7400 DAG.getConstant(SignMask, DL, VT));
7401}
7402
7403/// Given a bitwise logic operation N with a matching bitwise logic operand,
7404/// fold a pattern where 2 of the source operands are identically shifted
7405/// values. For example:
7406/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
// NOTE(review): the function signature line (7407, presumably
// "static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,")
// is missing from this excerpt — confirm against the upstream source.
7408 SelectionDAG &DAG) {
7409 unsigned LogicOpcode = N->getOpcode();
7410 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7411 "Expected bitwise logic operation");
7412
  // Both matched sub-nodes are removed by the rewrite, so they must be
  // single-use.
7413 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7414 return SDValue();
7415
7416 // Match another bitwise logic op and a shift.
7417 unsigned ShiftOpcode = ShiftOp.getOpcode();
7418 if (LogicOp.getOpcode() != LogicOpcode ||
7419 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7420 ShiftOpcode == ISD::SRA))
7421 return SDValue();
7422
7423 // Match another shift op inside the first logic operand. Handle both commuted
7424 // possibilities.
7425 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7426 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7427 SDValue X1 = ShiftOp.getOperand(0);
7428 SDValue Y = ShiftOp.getOperand(1);
7429 SDValue X0, Z;
  // The inner shift must be the same kind AND use the same amount Y.
7430 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7431 LogicOp.getOperand(0).getOperand(1) == Y) {
7432 X0 = LogicOp.getOperand(0).getOperand(0);
7433 Z = LogicOp.getOperand(1);
7434 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7435 LogicOp.getOperand(1).getOperand(1) == Y) {
7436 X0 = LogicOp.getOperand(1).getOperand(0);
7437 Z = LogicOp.getOperand(0);
7438 } else {
7439 return SDValue();
7440 }
7441
  // Build LOGIC (SH (LOGIC X0, X1), Y), Z — one shift instead of two.
7442 EVT VT = N->getValueType(0);
7443 SDLoc DL(N);
7444 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7445 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7446 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7447}
7448
7449/// Given a tree of logic operations with shape like
7450/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7451/// try to match and fold shift operations with the same shift amount.
7452/// For example:
7453/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7454/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
// NOTE(review): the function signature line (7455, presumably
// "static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,")
// is missing from this excerpt — confirm against the upstream source.
7456 SDValue RightHand, SelectionDAG &DAG) {
7457 unsigned LogicOpcode = N->getOpcode();
7458 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7459 "Expected bitwise logic operation");
  // Both hands must repeat N's own logic opcode to form the tree shape.
7460 if (LeftHand.getOpcode() != LogicOpcode ||
7461 RightHand.getOpcode() != LogicOpcode)
7462 return SDValue();
  // Both hands are replaced, so they must have no other users.
7463 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7464 return SDValue();
7465
7466 // Try to match one of following patterns:
7467 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7468 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7469 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7470 // itself.
7471 SDValue CombinedShifts, W;
7472 SDValue R0 = RightHand.getOperand(0);
7473 SDValue R1 = RightHand.getOperand(1);
  // Delegate the shift matching/combining to foldLogicOfShifts; whichever
  // right-hand operand is NOT the shift becomes W.
7474 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7475 W = R1;
7476 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7477 W = R0;
7478 else
7479 return SDValue();
7480
7481 EVT VT = N->getValueType(0);
7482 SDLoc DL(N);
7483 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7484}
7485
7486/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7487/// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y)`
7488/// pattern. This is typically a better representation for targets without a
7489/// fused "and-not" operation.
// NOTE(review): the function signature line (7490, presumably
// "static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,")
// is missing from this excerpt — confirm against the upstream source.
7491 const TargetLowering &TLI, const SDLoc &DL) {
7492 // Note that masked-merge variants using XOR or ADD expressions are
7493 // normalized to OR by InstCombine so we only check for OR or AND.
7494 assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
7495 "Must be called with ISD::OR or ISD::AND node");
7496
7497 // If the target supports and-not, don't fold this.
  // With and-not, the original two-op-per-side form is already cheap.
7498 if (TLI.hasAndNot(SDValue(Node, 0)))
7499 return SDValue();
7500
7501 SDValue M, X, Y;
7502
  // NOTE(review): pattern continuation lines (7504 and 7507 — the m_Or /
  // m_And halves binding M and the other value) are missing from this
  // excerpt — confirm against the upstream source.
7503 if (sd_match(Node,
7505 m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
7506 sd_match(Node,
7508 m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
  // Rewrite as ((x ^ y) & m) ^ y: three ops, no and-not required.
7509 EVT VT = M.getValueType();
7510 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
7511 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
7512 return DAG.getNode(ISD::XOR, DL, VT, And, Y);
7513 }
7514 return SDValue();
7515}
7516
7517SDValue DAGCombiner::visitAND(SDNode *N) {
7518 SDValue N0 = N->getOperand(0);
7519 SDValue N1 = N->getOperand(1);
7520 EVT VT = N1.getValueType();
7521 SDLoc DL(N);
7522
7523 // x & x --> x
7524 if (N0 == N1)
7525 return N0;
7526
7527 // fold (and c1, c2) -> c1&c2
7528 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7529 return C;
7530
7531 // canonicalize constant to RHS
7534 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7535
7536 if (areBitwiseNotOfEachother(N0, N1))
7537 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7538
7539 // fold vector ops
7540 if (VT.isVector()) {
7541 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7542 return FoldedVOp;
7543
7544 // fold (and x, 0) -> 0, vector edition
7546 // do not return N1, because undef node may exist in N1
7548 N1.getValueType());
7549
7550 // fold (and x, -1) -> x, vector edition
7552 return N0;
7553
7554 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7555 bool Frozen = N0.getOpcode() == ISD::FREEZE;
7556 auto *MLoad = dyn_cast<MaskedLoadSDNode>(Frozen ? N0.getOperand(0) : N0);
7557 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7558 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7559 EVT MemVT = MLoad->getMemoryVT();
7560 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
7561 // For this AND to be a zero extension of the masked load the elements
7562 // of the BuildVec must mask the bottom bits of the extended element
7563 // type
7564 if (Splat->getAPIntValue().isMask(MemVT.getScalarSizeInBits())) {
7565 SDValue NewLoad = DAG.getMaskedLoad(
7566 VT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7567 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), MemVT,
7568 MLoad->getMemOperand(), MLoad->getAddressingMode(), ISD::ZEXTLOAD,
7569 MLoad->isExpandingLoad());
7570 CombineTo(N, Frozen ? N0 : NewLoad);
7571 CombineTo(MLoad, NewLoad, NewLoad.getValue(1));
7572 return SDValue(N, 0);
7573 }
7574 }
7575 }
7576 }
7577
7578 // fold (and x, -1) -> x
7579 if (isAllOnesConstant(N1))
7580 return N0;
7581
7582 // if (and x, c) is known to be zero, return 0
7583 unsigned BitWidth = VT.getScalarSizeInBits();
7584 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7586 return DAG.getConstant(0, DL, VT);
7587
7588 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7589 return R;
7590
7591 if (SDValue NewSel = foldBinOpIntoSelect(N))
7592 return NewSel;
7593
7594 // reassociate and
7595 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7596 return RAND;
7597
7598 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7599 if (SDValue SD =
7600 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7601 return SD;
7602
7603 // fold (and (or x, C), D) -> D if (C & D) == D
7604 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7605 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7606 };
7607 if (N0.getOpcode() == ISD::OR &&
7608 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7609 return N1;
7610
7611 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7612 SDValue N0Op0 = N0.getOperand(0);
7613 EVT SrcVT = N0Op0.getValueType();
7614 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7615 APInt Mask = ~N1C->getAPIntValue();
7616 Mask = Mask.trunc(SrcBitWidth);
7617
7618 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7619 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7620 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7621
7622 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7623 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7624 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7625 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7626 TLI.isNarrowingProfitable(N, VT, SrcVT))
7627 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7628 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7629 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7630 }
7631
7632 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7633 if (ISD::isExtOpcode(N0.getOpcode())) {
7634 unsigned ExtOpc = N0.getOpcode();
7635 SDValue N0Op0 = N0.getOperand(0);
7636 if (N0Op0.getOpcode() == ISD::AND &&
7637 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7638 N0->hasOneUse() && N0Op0->hasOneUse()) {
7639 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7640 {N0Op0.getOperand(1)})) {
7641 if (SDValue NewMask =
7642 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7643 return DAG.getNode(ISD::AND, DL, VT,
7644 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7645 NewMask);
7646 }
7647 }
7648 }
7649 }
7650
7651 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7652 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7653 // already be zero by virtue of the width of the base type of the load.
7654 //
7655 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7656 // more cases.
7657 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7659 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7660 N0.getOperand(0).getResNo() == 0) ||
7661 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7662 auto *Load =
7663 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7664
7665 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7666 // This can be a pure constant or a vector splat, in which case we treat the
7667 // vector as a scalar and use the splat value.
7668 APInt Constant = APInt::getZero(1);
7669 if (const ConstantSDNode *C = isConstOrConstSplat(
7670 N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7671 Constant = C->getAPIntValue();
7672 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7673 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7674 APInt SplatValue, SplatUndef;
7675 unsigned SplatBitSize;
7676 bool HasAnyUndefs;
7677 // Endianness should not matter here. Code below makes sure that we only
7678 // use the result if the SplatBitSize is a multiple of the vector element
7679 // size. And after that we AND all element sized parts of the splat
7680 // together. So the end result should be the same regardless of in which
7681 // order we do those operations.
7682 const bool IsBigEndian = false;
7683 bool IsSplat =
7684 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7685 HasAnyUndefs, EltBitWidth, IsBigEndian);
7686
7687 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7688 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7689 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7690 // Undef bits can contribute to a possible optimisation if set, so
7691 // set them.
7692 SplatValue |= SplatUndef;
7693
7694 // The splat value may be something like "0x00FFFFFF", which means 0 for
7695 // the first vector value and FF for the rest, repeating. We need a mask
7696 // that will apply equally to all members of the vector, so AND all the
7697 // lanes of the constant together.
7698 Constant = APInt::getAllOnes(EltBitWidth);
7699 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7700 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7701 }
7702 }
7703
7704 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7705 // actually legal and isn't going to get expanded, else this is a false
7706 // optimisation.
7707 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7708 Load->getValueType(0),
7709 Load->getMemoryVT());
7710
7711 // Resize the constant to the same size as the original memory access before
7712 // extension. If it is still the AllOnesValue then this AND is completely
7713 // unneeded.
7714 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7715
7716 bool B;
7717 switch (Load->getExtensionType()) {
7718 default: B = false; break;
7719 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7720 case ISD::ZEXTLOAD:
7721 case ISD::NON_EXTLOAD: B = true; break;
7722 }
7723
7724 if (B && Constant.isAllOnes()) {
7725 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7726 // preserve semantics once we get rid of the AND.
7727 SDValue NewLoad(Load, 0);
7728
7729 // Fold the AND away. NewLoad may get replaced immediately.
7730 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7731
7732 if (Load->getExtensionType() == ISD::EXTLOAD) {
7733 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7734 Load->getValueType(0), SDLoc(Load),
7735 Load->getChain(), Load->getBasePtr(),
7736 Load->getOffset(), Load->getMemoryVT(),
7737 Load->getMemOperand());
7738 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7739 if (Load->getNumValues() == 3) {
7740 // PRE/POST_INC loads have 3 values.
7741 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7742 NewLoad.getValue(2) };
7743 CombineTo(Load, To, 3, true);
7744 } else {
7745 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7746 }
7747 }
7748
7749 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7750 }
7751 }
7752
7753 // Try to convert a constant mask AND into a shuffle clear mask.
7754 if (VT.isVector())
7755 if (SDValue Shuffle = XformToShuffleWithZero(N))
7756 return Shuffle;
7757
7758 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7759 return Combined;
7760
7761 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7763 SDValue Ext = N0.getOperand(0);
7764 EVT ExtVT = Ext->getValueType(0);
7765 SDValue Extendee = Ext->getOperand(0);
7766
7767 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7768 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7769 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7770 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7771 // => (extract_subvector (iN_zeroext v))
7772 SDValue ZeroExtExtendee =
7773 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7774
7775 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7776 N0.getOperand(1));
7777 }
7778 }
7779
7780 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7781 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7782 EVT MemVT = GN0->getMemoryVT();
7783 EVT ScalarVT = MemVT.getScalarType();
7784
7785 if (SDValue(GN0, 0).hasOneUse() &&
7786 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7788 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7789 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7790
7791 SDValue ZExtLoad = DAG.getMaskedGather(
7792 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7793 GN0->getIndexType(), ISD::ZEXTLOAD);
7794
7795 CombineTo(N, ZExtLoad);
7796 AddToWorklist(ZExtLoad.getNode());
7797 // Avoid recheck of N.
7798 return SDValue(N, 0);
7799 }
7800 }
7801
7802 // fold (and (load x), 255) -> (zextload x, i8)
7803 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7804 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7805 if (SDValue Res = reduceLoadWidth(N))
7806 return Res;
7807
7808 if (LegalTypes) {
7809 // Attempt to propagate the AND back up to the leaves which, if they're
7810 // loads, can be combined to narrow loads and the AND node can be removed.
7811 // Perform after legalization so that extend nodes will already be
7812 // combined into the loads.
7813 if (BackwardsPropagateMask(N))
7814 return SDValue(N, 0);
7815 }
7816
7817 if (SDValue Combined = visitANDLike(N0, N1, N))
7818 return Combined;
7819
7820 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7821 if (N0.getOpcode() == N1.getOpcode())
7822 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7823 return V;
7824
7825 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7826 return R;
7827 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7828 return R;
7829
7830 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7831 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7832 SDValue X, Y, Z, NotY;
7833 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7834 if (sd_match(N,
7835 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7836 sd_match(NotY, m_Not(m_Value(Y))) &&
7837 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7838 return DAG.getNode(ISD::AND, DL, VT, X,
7839 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7840
7841 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7842 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7843 if (sd_match(N, m_And(m_Value(X),
7844 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7845 sd_match(NotY, m_Not(m_Value(Y))) &&
7846 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7847 return DAG.getNode(ISD::AND, DL, VT, X,
7848 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7849
7850 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7851 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7852 if (TLI.hasAndNot(SDValue(N, 0)))
7853 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
7854 return Folded;
7855
7856 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7857 // If we are shifting down an extended sign bit, see if we can simplify
7858 // this to shifting the MSB directly to expose further simplifications.
7859 // This pattern often appears after sext_inreg legalization.
7860 APInt Amt;
7861 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7862 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7863 return DAG.getNode(ISD::SRL, DL, VT, X,
7864 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7865
7866 // Masking the negated extension of a boolean is just the zero-extended
7867 // boolean:
7868 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7869 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7870 //
7871 // Note: the SimplifyDemandedBits fold below can make an information-losing
7872 // transform, and then we have no way to find this better fold.
7873 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7874 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7875 X.getOperand(0).getScalarValueSizeInBits() == 1)
7876 return X;
7877 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7878 X.getOperand(0).getScalarValueSizeInBits() == 1)
7879 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7880 }
7881
7882 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7883 // fold (and (sra)) -> (and (srl)) when possible.
7885 return SDValue(N, 0);
7886
7887 // fold (zext_inreg (extload x)) -> (zextload x)
7888 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7889 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7890 (ISD::isEXTLoad(N0.getNode()) ||
7891 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7892 auto *LN0 = cast<LoadSDNode>(N0);
7893 EVT MemVT = LN0->getMemoryVT();
7894 // If we zero all the possible extended bits, then we can turn this into
7895 // a zextload if we are running before legalize or the operation is legal.
7896 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7897 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7898 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7899 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7900 ((!LegalOperations && LN0->isSimple()) ||
7901 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7902 SDValue ExtLoad =
7903 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7904 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7905 AddToWorklist(N);
7906 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7907 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7908 }
7909 }
7910
7911 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7912 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7913 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7914 N0.getOperand(1), false))
7915 return BSwap;
7916 }
7917
7918 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7919 return Shifts;
7920
7921 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7922 return V;
7923
7924 // Recognize the following pattern:
7925 //
7926 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7927 //
7928 // where bitmask is a mask that clears the upper bits of AndVT. The
7929 // number of bits in bitmask must be a power of two.
7930 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7931 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7932 return false;
7933
7935 if (!C)
7936 return false;
7937
7938 if (!C->getAPIntValue().isMask(
7939 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7940 return false;
7941
7942 return true;
7943 };
7944
7945 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7946 if (IsAndZeroExtMask(N0, N1))
7947 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7948
7949 if (hasOperation(ISD::USUBSAT, VT))
7950 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7951 return V;
7952
7953 // Postpone until legalization completed to avoid interference with bswap
7954 // folding
7955 if (LegalOperations || VT.isVector())
7956 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7957 return R;
7958
7959 if (VT.isScalarInteger() && VT != MVT::i1)
7960 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
7961 return R;
7962
7963 return SDValue();
7964}
7965
7966 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
// Recognizes a low-halfword byte swap built from a pair of 8-bit shifts
// (optionally masked by ANDs) and rewrites it as a BSWAP of the full value
// shifted right so the swapped halfword lands in the low bits.
// \p DemandHighBits - when true, bits above the low 16 must provably be zero.
7967 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7968 bool DemandHighBits) {
// Only run after legalization has started; earlier combines may still be
// forming the wider pattern.
7969 if (!LegalOperations)
7970 return SDValue();
7971
7972 EVT VT = N->getValueType(0);
7973 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7974 return SDValue();
// NOTE(review): source line 7975 is missing from this excerpt; presumably a
// legality check (e.g. ISD::BSWAP on VT) guarding this early return -- confirm
// against the full file.
7976 return SDValue();
7977
7978 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7979 bool LookPassAnd0 = false;
7980 bool LookPassAnd1 = false;
// Canonicalize: put the SHL side (or its masking AND) in N0, SRL side in N1.
7981 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7982 std::swap(N0, N1);
7983 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7984 std::swap(N0, N1);
7985 if (N0.getOpcode() == ISD::AND) {
7986 if (!N0->hasOneUse())
7987 return SDValue();
7988 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7989 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7990 // This is needed for X86.
7991 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7992 N01C->getZExtValue() != 0xFFFF))
7993 return SDValue();
7994 N0 = N0.getOperand(0);
7995 LookPassAnd0 = true;
7996 }
7997
7998 if (N1.getOpcode() == ISD::AND) {
7999 if (!N1->hasOneUse())
8000 return SDValue();
8001 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
8002 if (!N11C || N11C->getZExtValue() != 0xFF)
8003 return SDValue();
8004 N1 = N1.getOperand(0);
8005 LookPassAnd1 = true;
8006 }
8007
// After stripping the masks we must be left with exactly (shl, srl).
8008 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
8009 std::swap(N0, N1);
8010 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
8011 return SDValue();
8012 if (!N0->hasOneUse() || !N1->hasOneUse())
8013 return SDValue();
8014
// Both shifts must be by a constant 8.
8015 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8016 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
8017 if (!N01C || !N11C)
8018 return SDValue();
8019 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
8020 return SDValue();
8021
8022 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
8023 SDValue N00 = N0->getOperand(0);
8024 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
8025 if (!N00->hasOneUse())
8026 return SDValue();
8027 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
8028 if (!N001C || N001C->getZExtValue() != 0xFF)
8029 return SDValue();
8030 N00 = N00.getOperand(0);
8031 LookPassAnd0 = true;
8032 }
8033
8034 SDValue N10 = N1->getOperand(0);
8035 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
8036 if (!N10->hasOneUse())
8037 return SDValue();
8038 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
8039 // Also allow 0xFFFF since the bits will be shifted out. This is needed
8040 // for X86.
8041 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
8042 N101C->getZExtValue() != 0xFFFF))
8043 return SDValue();
8044 N10 = N10.getOperand(0);
8045 LookPassAnd1 = true;
8046 }
8047
// Both byte sources must come from the same value 'a'.
8048 if (N00 != N10)
8049 return SDValue();
8050
8051 // Make sure everything beyond the low halfword gets set to zero since the SRL
8052 // 16 will clear the top bits.
8053 unsigned OpSizeInBits = VT.getSizeInBits();
8054 if (OpSizeInBits > 16) {
8055 // If the left-shift isn't masked out then the only way this is a bswap is
8056 // if all bits beyond the low 8 are 0. In that case the entire pattern
8057 // reduces to a left shift anyway: leave it for other parts of the combiner.
8058 if (DemandHighBits && !LookPassAnd0)
8059 return SDValue();
8060
8061 // However, if the right shift isn't masked out then it might be because
8062 // it's not needed. See if we can spot that too. If the high bits aren't
8063 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
8064 // upper bits to be zero.
8065 if (!LookPassAnd1) {
8066 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
8067 if (!DAG.MaskedValueIsZero(N10,
8068 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
8069 return SDValue();
8070 }
8071 }
8072
// Emit bswap(a), then shift the swapped halfword down into bits [15:0] for
// types wider than i16.
8073 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
8074 if (OpSizeInBits > 16) {
8075 SDLoc DL(N);
8076 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8077 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
8078 }
8079 return Res;
8080}
8081
8082 /// Return true if the specified node is an element that makes up a 32-bit
8083 /// packed halfword byteswap.
8084 /// ((x & 0x000000ff) << 8) |
8085 /// ((x & 0x0000ff00) >> 8) |
8086 /// ((x & 0x00ff0000) << 8) |
8087 /// ((x & 0xff000000) >> 8)
// On success, Parts[byte-offset] is set to the node supplying that byte of x
// (so a caller can later check all four entries come from the same node).
// NOTE(review): the function's signature line (original line 8088,
// presumably "static bool isBSwapHWordElement(SDValue N, ...Parts...)") is
// not visible in this excerpt -- confirm against the full file.
8089 if (!N->hasOneUse())
8090 return false;
8091
// The element must be an AND/SHL/SRL whose inner node is also AND/SHL/SRL.
8092 unsigned Opc = N.getOpcode();
8093 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
8094 return false;
8095
8096 SDValue N0 = N.getOperand(0);
8097 unsigned Opc0 = N0.getOpcode();
8098 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
8099 return false;
8100
8101 ConstantSDNode *N1C = nullptr;
8102 // SHL or SRL: look upstream for AND mask operand
8103 if (Opc == ISD::AND)
8104 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8105 else if (Opc0 == ISD::AND)
// NOTE(review): original line 8106 (assignment of N1C from the inner AND's
// constant operand) is missing from this excerpt -- confirm against the full
// file.
8107 if (!N1C)
8108 return false;
8109
// Translate the mask constant into which byte of x it selects.
8110 unsigned MaskByteOffset;
8111 switch (N1C->getZExtValue()) {
8112 default:
8113 return false;
8114 case 0xFF: MaskByteOffset = 0; break;
8115 case 0xFF00: MaskByteOffset = 1; break;
8116 case 0xFFFF:
8117 // In case demanded bits didn't clear the bits that will be shifted out.
8118 // This is needed for X86.
8119 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
8120 MaskByteOffset = 1;
8121 break;
8122 }
8123 return false;
8124 case 0xFF0000: MaskByteOffset = 2; break;
8125 case 0xFF000000: MaskByteOffset = 3; break;
8126 }
8127
8128 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8129 if (Opc == ISD::AND) {
8130 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8131 // (x >> 8) & 0xff
8132 // (x >> 8) & 0xff0000
8133 if (Opc0 != ISD::SRL)
8134 return false;
// NOTE(review): original line 8135 (declaration of C from the inner shift
// amount) is missing from this excerpt.
8136 if (!C || C->getZExtValue() != 8)
8137 return false;
8138 } else {
8139 // (x << 8) & 0xff00
8140 // (x << 8) & 0xff000000
8141 if (Opc0 != ISD::SHL)
8142 return false;
// NOTE(review): original line 8143 (declaration of C from the inner shift
// amount) is missing from this excerpt.
8144 if (!C || C->getZExtValue() != 8)
8145 return false;
8146 }
8147 } else if (Opc == ISD::SHL) {
8148 // (x & 0xff) << 8
8149 // (x & 0xff0000) << 8
8150 if (MaskByteOffset != 0 && MaskByteOffset != 2)
8151 return false;
8152 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8153 if (!C || C->getZExtValue() != 8)
8154 return false;
8155 } else { // Opc == ISD::SRL
8156 // (x & 0xff00) >> 8
8157 // (x & 0xff000000) >> 8
8158 if (MaskByteOffset != 1 && MaskByteOffset != 3)
8159 return false;
8160 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8161 if (!C || C->getZExtValue() != 8)
8162 return false;
8163 }
8164
// Reject if another element already claimed this byte slot.
8165 if (Parts[MaskByteOffset])
8166 return false;
8167
8168 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
8169 return true;
8170}
8171
8172 // Match 2 elements of a packed halfword bswap.
// Accepts either an OR of two single-byte elements, or an already-formed
// (srl (bswap x), 16), which supplies both low bytes at once.
// NOTE(review): the function's signature line (original line 8173) is not
// visible in this excerpt -- confirm against the full file.
8174 if (N.getOpcode() == ISD::OR)
8175 return isBSwapHWordElement(N.getOperand(0), Parts) &&
8176 isBSwapHWordElement(N.getOperand(1), Parts);
8177
8178 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
8179 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
8180 if (!C || C->getAPIntValue() != 16)
8181 return false;
// Both low-halfword byte slots are sourced by the bswap'd value.
8182 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
8183 return true;
8184 }
8185
8186 return false;
8187}
8188
8189 // Match this pattern:
8190 // (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8191 // And rewrite this to:
8192 // (rotr (bswap A), 16)
// NOTE(review): the first line of the signature (original line 8193,
// presumably "static SDValue matchBSwapHWordOrAndAnd(const TargetLowering
// &TLI,") is not visible in this excerpt.
8194 SelectionDAG &DAG, SDNode *N, SDValue N0,
8195 SDValue N1, EVT VT) {
8196 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8197 "MatchBSwapHWordOrAndAnd: expecting i32");
// The rewrite produces a ROTR, so bail if the target can't do one.
8198 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
8199 return SDValue();
8200 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8201 return SDValue();
8202 // TODO: this is too restrictive; lifting this restriction requires more tests
8203 if (!N0->hasOneUse() || !N1->hasOneUse())
8204 return SDValue();
// NOTE(review): original lines 8205-8206 (definitions of Mask0/Mask1,
// presumably from the AND constant operands of N0/N1) are missing from this
// excerpt -- confirm against the full file.
8207 if (!Mask0 || !Mask1)
8208 return SDValue();
8209 if (Mask0->getAPIntValue() != 0xff00ff00 ||
8210 Mask1->getAPIntValue() != 0x00ff00ff)
8211 return SDValue();
8212 SDValue Shift0 = N0.getOperand(0);
8213 SDValue Shift1 = N1.getOperand(0);
8214 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8215 return SDValue();
8216 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
8217 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
8218 if (!ShiftAmt0 || !ShiftAmt1)
8219 return SDValue();
8220 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8221 return SDValue();
// Both shifts must consume the same value A.
8222 if (Shift0.getOperand(0) != Shift1.getOperand(0))
8223 return SDValue();
8224
8225 SDLoc DL(N);
8226 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
8227 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8228 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8229}
8230
8231 /// Match a 32-bit packed halfword bswap. That is
8232 /// ((x & 0x000000ff) << 8) |
8233 /// ((x & 0x0000ff00) >> 8) |
8234 /// ((x & 0x00ff0000) << 8) |
8235 /// ((x & 0xff000000) >> 8)
8236 /// => (rotl (bswap x), 16)
8237 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
8238 if (!LegalOperations)
8239 return SDValue();
8240
// Only i32 is handled; the byte masks below are hard-coded for 32 bits.
8241 EVT VT = N->getValueType(0);
8242 if (VT != MVT::i32)
8243 return SDValue();
// NOTE(review): original line 8244 is missing from this excerpt; presumably a
// legality check (e.g. ISD::BSWAP on VT) guarding this early return.
8245 return SDValue();
8246
// First try the compact shl/srl + mask form, in both operand orders.
8247 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8248 return BSwap;
8249
8250 // Try again with commuted operands.
8251 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
8252 return BSwap;
8253
8254
8255 // Look for either
8256 // (or (bswaphpair), (bswaphpair))
8257 // (or (or (bswaphpair), (and)), (and))
8258 // (or (or (and), (bswaphpair)), (and))
8259 SDNode *Parts[4] = {};
8260
8261 if (isBSwapHWordPair(N0, Parts)) {
8262 // (or (or (and), (and)), (or (and), (and)))
8263 if (!isBSwapHWordPair(N1, Parts))
8264 return SDValue();
8265 } else if (N0.getOpcode() == ISD::OR) {
8266 // (or (or (or (and), (and)), (and)), (and))
8267 if (!isBSwapHWordElement(N1, Parts))
8268 return SDValue();
8269 SDValue N00 = N0.getOperand(0);
8270 SDValue N01 = N0.getOperand(1);
8271 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
8272 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
8273 return SDValue();
8274 } else {
8275 return SDValue();
8276 }
8277
8278 // Make sure the parts are all coming from the same node.
8279 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8280 return SDValue();
8281
8282 SDLoc DL(N);
8283 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
8284 SDValue(Parts[0], 0));
8285
8286 // Result of the bswap should be rotated by 16. If it's not legal, then
8287 // do (x << 16) | (x >> 16).
8288 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
// NOTE(review): original lines 8289 and 8291 are missing from this excerpt;
// presumably hasOperation(ISD::ROTL/ISD::ROTR, VT) checks guarding the two
// rotate returns below -- confirm against the full file.
8290 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
8292 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8293 return DAG.getNode(ISD::OR, DL, VT,
8294 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
8295 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
8296}
8297
8298 /// This contains all DAGCombine rules which reduce two values combined by
8299 /// an Or operation to a single value \see visitANDLike().
// Called for real OR nodes and for OR-like patterns; DL is the location to
// attach to any newly created nodes.
8300 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8301 EVT VT = N1.getValueType();
8302
8303 // fold (or x, undef) -> -1
8304 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8305 return DAG.getAllOnesConstant(DL, VT);
8306
8307 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
8308 return V;
8309
8310 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
8311 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8312 // Don't increase # computations.
8313 (N0->hasOneUse() || N1->hasOneUse())) {
8314 // We can only do this xform if we know that bits from X that are set in C2
8315 // but not in C1 are already zero. Likewise for Y.
8316 if (const ConstantSDNode *N0O1C =
// NOTE(review): original lines 8317 and 8319 (the initializers of N0O1C and
// N1O1C, presumably fetching the AND mask constants of N0/N1) are missing
// from this excerpt -- confirm against the full file.
8318 if (const ConstantSDNode *N1O1C =
8320 // We can only do this xform if we know that bits from X that are set in
8321 // C2 but not in C1 are already zero. Likewise for Y.
8322 const APInt &LHSMask = N0O1C->getAPIntValue();
8323 const APInt &RHSMask = N1O1C->getAPIntValue();
8324
8325 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
8326 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
8327 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8328 N0.getOperand(0), N1.getOperand(0));
8329 return DAG.getNode(ISD::AND, DL, VT, X,
8330 DAG.getConstant(LHSMask | RHSMask, DL, VT));
8331 }
8332 }
8333 }
8334 }
8335
8336 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8337 if (N0.getOpcode() == ISD::AND &&
8338 N1.getOpcode() == ISD::AND &&
8339 N0.getOperand(0) == N1.getOperand(0) &&
8340 // Don't increase # computations.
8341 (N0->hasOneUse() || N1->hasOneUse())) {
8342 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8343 N0.getOperand(1), N1.getOperand(1));
8344 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
8345 }
8346
8347 return SDValue();
8348}
8349
8350 /// OR combines for which the commuted variant will be tried as well.
// visitOR calls this twice, once with (N0, N1) and once with (N1, N0), so
// each pattern below only needs to match one operand order.
// NOTE(review): the first line of the signature (original line 8351,
// presumably "static SDValue visitORCommutative(SelectionDAG &DAG, SDValue
// N0, SDValue N1,") is not visible in this excerpt.
8352 SDNode *N) {
8353 EVT VT = N0.getValueType();
8354 unsigned BW = VT.getScalarSizeInBits();
8355 SDLoc DL(N);
8356
// Look through zext/trunc so the AND-based folds also fire across resizes.
8357 auto peekThroughResize = [](SDValue V) {
8358 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8359 return V->getOperand(0);
8360 return V;
8361 };
8362
8363 SDValue N0Resized = peekThroughResize(N0);
8364 if (N0Resized.getOpcode() == ISD::AND) {
8365 SDValue N1Resized = peekThroughResize(N1);
8366 SDValue N00 = N0Resized.getOperand(0);
8367 SDValue N01 = N0Resized.getOperand(1);
8368
8369 // fold or (and x, y), x --> x
8370 if (N00 == N1Resized || N01 == N1Resized)
8371 return N1;
8372
8373 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8374 // TODO: Set AllowUndefs = true.
8375 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
8376 /* AllowUndefs */ false)) {
8377 if (peekThroughResize(NotOperand) == N1Resized)
8378 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
8379 N1);
8380 }
8381
8382 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8383 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
8384 /* AllowUndefs */ false)) {
8385 if (peekThroughResize(NotOperand) == N1Resized)
8386 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
8387 N1);
8388 }
8389 }
8390
8391 SDValue X, Y;
8392
8393 // fold or (xor X, N1), N1 --> or X, N1
8394 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
8395 return DAG.getNode(ISD::OR, DL, VT, X, N1);
8396
8397 // fold or (xor x, y), (x and/or y) --> or x, y
8398 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
8399 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
// NOTE(review): original line 8400 (presumably the matching m_Or(...)
// alternative closing this condition) is missing from this excerpt.
8401 return DAG.getNode(ISD::OR, DL, VT, X, Y);
8402
8403 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8404 return R;
8405
// Funnel-shift amounts may be zero-extended relative to the plain shift's
// amount; compare the unextended values.
8406 auto peekThroughZext = [](SDValue V) {
8407 if (V->getOpcode() == ISD::ZERO_EXTEND)
8408 return V->getOperand(0);
8409 return V;
8410 };
8411
8412 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8413 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8414 N0.getOperand(0) == N1.getOperand(0) &&
8415 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8416 return N0;
8417
8418 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8419 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8420 N0.getOperand(1) == N1.getOperand(0) &&
8421 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8422 return N0;
8423
8424 // Attempt to match a legalized build_pair-esque pattern:
8425 // or(shl(aext(Hi),BW/2),zext(Lo))
8426 SDValue Lo, Hi;
8427 if (sd_match(N0,
// NOTE(review): original line 8428 (the shl/aext matcher capturing Hi) is
// missing from this excerpt -- confirm against the full file.
8429 sd_match(N1, m_ZExt(m_Value(Lo))) &&
8430 Lo.getScalarValueSizeInBits() == (BW / 2) &&
8431 Lo.getValueType() == Hi.getValueType()) {
8432 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8433 SDValue NotLo, NotHi;
8434 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
8435 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
8436 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
8437 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
8438 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8439 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8440 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8441 }
8442 }
8443
8444 return SDValue();
8445}
8446
// Main combine entry point for ISD::OR nodes. Tries constant folding,
// identity/annihilator folds, vector shuffle merging, bswap/rotate/load
// pattern matching, reassociation, and demanded-bits simplification, in that
// order; returns the replacement value or an empty SDValue.
8447 SDValue DAGCombiner::visitOR(SDNode *N) {
8448 SDValue N0 = N->getOperand(0);
8449 SDValue N1 = N->getOperand(1);
8450 EVT VT = N1.getValueType();
8451 SDLoc DL(N);
8452
8453 // x | x --> x
8454 if (N0 == N1)
8455 return N0;
8456
8457 // fold (or c1, c2) -> c1|c2
8458 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8459 return C;
8460
8461 // canonicalize constant to RHS
// NOTE(review): original lines 8462-8463 (the constant-operand checks
// guarding this commute) are missing from this excerpt -- confirm against
// the full file.
8464 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8465
8466 // fold vector ops
8467 if (VT.isVector()) {
8468 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8469 return FoldedVOp;
8470
8471 // fold (or x, 0) -> x, vector edition
// NOTE(review): original line 8472 (the all-zeros check for N1) is missing
// from this excerpt.
8473 return N0;
8474
8475 // fold (or x, -1) -> -1, vector edition
// NOTE(review): original line 8476 (the all-ones check for N1) is missing
// from this excerpt.
8477 // do not return N1, because undef node may exist in N1
8478 return DAG.getAllOnesConstant(DL, N1.getValueType());
8479
8480 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8481 // Do this only if the resulting type / shuffle is legal.
8482 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8483 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8484 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8485 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8486 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8487 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8488 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8489 // Ensure both shuffles have a zero input.
8490 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8491 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8492 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8493 bool CanFold = true;
8494 int NumElts = VT.getVectorNumElements();
8495 SmallVector<int, 4> Mask(NumElts, -1);
8496
8497 for (int i = 0; i != NumElts; ++i) {
8498 int M0 = SV0->getMaskElt(i);
8499 int M1 = SV1->getMaskElt(i);
8500
8501 // Determine if either index is pointing to a zero vector.
8502 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8503 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8504
8505 // If one element is zero and the otherside is undef, keep undef.
8506 // This also handles the case that both are undef.
8507 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8508 continue;
8509
8510 // Make sure only one of the elements is zero.
8511 if (M0Zero == M1Zero) {
8512 CanFold = false;
8513 break;
8514 }
8515
8516 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8517
8518 // We have a zero and non-zero element. If the non-zero came from
8519 // SV0 make the index a LHS index. If it came from SV1, make it
8520 // a RHS index. We need to mod by NumElts because we don't care
8521 // which operand it came from in the original shuffles.
8522 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8523 }
8524
8525 if (CanFold) {
8526 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8527 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8528 SDValue LegalShuffle =
8529 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8530 if (LegalShuffle)
8531 return LegalShuffle;
8532 }
8533 }
8534 }
8535 }
8536
8537 // fold (or x, 0) -> x
8538 if (isNullConstant(N1))
8539 return N0;
8540
8541 // fold (or x, -1) -> -1
8542 if (isAllOnesConstant(N1))
8543 return N1;
8544
8545 if (SDValue NewSel = foldBinOpIntoSelect(N))
8546 return NewSel;
8547
8548 // fold (or x, c) -> c iff (x & ~c) == 0
8549 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8550 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8551 return N1;
8552
8553 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8554 return R;
8555
8556 if (SDValue Combined = visitORLike(N0, N1, DL))
8557 return Combined;
8558
8559 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8560 return Combined;
8561
8562 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8563 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8564 return BSwap;
8565 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8566 return BSwap;
8567
8568 // reassociate or
8569 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8570 return ROR;
8571
8572 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8573 if (SDValue SD =
8574 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8575 return SD;
8576
8577 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8578 // iff (c1 & c2) != 0 or c1/c2 are undef.
8579 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8580 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8581 };
8582 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8583 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8584 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8585 {N1, N0.getOperand(1)})) {
8586 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8587 AddToWorklist(IOR.getNode());
8588 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8589 }
8590 }
8591
// Try order-sensitive folds in both operand orders.
8592 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8593 return Combined;
8594 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8595 return Combined;
8596
8597 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8598 if (N0.getOpcode() == N1.getOpcode())
8599 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8600 return V;
8601
8602 // See if this is some rotate idiom.
8603 if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
8604 return Rot;
8605
8606 if (SDValue Load = MatchLoadCombine(N))
8607 return Load;
8608
8609 // Simplify the operands using demanded-bits information.
// NOTE(review): original line 8610 (the SimplifyDemandedBits call guarding
// this return) is missing from this excerpt.
8611 return SDValue(N, 0);
8612
8613 // If OR can be rewritten into ADD, try combines based on ADD.
8614 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8615 DAG.isADDLike(SDValue(N, 0)))
8616 if (SDValue Combined = visitADDLike(N))
8617 return Combined;
8618
8619 // Postpone until legalization completed to avoid interference with bswap
8620 // folding
8621 if (LegalOperations || VT.isVector())
8622 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8623 return R;
8624
8625 if (VT.isScalarInteger() && VT != MVT::i1)
8626 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8627 return R;
8628
8629 return SDValue();
8630}
8631
// Strip an outer (and Op, ConstantMask): returns the AND's first operand and
// stores the constant mask operand in Mask; otherwise returns Op unchanged
// and leaves Mask untouched.
// NOTE(review): the first line of the signature (original line 8632,
// presumably "static SDValue stripConstantMask(SelectionDAG &DAG, SDValue
// Op,") is not visible in this excerpt.
8633 SDValue &Mask) {
8634 if (Op.getOpcode() == ISD::AND &&
8635 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8636 Mask = Op.getOperand(1);
8637 return Op.getOperand(0);
8638 }
8639 return Op;
8640}
8641
8642/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8643static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8644 SDValue &Mask) {
8645 Op = stripConstantMask(DAG, Op, Mask);
8646 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8647 Shift = Op;
8648 return true;
8649 }
8650 return false;
8651}
8652
8653 /// Helper function for visitOR to extract the needed side of a rotate idiom
8654 /// from a shl/srl/mul/udiv. This is meant to handle cases where
8655 /// InstCombine merged some outside op with one of the shifts from
8656 /// the rotate pattern.
8657 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8658 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
8659 /// patterns:
8660 ///
8661 /// (or (add v v) (shrl v bitwidth-1)):
8662 /// expands (add v v) -> (shl v 1)
8663 ///
8664 /// (or (mul v c0) (shrl (mul v c1) c2)):
8665 /// expands (mul v c0) -> (shl (mul v c1) c3)
8666 ///
8667 /// (or (udiv v c0) (shl (udiv v c1) c2)):
8668 /// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8669 ///
8670 /// (or (shl v c0) (shrl (shl v c1) c2)):
8671 /// expands (shl v c0) -> (shl (shl v c1) c3)
8672 ///
8673 /// (or (shrl v c0) (shl (shrl v c1) c2)):
8674 /// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8675 ///
8676 /// Such that in all cases, c3+c2==bitwidth(op v c1).
// NOTE(review): the first line of the signature (original line 8677,
// presumably "static SDValue extractShiftForRotate(SelectionDAG &DAG,
// SDValue OppShift,") is not visible in this excerpt.
8678 SDValue ExtractFrom, SDValue &Mask,
8679 const SDLoc &DL) {
8680 assert(OppShift && ExtractFrom && "Empty SDValue");
// The other half of the rotate must be a plain constant-amount shift.
8681 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8682 return SDValue();
8683
8684 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8685
8686 // Value and Type of the shift.
8687 SDValue OppShiftLHS = OppShift.getOperand(0);
8688 EVT ShiftedVT = OppShiftLHS.getValueType();
8689
8690 // Amount of the existing shift.
8691 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8692
8693 // (add v v) -> (shl v 1)
8694 // TODO: Should this be a general DAG canonicalization?
8695 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8696 ExtractFrom.getOpcode() == ISD::ADD &&
8697 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8698 ExtractFrom.getOperand(0) == OppShiftLHS &&
8699 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8700 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8701 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8702
8703 // Preconditions:
8704 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8705 //
8706 // Find opcode of the needed shift to be extracted from (op0 v c0).
8707 unsigned Opcode = ISD::DELETED_NODE;
8708 bool IsMulOrDiv = false;
8709 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8710 // opcode or its arithmetic (mul or udiv) variant.
8711 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8712 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8713 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8714 return false;
8715 Opcode = NeededShift;
8716 return true;
8717 };
8718 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8719 // that the needed shift can be extracted from.
8720 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8721 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8722 return SDValue();
8723
8724 // op0 must be the same opcode on both sides, have the same LHS argument,
8725 // and produce the same value type.
8726 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8727 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8728 ShiftedVT != ExtractFrom.getValueType())
8729 return SDValue();
8730
8731 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8732 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8733 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8734 ConstantSDNode *ExtractFromCst =
8735 isConstOrConstSplat(ExtractFrom.getOperand(1));
8736 // TODO: We should be able to handle non-uniform constant vectors for these values
8737 // Check that we have constant values.
8738 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8739 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8740 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8741 return SDValue();
8742
8743 // Compute the shift amount we need to extract to complete the rotate.
8744 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8745 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8746 return SDValue();
// c3 = bitwidth - c2, so that the two shift amounts sum to the bitwidth.
8747 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8748 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8749 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8750 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8751 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8752
8753 // Now try extract the needed shift from the ExtractFrom op and see if the
8754 // result matches up with the existing shift's LHS op.
8755 if (IsMulOrDiv) {
8756 // Op to extract from is a mul or udiv by a constant.
8757 // Check:
8758 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8759 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8760 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8761 NeededShiftAmt.getZExtValue());
8762 APInt ResultAmt;
8763 APInt Rem;
8764 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8765 if (Rem != 0 || ResultAmt != OppLHSAmt)
8766 return SDValue();
8767 } else {
8768 // Op to extract from is a shift by a constant.
8769 // Check:
8770 // c2 - (bitwidth(op0 v c0) - c1) == c0
8771 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8772 ExtractFromAmt.getBitWidth()))
8773 return SDValue();
8774 }
8775
8776 // Return the expanded shift op that should allow a rotate to be formed.
8777 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8778 EVT ResVT = ExtractFrom.getValueType();
8779 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8780 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8781}
8782
8783// Return true if we can prove that, whenever Neg and Pos are both in the
8784// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8785// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8786//
8787// (or (shift1 X, Neg), (shift2 X, Pos))
8788//
8789// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8790// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8791// to consider shift amounts with defined behavior.
8792//
8793// The IsRotate flag should be set when the LHS of both shifts is the same.
8794// Otherwise if matching a general funnel shift, it should be clear.
8795static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8796 SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
8797 const auto &TLI = DAG.getTargetLoweringInfo();
8798 // If EltSize is a power of 2 then:
8799 //
8800 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8801 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8802 //
8803 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8804 // for the stronger condition:
8805 //
8806 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8807 //
8808 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8809 // we can just replace Neg with Neg' for the rest of the function.
8810 //
8811 // In other cases we check for the even stronger condition:
8812 //
8813 // Neg == EltSize - Pos [B]
8814 //
8815 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8816 // behavior if Pos == 0 (and consequently Neg == EltSize).
8817 //
8818 // We could actually use [A] whenever EltSize is a power of 2, but the
8819 // only extra cases that it would match are those uninteresting ones
8820 // where Neg and Pos are never in range at the same time. E.g. for
8821 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8822 // as well as (sub 32, Pos), but:
8823 //
8824 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8825 //
8826 // always invokes undefined behavior for 32-bit X.
8827 //
8828 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8829 // This allows us to peek through any operations that only affect Mask's
8830 // un-demanded bits.
8831 //
8832 // NOTE: We can only do this when matching operations which won't modify the
8833 // least Log2(EltSize) significant bits and not a general funnel shift.
8834 unsigned MaskLoBits = 0;
8835 if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
8836 unsigned Bits = Log2_64(EltSize);
8837 unsigned NegBits = Neg.getScalarValueSizeInBits();
8838 if (NegBits >= Bits) {
8839 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8840 if (SDValue Inner =
8842 Neg = Inner;
8843 MaskLoBits = Bits;
8844 }
8845 }
8846 }
8847
8848 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8849 if (Neg.getOpcode() != ISD::SUB)
8850 return false;
8852 if (!NegC)
8853 return false;
8854 SDValue NegOp1 = Neg.getOperand(1);
8855
8856 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8857 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8858 // are redundant for the purpose of the equality.
8859 if (MaskLoBits) {
8860 unsigned PosBits = Pos.getScalarValueSizeInBits();
8861 if (PosBits >= MaskLoBits) {
8862 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8863 if (SDValue Inner =
8865 Pos = Inner;
8866 }
8867 }
8868 }
8869
8870 // The condition we need is now:
8871 //
8872 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8873 //
8874 // If NegOp1 == Pos then we need:
8875 //
8876 // EltSize & Mask == NegC & Mask
8877 //
8878 // (because "x & Mask" is a truncation and distributes through subtraction).
8879 //
8880 // We also need to account for a potential truncation of NegOp1 if the amount
8881 // has already been legalized to a shift amount type.
8882 APInt Width;
8883 if ((Pos == NegOp1) ||
8884 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8885 Width = NegC->getAPIntValue();
8886
8887 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8888 // Then the condition we want to prove becomes:
8889 //
8890 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8891 //
8892 // which, again because "x & Mask" is a truncation, becomes:
8893 //
8894 // NegC & Mask == (EltSize - PosC) & Mask
8895 // EltSize & Mask == (NegC + PosC) & Mask
8896 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8897 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8898 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8899 else
8900 return false;
8901 } else
8902 return false;
8903
8904 // Now we just need to check that EltSize & Mask == Width & Mask.
8905 if (MaskLoBits)
8906 // EltSize & Mask is 0 since Mask is EltSize - 1.
8907 return Width.getLoBits(MaskLoBits) == 0;
8908 return Width == EltSize;
8909}
8910
8911// A subroutine of MatchRotate used once we have found an OR of two opposite
8912// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8913// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8914// former being preferred if supported. InnerPos and InnerNeg are Pos and
8915// Neg with outer conversions stripped away.
8916SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8917 SDValue Neg, SDValue InnerPos,
8918 SDValue InnerNeg, bool FromAdd,
8919 bool HasPos, unsigned PosOpcode,
8920 unsigned NegOpcode, const SDLoc &DL) {
8921 // fold (or/add (shl x, (*ext y)),
8922 // (srl x, (*ext (sub 32, y)))) ->
8923 // (rotl x, y) or (rotr x, (sub 32, y))
8924 //
8925 // fold (or/add (shl x, (*ext (sub 32, y))),
8926 // (srl x, (*ext y))) ->
8927 // (rotr x, y) or (rotl x, (sub 32, y))
8928 EVT VT = Shifted.getValueType();
8929 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8930 /*IsRotate*/ true, FromAdd))
8931 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8932 HasPos ? Pos : Neg);
8933
8934 return SDValue();
8935}
8936
8937// A subroutine of MatchRotate used once we have found an OR of two opposite
8938// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8939// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8940// former being preferred if supported. InnerPos and InnerNeg are Pos and
8941// Neg with outer conversions stripped away.
8942// TODO: Merge with MatchRotatePosNeg.
8943SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8944 SDValue Neg, SDValue InnerPos,
8945 SDValue InnerNeg, bool FromAdd,
8946 bool HasPos, unsigned PosOpcode,
8947 unsigned NegOpcode, const SDLoc &DL) {
8948 EVT VT = N0.getValueType();
8949 unsigned EltBits = VT.getScalarSizeInBits();
8950
8951 // fold (or/add (shl x0, (*ext y)),
8952 // (srl x1, (*ext (sub 32, y)))) ->
8953 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8954 //
8955 // fold (or/add (shl x0, (*ext (sub 32, y))),
8956 // (srl x1, (*ext y))) ->
8957 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8958 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8959 FromAdd))
8960 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8961 HasPos ? Pos : Neg);
8962
8963 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8964 // so for now just use the PosOpcode case if its legal.
8965 // TODO: When can we use the NegOpcode case?
8966 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8967 SDValue X;
8968 // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8969 // -> (fshl x0, x1, y)
8970 if (sd_match(N1, m_Srl(m_Value(X), m_One())) &&
8971 sd_match(InnerNeg,
8972 m_Xor(m_Specific(InnerPos), m_SpecificInt(EltBits - 1))) &&
8974 return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos);
8975 }
8976
8977 // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8978 // -> (fshr x0, x1, y)
8979 if (sd_match(N0, m_Shl(m_Value(X), m_One())) &&
8980 sd_match(InnerPos,
8981 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8983 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8984 }
8985
8986 // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8987 // -> (fshr x0, x1, y)
8988 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8989 if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) &&
8990 sd_match(InnerPos,
8991 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8993 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8994 }
8995 }
8996
8997 return SDValue();
8998}
8999
9000// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
9001// many idioms for rotate, and if the target supports rotation instructions,
9002// generate a rot[lr]. This also matches funnel shift patterns, similar to
9003// rotation but with different shifted sources.
9004SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
9005 bool FromAdd) {
9006 EVT VT = LHS.getValueType();
9007
9008 // The target must have at least one rotate/funnel flavor.
9009 // We still try to match rotate by constant pre-legalization.
9010 // TODO: Support pre-legalization funnel-shift by constant.
9011 bool HasROTL = hasOperation(ISD::ROTL, VT);
9012 bool HasROTR = hasOperation(ISD::ROTR, VT);
9013 bool HasFSHL = hasOperation(ISD::FSHL, VT);
9014 bool HasFSHR = hasOperation(ISD::FSHR, VT);
9015
9016 // If the type is going to be promoted and the target has enabled custom
9017 // lowering for rotate, allow matching rotate by non-constants. Only allow
9018 // this for scalar types.
9019 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
9023 }
9024
9025 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9026 return SDValue();
9027
9028 // Check for truncated rotate.
9029 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
9030 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
9031 assert(LHS.getValueType() == RHS.getValueType());
9032 if (SDValue Rot =
9033 MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
9034 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
9035 }
9036
9037 // Match "(X shl/srl V1) & V2" where V2 may not be present.
9038 SDValue LHSShift; // The shift.
9039 SDValue LHSMask; // AND value if any.
9040 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
9041
9042 SDValue RHSShift; // The shift.
9043 SDValue RHSMask; // AND value if any.
9044 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
9045
9046 // If neither side matched a rotate half, bail
9047 if (!LHSShift && !RHSShift)
9048 return SDValue();
9049
9050 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
9051 // side of the rotate, so try to handle that here. In all cases we need to
9052 // pass the matched shift from the opposite side to compute the opcode and
9053 // needed shift amount to extract. We still want to do this if both sides
9054 // matched a rotate half because one half may be a potential overshift that
9055 // can be broken down (ie if InstCombine merged two shl or srl ops into a
9056 // single one).
9057
9058 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
9059 if (LHSShift)
9060 if (SDValue NewRHSShift =
9061 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
9062 RHSShift = NewRHSShift;
9063 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
9064 if (RHSShift)
9065 if (SDValue NewLHSShift =
9066 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
9067 LHSShift = NewLHSShift;
9068
9069 // If a side is still missing, nothing else we can do.
9070 if (!RHSShift || !LHSShift)
9071 return SDValue();
9072
9073 // At this point we've matched or extracted a shift op on each side.
9074
9075 if (LHSShift.getOpcode() == RHSShift.getOpcode())
9076 return SDValue(); // Shifts must disagree.
9077
9078 // Canonicalize shl to left side in a shl/srl pair.
9079 if (RHSShift.getOpcode() == ISD::SHL) {
9080 std::swap(LHS, RHS);
9081 std::swap(LHSShift, RHSShift);
9082 std::swap(LHSMask, RHSMask);
9083 }
9084
9085 // Something has gone wrong - we've lost the shl/srl pair - bail.
9086 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
9087 return SDValue();
9088
9089 unsigned EltSizeInBits = VT.getScalarSizeInBits();
9090 SDValue LHSShiftArg = LHSShift.getOperand(0);
9091 SDValue LHSShiftAmt = LHSShift.getOperand(1);
9092 SDValue RHSShiftArg = RHSShift.getOperand(0);
9093 SDValue RHSShiftAmt = RHSShift.getOperand(1);
9094
9095 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
9096 ConstantSDNode *RHS) {
9097 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
9098 };
9099
9100 auto ApplyMasks = [&](SDValue Res) {
9101 // If there is an AND of either shifted operand, apply it to the result.
9102 if (LHSMask.getNode() || RHSMask.getNode()) {
9105
9106 if (LHSMask.getNode()) {
9107 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
9108 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
9109 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
9110 }
9111 if (RHSMask.getNode()) {
9112 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
9113 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
9114 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
9115 }
9116
9117 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
9118 }
9119
9120 return Res;
9121 };
9122
9123 // TODO: Support pre-legalization funnel-shift by constant.
9124 bool IsRotate = LHSShiftArg == RHSShiftArg;
9125 if (!IsRotate && !(HasFSHL || HasFSHR)) {
9126 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
9127 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9128 // Look for a disguised rotate by constant.
9129 // The common shifted operand X may be hidden inside another 'or'.
9130 SDValue X, Y;
9131 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
9132 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
9133 return false;
9134 if (CommonOp == Or.getOperand(0)) {
9135 X = CommonOp;
9136 Y = Or.getOperand(1);
9137 return true;
9138 }
9139 if (CommonOp == Or.getOperand(1)) {
9140 X = CommonOp;
9141 Y = Or.getOperand(0);
9142 return true;
9143 }
9144 return false;
9145 };
9146
9147 SDValue Res;
9148 if (matchOr(LHSShiftArg, RHSShiftArg)) {
9149 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
9150 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9151 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
9152 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
9153 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
9154 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
9155 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9156 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
9157 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
9158 } else {
9159 return SDValue();
9160 }
9161
9162 return ApplyMasks(Res);
9163 }
9164
9165 return SDValue(); // Requires funnel shift support.
9166 }
9167
9168 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
9169 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
9170 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
9171 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
9172 // iff C1+C2 == EltSizeInBits
9173 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9174 SDValue Res;
9175 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
9176 bool UseROTL = !LegalOperations || HasROTL;
9177 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
9178 UseROTL ? LHSShiftAmt : RHSShiftAmt);
9179 } else {
9180 bool UseFSHL = !LegalOperations || HasFSHL;
9181 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
9182 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
9183 }
9184
9185 return ApplyMasks(Res);
9186 }
9187
9188 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
9189 // shift.
9190 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9191 return SDValue();
9192
9193 // If there is a mask here, and we have a variable shift, we can't be sure
9194 // that we're masking out the right stuff.
9195 if (LHSMask.getNode() || RHSMask.getNode())
9196 return SDValue();
9197
9198 // If the shift amount is sign/zext/any-extended just peel it off.
9199 SDValue LExtOp0 = LHSShiftAmt;
9200 SDValue RExtOp0 = RHSShiftAmt;
9201 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9202 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9203 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9204 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
9205 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9206 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9207 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9208 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
9209 LExtOp0 = LHSShiftAmt.getOperand(0);
9210 RExtOp0 = RHSShiftAmt.getOperand(0);
9211 }
9212
9213 if (IsRotate && (HasROTL || HasROTR)) {
9214 if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
9215 LExtOp0, RExtOp0, FromAdd, HasROTL,
9217 return TryL;
9218
9219 if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
9220 RExtOp0, LExtOp0, FromAdd, HasROTR,
9222 return TryR;
9223 }
9224
9225 if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
9226 RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
9227 HasFSHL, ISD::FSHL, ISD::FSHR, DL))
9228 return TryL;
9229
9230 if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
9231 LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
9232 HasFSHR, ISD::FSHR, ISD::FSHL, DL))
9233 return TryR;
9234
9235 return SDValue();
9236}
9237
9238/// Recursively traverses the expression calculating the origin of the requested
9239/// byte of the given value. Returns std::nullopt if the provider can't be
9240/// calculated.
9241///
9242/// For all the values except the root of the expression, we verify that the
9243/// value has exactly one use and if not then return std::nullopt. This way if
9244/// the origin of the byte is returned it's guaranteed that the values which
9245/// contribute to the byte are not used outside of this expression.
9246
9247/// However, there is a special case when dealing with vector loads -- we allow
9248/// more than one use if the load is a vector type. Since the values that
9249/// contribute to the byte ultimately come from the ExtractVectorElements of the
9250/// Load, we don't care if the Load has uses other than ExtractVectorElements,
9251/// because those operations are independent from the pattern to be combined.
9252/// For vector loads, we simply care that the ByteProviders are adjacent
9253/// positions of the same vector, and their index matches the byte that is being
9254/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
9255/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
9256/// byte position we are trying to provide for the LoadCombine. If these do
9257/// not match, then we can not combine the vector loads. \p Index uses the
9258/// byte position we are trying to provide for and is matched against the
9259/// shl and load size. The \p Index algorithm ensures the requested byte is
9260/// provided for by the pattern, and the pattern does not over provide bytes.
9261///
9262///
9263/// The supported LoadCombine pattern for vector loads is as follows
9264/// or
9265/// / \
9266/// or shl
9267/// / \ |
9268/// or shl zext
9269/// / \ | |
9270/// shl zext zext EVE*
9271/// | | | |
9272/// zext EVE* EVE* LOAD
9273/// | | |
9274/// EVE* LOAD LOAD
9275/// |
9276/// LOAD
9277///
9278/// *ExtractVectorElement
9280
9281static std::optional<SDByteProvider>
9282calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
9283 std::optional<uint64_t> VectorIndex,
9284 unsigned StartingIndex = 0) {
9285
9286 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
9287 if (Depth == 10)
9288 return std::nullopt;
9289
9290 // Only allow multiple uses if the instruction is a vector load (in which
9291 // case we will use the load for every ExtractVectorElement)
9292 if (Depth && !Op.hasOneUse() &&
9293 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
9294 return std::nullopt;
9295
9296 // Fail to combine if we have encountered anything but a LOAD after handling
9297 // an ExtractVectorElement.
9298 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
9299 return std::nullopt;
9300
9301 unsigned BitWidth = Op.getScalarValueSizeInBits();
9302 if (BitWidth % 8 != 0)
9303 return std::nullopt;
9304 unsigned ByteWidth = BitWidth / 8;
9305 assert(Index < ByteWidth && "invalid index requested");
9306 (void) ByteWidth;
9307
9308 switch (Op.getOpcode()) {
9309 case ISD::OR: {
9310 auto LHS =
9311 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
9312 if (!LHS)
9313 return std::nullopt;
9314 auto RHS =
9315 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
9316 if (!RHS)
9317 return std::nullopt;
9318
9319 if (LHS->isConstantZero())
9320 return RHS;
9321 if (RHS->isConstantZero())
9322 return LHS;
9323 return std::nullopt;
9324 }
9325 case ISD::SHL: {
9326 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9327 if (!ShiftOp)
9328 return std::nullopt;
9329
9330 uint64_t BitShift = ShiftOp->getZExtValue();
9331
9332 if (BitShift % 8 != 0)
9333 return std::nullopt;
9334 uint64_t ByteShift = BitShift / 8;
9335
9336 // If we are shifting by an amount greater than the index we are trying to
9337 // provide, then do not provide anything. Otherwise, subtract the index by
9338 // the amount we shifted by.
9339 return Index < ByteShift
9341 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
9342 Depth + 1, VectorIndex, Index);
9343 }
9344 case ISD::ANY_EXTEND:
9345 case ISD::SIGN_EXTEND:
9346 case ISD::ZERO_EXTEND: {
9347 SDValue NarrowOp = Op->getOperand(0);
9348 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9349 if (NarrowBitWidth % 8 != 0)
9350 return std::nullopt;
9351 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9352
9353 if (Index >= NarrowByteWidth)
9354 return Op.getOpcode() == ISD::ZERO_EXTEND
9355 ? std::optional<SDByteProvider>(
9357 : std::nullopt;
9358 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
9359 StartingIndex);
9360 }
9361 case ISD::BSWAP:
9362 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
9363 Depth + 1, VectorIndex, StartingIndex);
9365 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9366 if (!OffsetOp)
9367 return std::nullopt;
9368
9369 VectorIndex = OffsetOp->getZExtValue();
9370
9371 SDValue NarrowOp = Op->getOperand(0);
9372 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9373 if (NarrowBitWidth % 8 != 0)
9374 return std::nullopt;
9375 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9376 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
9377 // type, leaving the high bits undefined.
9378 if (Index >= NarrowByteWidth)
9379 return std::nullopt;
9380
9381 // Check to see if the position of the element in the vector corresponds
9382 // with the byte we are trying to provide for. In the case of a vector of
9383 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
9384 // the element will provide a range of bytes. For example, if we have a
9385 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
9386 // 3).
9387 if (*VectorIndex * NarrowByteWidth > StartingIndex)
9388 return std::nullopt;
9389 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
9390 return std::nullopt;
9391
9392 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
9393 VectorIndex, StartingIndex);
9394 }
9395 case ISD::LOAD: {
9396 auto L = cast<LoadSDNode>(Op.getNode());
9397 if (!L->isSimple() || L->isIndexed())
9398 return std::nullopt;
9399
9400 unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
9401 if (NarrowBitWidth % 8 != 0)
9402 return std::nullopt;
9403 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9404
9405 // If the width of the load does not reach byte we are trying to provide for
9406 // and it is not a ZEXTLOAD, then the load does not provide for the byte in
9407 // question
9408 if (Index >= NarrowByteWidth)
9409 return L->getExtensionType() == ISD::ZEXTLOAD
9410 ? std::optional<SDByteProvider>(
9412 : std::nullopt;
9413
9414 unsigned BPVectorIndex = VectorIndex.value_or(0U);
9415 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
9416 }
9417 }
9418
9419 return std::nullopt;
9420}
9421
// In a little-endian layout, byte i of a BW-byte value is stored at offset i
// from the base address; the overall width does not affect the position.
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW; // Unused; kept for signature symmetry with bigEndianByteAt.
  return i;
}
9425
// In a big-endian layout the least-significant byte (i == 0) is stored last,
// so byte i of a BW-byte value lives at the mirrored offset from the base.
static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  return BW - 1 - i;
}
9429
9430// Check if the bytes offsets we are looking at match with either big or
9431// little endian value loaded. Return true for big endian, false for little
9432// endian, and std::nullopt if match failed.
9433static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,
9434 int64_t FirstOffset) {
9435 // The endian can be decided only when it is 2 bytes at least.
9436 unsigned Width = ByteOffsets.size();
9437 if (Width < 2)
9438 return std::nullopt;
9439
9440 bool BigEndian = true, LittleEndian = true;
9441 for (unsigned i = 0; i < Width; i++) {
9442 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9443 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9444 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9445 if (!BigEndian && !LittleEndian)
9446 return std::nullopt;
9447 }
9448
9449 assert((BigEndian != LittleEndian) && "It should be either big endian or"
9450 "little endian");
9451 return BigEndian;
9452}
9453
9454// Look through one layer of truncate or extend.
9456 switch (Value.getOpcode()) {
9457 case ISD::TRUNCATE:
9458 case ISD::ZERO_EXTEND:
9459 case ISD::SIGN_EXTEND:
9460 case ISD::ANY_EXTEND:
9461 return Value.getOperand(0);
9462 }
9463 return SDValue();
9464}
9465
9466/// Match a pattern where a wide type scalar value is stored by several narrow
9467/// stores. Fold it into a single store or a BSWAP and a store if the targets
9468/// supports it.
9469///
9470/// Assuming little endian target:
9471/// i8 *p = ...
9472/// i32 val = ...
9473/// p[0] = (val >> 0) & 0xFF;
9474/// p[1] = (val >> 8) & 0xFF;
9475/// p[2] = (val >> 16) & 0xFF;
9476/// p[3] = (val >> 24) & 0xFF;
9477/// =>
9478/// *((i32)p) = val;
9479///
9480/// i8 *p = ...
9481/// i32 val = ...
9482/// p[0] = (val >> 24) & 0xFF;
9483/// p[1] = (val >> 16) & 0xFF;
9484/// p[2] = (val >> 8) & 0xFF;
9485/// p[3] = (val >> 0) & 0xFF;
9486/// =>
9487/// *((i32)p) = BSWAP(val);
9488SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9489 // The matching looks for "store (trunc x)" patterns that appear early but are
9490 // likely to be replaced by truncating store nodes during combining.
9491 // TODO: If there is evidence that running this later would help, this
9492 // limitation could be removed. Legality checks may need to be added
9493 // for the created store and optional bswap/rotate.
9494 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9495 return SDValue();
9496
9497 // We only handle merging simple stores of 1-4 bytes.
9498 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9499 EVT MemVT = N->getMemoryVT();
9500 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9501 !N->isSimple() || N->isIndexed())
9502 return SDValue();
9503
9504 // Collect all of the stores in the chain, upto the maximum store width (i64).
9505 SDValue Chain = N->getChain();
9507 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9508 unsigned MaxWideNumBits = 64;
9509 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9510 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9511 // All stores must be the same size to ensure that we are writing all of the
9512 // bytes in the wide value.
9513 // This store should have exactly one use as a chain operand for another
9514 // store in the merging set. If there are other chain uses, then the
9515 // transform may not be safe because order of loads/stores outside of this
9516 // set may not be preserved.
9517 // TODO: We could allow multiple sizes by tracking each stored byte.
9518 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9519 Store->isIndexed() || !Store->hasOneUse())
9520 return SDValue();
9521 Stores.push_back(Store);
9522 Chain = Store->getChain();
9523 if (MaxStores < Stores.size())
9524 return SDValue();
9525 }
9526 // There is no reason to continue if we do not have at least a pair of stores.
9527 if (Stores.size() < 2)
9528 return SDValue();
9529
9530 // Handle simple types only.
9531 LLVMContext &Context = *DAG.getContext();
9532 unsigned NumStores = Stores.size();
9533 unsigned WideNumBits = NumStores * NarrowNumBits;
9534 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9535 return SDValue();
9536
9537 // Check if all bytes of the source value that we are looking at are stored
9538 // to the same base address. Collect offsets from Base address into OffsetMap.
9539 SDValue SourceValue;
9540 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9541 int64_t FirstOffset = INT64_MAX;
9542 StoreSDNode *FirstStore = nullptr;
9543 std::optional<BaseIndexOffset> Base;
9544 for (auto *Store : Stores) {
9545 // All the stores store different parts of the CombinedValue. A truncate is
9546 // required to get the partial value.
9547 SDValue Trunc = Store->getValue();
9548 if (Trunc.getOpcode() != ISD::TRUNCATE)
9549 return SDValue();
9550 // Other than the first/last part, a shift operation is required to get the
9551 // offset.
9552 int64_t Offset = 0;
9553 SDValue WideVal = Trunc.getOperand(0);
9554 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9555 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9556 // The shift amount must be a constant multiple of the narrow type.
9557 // It is translated to the offset address in the wide source value "y".
9558 //
9559 // x = srl y, ShiftAmtC
9560 // i8 z = trunc x
9561 // store z, ...
9562 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9563 if (ShiftAmtC % NarrowNumBits != 0)
9564 return SDValue();
9565
9566 // Make sure we aren't reading bits that are shifted in.
9567 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9568 return SDValue();
9569
9570 Offset = ShiftAmtC / NarrowNumBits;
9571 WideVal = WideVal.getOperand(0);
9572 }
9573
9574 // Stores must share the same source value with different offsets.
9575 if (!SourceValue)
9576 SourceValue = WideVal;
9577 else if (SourceValue != WideVal) {
9578 // Truncate and extends can be stripped to see if the values are related.
9579 if (stripTruncAndExt(SourceValue) != WideVal &&
9580 stripTruncAndExt(WideVal) != SourceValue)
9581 return SDValue();
9582
9583 if (WideVal.getScalarValueSizeInBits() >
9584 SourceValue.getScalarValueSizeInBits())
9585 SourceValue = WideVal;
9586
9587 // Give up if the source value type is smaller than the store size.
9588 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9589 return SDValue();
9590 }
9591
9592 // Stores must share the same base address.
9593 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9594 int64_t ByteOffsetFromBase = 0;
9595 if (!Base)
9596 Base = Ptr;
9597 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9598 return SDValue();
9599
9600 // Remember the first store.
9601 if (ByteOffsetFromBase < FirstOffset) {
9602 FirstStore = Store;
9603 FirstOffset = ByteOffsetFromBase;
9604 }
9605 // Map the offset in the store and the offset in the combined value, and
9606 // early return if it has been set before.
9607 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9608 return SDValue();
9609 OffsetMap[Offset] = ByteOffsetFromBase;
9610 }
9611
9612 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9613
9614 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9615 assert(FirstStore && "First store must be set");
9616
9617 // Check that a store of the wide type is both allowed and fast on the target
9618 const DataLayout &Layout = DAG.getDataLayout();
9619 unsigned Fast = 0;
9620 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9621 *FirstStore->getMemOperand(), &Fast);
9622 if (!Allowed || !Fast)
9623 return SDValue();
9624
9625 // Check if the pieces of the value are going to the expected places in memory
9626 // to merge the stores.
9627 auto checkOffsets = [&](bool MatchLittleEndian) {
9628 if (MatchLittleEndian) {
9629 for (unsigned i = 0; i != NumStores; ++i)
9630 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9631 return false;
9632 } else { // MatchBigEndian by reversing loop counter.
9633 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9634 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9635 return false;
9636 }
9637 return true;
9638 };
9639
9640 // Check if the offsets line up for the native data layout of this target.
9641 bool NeedBswap = false;
9642 bool NeedRotate = false;
9643 if (!checkOffsets(Layout.isLittleEndian())) {
9644 // Special-case: check if byte offsets line up for the opposite endian.
9645 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9646 NeedBswap = true;
9647 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9648 NeedRotate = true;
9649 else
9650 return SDValue();
9651 }
9652
9653 SDLoc DL(N);
9654 if (WideVT != SourceValue.getValueType()) {
9655 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9656 "Unexpected store value to merge");
9657 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9658 }
9659
9660 // Before legalize we can introduce illegal bswaps/rotates which will be later
9661 // converted to an explicit bswap sequence. This way we end up with a single
9662 // store and byte shuffling instead of several stores and byte shuffling.
9663 if (NeedBswap) {
9664 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9665 } else if (NeedRotate) {
9666 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9667 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9668 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9669 }
9670
9671 SDValue NewStore =
9672 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9673 FirstStore->getPointerInfo(), FirstStore->getAlign());
9674
9675 // Rely on other DAG combine rules to remove the other individual stores.
9676 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9677 return NewStore;
9678}
9679
9680/// Match a pattern where a wide type scalar value is loaded by several narrow
9681/// loads and combined by shifts and ors. Fold it into a single load or a load
9682 /// and a BSWAP if the target supports it.
9683///
9684/// Assuming little endian target:
9685/// i8 *a = ...
9686/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9687/// =>
9688/// i32 val = *((i32)a)
9689///
9690/// i8 *a = ...
9691/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9692/// =>
9693/// i32 val = BSWAP(*((i32)a))
9694///
9695/// TODO: This rule matches complex patterns with OR node roots and doesn't
9696/// interact well with the worklist mechanism. When a part of the pattern is
9697/// updated (e.g. one of the loads) its direct users are put into the worklist,
9698/// but the root node of the pattern which triggers the load combine is not
9699/// necessarily a direct user of the changed node. For example, once the address
9700/// of t28 load is reassociated load combine won't be triggered:
9701/// t25: i32 = add t4, Constant:i32<2>
9702/// t26: i64 = sign_extend t25
9703/// t27: i64 = add t2, t26
9704/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9705/// t29: i32 = zero_extend t28
9706/// t32: i32 = shl t29, Constant:i8<8>
9707/// t33: i32 = or t23, t32
9708/// As a possible fix visitLoad can check if the load can be a part of a load
9709/// combine pattern and add corresponding OR roots to the worklist.
9710 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9711   assert(N->getOpcode() == ISD::OR &&
9712          "Can only match load combining against OR nodes");
9713 
9714   // Handles simple types only
9715   EVT VT = N->getValueType(0);
9716   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9717     return SDValue();
9718   unsigned ByteWidth = VT.getSizeInBits() / 8;
9719 
      // Translate a byte provider into that byte's offset within its load's
      // in-memory footprint (endianness-aware), so offsets coming from
      // different loads can be compared on one common scale.
9720   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9721   auto MemoryByteOffset = [&](SDByteProvider P) {
9722     assert(P.hasSrc() && "Must be a memory byte provider");
9723     auto *Load = cast<LoadSDNode>(P.Src.value());
9724 
9725     unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9726 
9727     assert(LoadBitWidth % 8 == 0 &&
9728            "can only analyze providers for individual bytes not bit");
9729     unsigned LoadByteWidth = LoadBitWidth / 8;
9730     return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9731                              : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9732   };
9733 
9734   std::optional<BaseIndexOffset> Base;
9735   SDValue Chain;
9736 
9737   SmallPtrSet<LoadSDNode *, 8> Loads;
9738   std::optional<SDByteProvider> FirstByteProvider;
9739   int64_t FirstOffset = INT64_MAX;
9740 
9741   // Check if all the bytes of the OR we are looking at are loaded from the same
9742   // base address. Collect bytes offsets from Base address in ByteOffsets.
9743   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9744   unsigned ZeroExtendedBytes = 0;
      // Walk from the most-significant byte down so a run of constant-zero
      // high bytes can be recognized as a zero-extension.
9745   for (int i = ByteWidth - 1; i >= 0; --i) {
9746     auto P =
9747         calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9748                               /*StartingIndex*/ i);
9749     if (!P)
9750       return SDValue();
9751 
9752     if (P->isConstantZero()) {
9753       // It's OK for the N most significant bytes to be 0, we can just
9754       // zero-extend the load.
9755       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9756         return SDValue();
9757       continue;
9758     }
9759     assert(P->hasSrc() && "provenance should either be memory or zero");
9760     auto *L = cast<LoadSDNode>(P->Src.value());
9761 
9762     // All loads must share the same chain
9763     SDValue LChain = L->getChain();
9764     if (!Chain)
9765       Chain = LChain;
9766     else if (Chain != LChain)
9767       return SDValue();
9768 
9769     // Loads must share the same base address
9770     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9771     int64_t ByteOffsetFromBase = 0;
9772 
9773     // For vector loads, the expected load combine pattern will have an
9774     // ExtractElement for each index in the vector. While each of these
9775     // ExtractElements will be accessing the same base address as determined
9776     // by the load instruction, the actual bytes they interact with will differ
9777     // due to different ExtractElement indices. To accurately determine the
9778     // byte position of an ExtractElement, we offset the base load ptr with
9779     // the index multiplied by the byte size of each element in the vector.
9780     if (L->getMemoryVT().isVector()) {
9781       unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9782       if (LoadWidthInBit % 8 != 0)
9783         return SDValue();
9784       unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9785       Ptr.addToOffset(ByteOffsetFromVector);
9786     }
9787 
9788     if (!Base)
9789       Base = Ptr;
9790 
9791     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9792       return SDValue();
9793 
9794     // Calculate the offset of the current byte from the base address
9795     ByteOffsetFromBase += MemoryByteOffset(*P);
9796     ByteOffsets[i] = ByteOffsetFromBase;
9797 
9798     // Remember the first byte load
9799     if (ByteOffsetFromBase < FirstOffset) {
9800       FirstByteProvider = P;
9801       FirstOffset = ByteOffsetFromBase;
9802     }
9803 
9804     Loads.insert(L);
9805   }
9806 
9807   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9808          "memory, so there must be at least one load which produces the value");
9809   assert(Base && "Base address of the accessed memory location must be set");
9810   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9811 
      // If some most-significant bytes were constant zero, only the low
      // (ByteWidth - ZeroExtendedBytes) bytes need to be loaded; the result is
      // then zero-extended to VT.
9812   bool NeedsZext = ZeroExtendedBytes > 0;
9813 
9814   EVT MemVT =
9815       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9816 
9817   if (!MemVT.isSimple())
9818     return SDValue();
9819 
9820   // Before legalize we can introduce too wide illegal loads which will be later
9821   // split into legal sized loads. This enables us to combine i64 load by i8
9822   // patterns to a couple of i32 loads on 32 bit targets.
9823   if (LegalOperations &&
9824       !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9825                           MemVT))
9826     return SDValue();
9827 
9828   // Check if the bytes of the OR we are looking at match with either big or
9829   // little endian value load
9830   std::optional<bool> IsBigEndian = isBigEndian(
9831       ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9832   if (!IsBigEndian)
9833     return SDValue();
9834 
9835   assert(FirstByteProvider && "must be set");
9836 
9837   // Ensure that the first byte is loaded from zero offset of the first load.
9838   // So the combined value can be loaded from the first load address.
9839   if (MemoryByteOffset(*FirstByteProvider) != 0)
9840     return SDValue();
9841   auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9842 
9843   // The node we are looking at matches with the pattern, check if we can
9844   // replace it with a single (possibly zero-extended) load and bswap + shift if
9845   // needed.
9846 
9847   // If the load needs byte swap check if the target supports it
9848   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9849 
9850   // Before legalize we can introduce illegal bswaps which will be later
9851   // converted to an explicit bswap sequence. This way we end up with a single
9852   // load and byte shuffling instead of several loads and byte shuffling.
9853   // We do not introduce illegal bswaps when zero-extending as this tends to
9854   // introduce too many arithmetic instructions.
9855   if (NeedsBswap && (LegalOperations || NeedsZext) &&
9856       !TLI.isOperationLegal(ISD::BSWAP, VT))
9857     return SDValue();
9858 
9859   // If we need to bswap and zero extend, we have to insert a shift. Check that
9860   // it is legal.
9861   if (NeedsBswap && NeedsZext && LegalOperations &&
9862       !TLI.isOperationLegal(ISD::SHL, VT))
9863     return SDValue();
9864 
9865   // Check that a load of the wide type is both allowed and fast on the target
9866   unsigned Fast = 0;
9867   bool Allowed =
9868       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9869                              *FirstLoad->getMemOperand(), &Fast);
9870   if (!Allowed || !Fast)
9871     return SDValue();
9872 
9873   SDValue NewLoad =
9874       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9875                      Chain, FirstLoad->getBasePtr(),
9876                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9877 
9878   // Transfer chain users from old loads to the new load.
9879   for (LoadSDNode *L : Loads)
9880     DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9881 
9882   if (!NeedsBswap)
9883     return NewLoad;
9884 
      // With zext + bswap, first shift the loaded bytes into the high part so
      // the subsequent bswap leaves the zero bytes in the most-significant
      // positions of the result.
9885   SDValue ShiftedLoad =
9886       NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9887                               DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9888                                                          VT, SDLoc(N)))
9889                 : NewLoad;
9890   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9891 }
9892
9893// If the target has andn, bsl, or a similar bit-select instruction,
9894// we want to unfold masked merge, with canonical pattern of:
9895// | A | |B|
9896// ((x ^ y) & m) ^ y
9897// | D |
9898// Into:
9899// (x & m) | (y & ~m)
9900// If y is a constant, m is not a 'not', and the 'andn' does not work with
9901// immediates, we unfold into a different pattern:
9902// ~(~x & m) & (m | y)
9903// If x is a constant, m is a 'not', and the 'andn' does not work with
9904// immediates, we unfold into a different pattern:
9905// (x | ~m) & ~(~m & ~y)
9906// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9907// the very least that breaks andnpd / andnps patterns, and because those
9908// patterns are simplified in IR and shouldn't be created in the DAG
9909 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9910   assert(N->getOpcode() == ISD::XOR);
9911 
9912   // Don't touch 'not' (i.e. where y = -1).
9913   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9914     return SDValue();
9915 
9916   EVT VT = N->getValueType(0);
9917 
9918   // There are 3 commutable operators in the pattern,
9919   // so we have to deal with 8 possible variants of the basic pattern.
9920   SDValue X, Y, M;
      // Try to match 'And' as ((x ^ Other) & m), with the xor sitting at
      // operand XorIdx of the AND; on success, bind X, Y (== Other) and the
      // mask M for the caller.
9921   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9922     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9923       return false;
9924     SDValue Xor = And.getOperand(XorIdx);
9925     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9926       return false;
9927     SDValue Xor0 = Xor.getOperand(0);
9928     SDValue Xor1 = Xor.getOperand(1);
9929     // Don't touch 'not' (i.e. where y = -1).
9930     if (isAllOnesOrAllOnesSplat(Xor1))
9931       return false;
9932     if (Other == Xor0)
9933       std::swap(Xor0, Xor1);
9934     if (Other != Xor1)
9935       return false;
9936     X = Xor0;
9937     Y = Xor1;
9938     M = And.getOperand(XorIdx ? 0 : 1);
9939     return true;
9940   };
9941 
9942   SDValue N0 = N->getOperand(0);
9943   SDValue N1 = N->getOperand(1);
9944   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9945       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9946     return SDValue();
9947 
9948   // Don't do anything if the mask is constant. This should not be reachable.
9949   // InstCombine should have already unfolded this pattern, and DAGCombiner
9950   // probably shouldn't produce it, too.
9951   if (isa<ConstantSDNode>(M.getNode()))
9952     return SDValue();
9953 
9954   // We can transform if the target has AndNot
9955   if (!TLI.hasAndNot(M))
9956     return SDValue();
9957 
9958   SDLoc DL(N);
9959 
9960   // If Y is a constant, check that 'andn' works with immediates. Unless M is
9961   // a bitwise not that would already allow ANDN to be used.
9962   if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9963     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9964     // If not, we need to do a bit more work to make sure andn is still used.
9965     SDValue NotX = DAG.getNOT(DL, X, VT);
9966     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9967     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9968     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9969     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9970   }
9971 
9972   // If X is a constant and M is a bitwise not, check that 'andn' works with
9973   // immediates.
9974   if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9975     assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9976     // If not, we need to do a bit more work to make sure andn is still used.
9977     SDValue NotM = M.getOperand(0);
9978     SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9979     SDValue NotY = DAG.getNOT(DL, Y, VT);
9980     SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9981     SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9982     return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9983   }
9984 
      // Standard unfold: (x & m) | (y & ~m).
9985   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9986   SDValue NotM = DAG.getNOT(DL, M, VT);
9987   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9988 
9989   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9990 }
9991
// Combine/canonicalize an ISD::XOR node; returns the replacement value, or an
// empty SDValue if no fold applies.
// NOTE(review): this excerpt is missing several single condition lines (the
// gaps in the embedded numbering, e.g. 10013-10014, 10023, 10051, 10063,
// 10146, 10195, 10217, 10234, 10245); confirm the elided predicates against
// the full source before relying on this text.
9992 SDValue DAGCombiner::visitXOR(SDNode *N) {
9993   SDValue N0 = N->getOperand(0);
9994   SDValue N1 = N->getOperand(1);
9995   EVT VT = N0.getValueType();
9996   SDLoc DL(N);
9997 
9998   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9999   if (N0.isUndef() && N1.isUndef())
10000     return DAG.getConstant(0, DL, VT);
10001 
10002   // fold (xor x, undef) -> undef
10003   if (N0.isUndef())
10004     return N0;
10005   if (N1.isUndef())
10006     return N1;
10007 
10008   // fold (xor c1, c2) -> c1^c2
10009   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
10010     return C;
10011 
10012   // canonicalize constant to RHS
       // NOTE(review): guarding condition elided in this excerpt.
10015     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
10016 
10017   // fold vector ops
10018   if (VT.isVector()) {
10019     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10020       return FoldedVOp;
10021 
10022     // fold (xor x, 0) -> x, vector edition
       // NOTE(review): guarding condition elided in this excerpt.
10024       return N0;
10025   }
10026 
10027   // fold (xor x, 0) -> x
10028   if (isNullConstant(N1))
10029     return N0;
10030 
10031   if (SDValue NewSel = foldBinOpIntoSelect(N))
10032     return NewSel;
10033 
10034   // reassociate xor
10035   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
10036     return RXOR;
10037 
10038   // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
10039   if (SDValue SD =
10040           reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
10041     return SD;
10042 
10043   // fold (a^b) -> (a|b) iff a and b share no bits.
10044   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
10045       DAG.haveNoCommonBitsSet(N0, N1))
10046     return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
10047 
10048   // look for 'add-like' folds:
10049   // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
10050   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
       // NOTE(review): second half of the condition elided in this excerpt.
10052     if (SDValue Combined = visitADDLike(N))
10053       return Combined;
10054 
10055   // fold not (setcc x, y, cc) -> setcc x y !cc
10056   // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
10057   unsigned N0Opcode = N0.getOpcode();
10058   SDValue LHS, RHS, CC;
10059   if (TLI.isConstTrueVal(N1) &&
10060       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
10061       !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
10062         N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
       // NOTE(review): declaration of NotCC (the inverted condition code)
       // elided in this excerpt.
10064                                                LHS.getValueType());
10065     if (!LegalOperations ||
10066         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
10067       switch (N0Opcode) {
10068       default:
10069         llvm_unreachable("Unhandled SetCC Equivalent!");
10070       case ISD::SETCC:
10071         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
10072       case ISD::SELECT_CC:
10073         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
10074                                N0.getOperand(3), NotCC);
10075       case ISD::STRICT_FSETCC:
10076       case ISD::STRICT_FSETCCS: {
10077         if (N0.hasOneUse()) {
10078           // FIXME Can we handle multiple uses? Could we token factor the chain
10079           // results from the new/old setcc?
10080           SDValue SetCC =
10081               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
10082                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
10083           CombineTo(N, SetCC);
10084           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
10085           recursivelyDeleteUnusedNodes(N0.getNode());
10086           return SDValue(N, 0); // Return N so it doesn't get rechecked!
10087         }
10088         break;
10089       }
10090       }
10091     }
10092   }
10093 
10094   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
10095   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10096       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
10097     SDValue V = N0.getOperand(0);
10098     SDLoc DL0(N0);
10099     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
10100                     DAG.getConstant(1, DL0, V.getValueType()));
10101     AddToWorklist(V.getNode());
10102     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
10103   }
10104 
10105   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
10106   // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
10107   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
10108       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10109     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10110     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
10111       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10112       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10113       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10114       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10115       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10116     }
10117   }
10118   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
10119   // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
10120   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
10121       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10122     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10123     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
10124       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10125       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10126       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10127       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10128       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10129     }
10130   }
10131 
10132   // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
10133   if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) {
10134     SDValue Y = N0.getOperand(0);
10135     SDValue X = N0.getOperand(1);
10136 
10137     if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) {
10138       APInt NotYValue = ~YConst->getAPIntValue();
10139       SDValue NotY = DAG.getConstant(NotYValue, DL, VT);
10140       return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags());
10141     }
10142   }
10143 
10144   // fold (not (add X, -1)) -> (neg X)
10145   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(N1) &&
       // NOTE(review): second half of the condition elided in this excerpt.
10147     return DAG.getNegative(N0.getOperand(0), DL, VT);
10148   }
10149 
10150   // fold (xor (and x, y), y) -> (and (not x), y)
10151   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
10152     SDValue X = N0.getOperand(0);
10153     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
10154     AddToWorklist(NotX.getNode());
10155     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
10156   }
10157 
10158   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
10159   if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
10160     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
10161     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
10162     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
10163       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
10164       SDValue S0 = S.getOperand(0);
10165       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
10166         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
10167           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
10168             return DAG.getNode(ISD::ABS, DL, VT, S0);
10169     }
10170   }
10171 
10172   // fold (xor x, x) -> 0
10173   if (N0 == N1)
10174     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
10175 
10176   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
10177   // Here is a concrete example of this equivalence:
10178   // i16   x == 14
10179   // i16 shl == 1 << 14  == 16384 == 0b0100000000000000
10180   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
10181   //
10182   // =>
10183   //
10184   // i16     ~1      == 0b1111111111111110
10185   // i16 rol(~1, 14) == 0b1011111111111111
10186   //
10187   // Some additional tips to help conceptualize this transform:
10188   // - Try to see the operation as placing a single zero in a value of all ones.
10189   // - There exists no value for x which would allow the result to contain zero.
10190   // - Values of x larger than the bitwidth are undefined and do not require a
10191   //   consistent result.
10192   // - Pushing the zero left requires shifting one bits in from the right.
10193   // A rotate left of ~1 is a nice way of achieving the desired result.
10194   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
       // NOTE(review): second half of the condition elided in this excerpt.
10196     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
10197                        N0.getOperand(1));
10198   }
10199 
10200   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
10201   if (N0Opcode == N1.getOpcode())
10202     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
10203       return V;
10204 
10205   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
10206     return R;
10207   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
10208     return R;
10209   if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
10210     return R;
10211 
10212   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
10213   if (SDValue MM = unfoldMaskedMerge(N))
10214     return MM;
10215 
10216   // Simplify the expression using non-local knowledge.
       // NOTE(review): guarding condition elided in this excerpt.
10218     return SDValue(N, 0);
10219 
10220   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
10221     return Combined;
10222 
10223   // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
10224   // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
10225   // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
10226   // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
10227   SDValue Op0;
10228   if (sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
10229                                     m_SMax(m_Value(Op0), m_Specific(N1)),
10230                                     m_UMin(m_Value(Op0), m_Specific(N1)),
10231                                     m_UMax(m_Value(Op0), m_Specific(N1)))))) {
10232 
10233     if (isa<ConstantSDNode>(N1) ||
       // NOTE(review): second half of the condition elided in this excerpt.
10235       // For vectors, only optimize when the constant is zero or all-ones to
10236       // avoid generating more instructions
10237       if (VT.isVector()) {
10238         ConstantSDNode *N1C = isConstOrConstSplat(N1);
10239         if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
10240           return SDValue();
10241       }
10242 
10243       // Avoid the fold if the minmax operation is legal and select is expensive
10244       if (TLI.isOperationLegal(N0.getOpcode(), VT) &&
       // NOTE(review): second half of the condition elided in this excerpt.
10246         return SDValue();
10247 
10248       EVT CCVT = getSetCCResultType(VT);
10249       ISD::CondCode CC;
10250       switch (N0.getOpcode()) {
10251       case ISD::SMIN:
10252         CC = ISD::SETLT;
10253         break;
10254       case ISD::SMAX:
10255         CC = ISD::SETGT;
10256         break;
10257       case ISD::UMIN:
10258         CC = ISD::SETULT;
10259         break;
10260       case ISD::UMAX:
10261         CC = ISD::SETUGT;
10262         break;
10263       }
10264       SDValue FN1 = DAG.getFreeze(N1);
10265       SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, FN1, CC);
10266       SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, FN1);
10267       SDValue Zero = DAG.getConstant(0, DL, VT);
10268       return DAG.getSelect(DL, VT, Cmp, XorXC, Zero);
10269     }
10270   }
10271 
10272   return SDValue();
10273 }
10274
10275/// If we have a shift-by-constant of a bitwise logic op that itself has a
10276/// shift-by-constant operand with identical opcode, we may be able to convert
10277/// that into 2 independent shifts followed by the logic op. This is a
10278/// throughput improvement.
       // NOTE(review): the signature line of this static helper (original line
       // 10279, combineShiftOfShiftedLogic taking the outer shift node and the
       // SelectionDAG) is elided from this excerpt; the body below is its
       // entire remaining content.
10280   // Match a one-use bitwise logic op.
10281   SDValue LogicOp = Shift->getOperand(0);
10282   if (!LogicOp.hasOneUse())
10283     return SDValue();
10284 
10285   unsigned LogicOpcode = LogicOp.getOpcode();
10286   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
10287       LogicOpcode != ISD::XOR)
10288     return SDValue();
10289 
10290   // Find a matching one-use shift by constant.
10291   unsigned ShiftOpcode = Shift->getOpcode();
10292   SDValue C1 = Shift->getOperand(1);
10293   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
10294   assert(C1Node && "Expected a shift with constant operand");
10295   const APInt &C1Val = C1Node->getAPIntValue();
       // Match V as (ShiftOpcode ShiftOp, C0) with a constant C0 such that
       // C0 + C1 neither overflows the shift-amount type nor reaches the
       // bitwidth; binds ShiftOp and ShiftAmtVal on success.
10296   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
10297                              const APInt *&ShiftAmtVal) {
10298     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
10299       return false;
10300 
10301     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
10302     if (!ShiftCNode)
10303       return false;
10304 
10305     // Capture the shifted operand and shift amount value.
10306     ShiftOp = V.getOperand(0);
10307     ShiftAmtVal = &ShiftCNode->getAPIntValue();
10308 
10309     // Shift amount types do not have to match their operand type, so check that
10310     // the constants are the same width.
10311     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
10312       return false;
10313 
10314     // The fold is not valid if the sum of the shift values doesn't fit in the
10315     // given shift amount type.
10316     bool Overflow = false;
10317     APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
10318     if (Overflow)
10319       return false;
10320 
10321     // The fold is not valid if the sum of the shift values exceeds bitwidth.
10322     if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
10323       return false;
10324 
10325     return true;
10326   };
10327 
10328   // Logic ops are commutative, so check each operand for a match.
10329   SDValue X, Y;
10330   const APInt *C0Val;
10331   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
10332     Y = LogicOp.getOperand(1);
10333   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
10334     Y = LogicOp.getOperand(0);
10335   else
10336     return SDValue();
10337 
10338   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
10339   SDLoc DL(Shift);
10340   EVT VT = Shift->getValueType(0);
10341   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
10342   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
10343   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
10344   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
10345   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
10346                      LogicOp->getFlags());
10347 }
10348
10349/// Handle transforms common to the three shifts, when the shift amount is a
10350/// constant.
10351/// We are looking for: (shift being one of shl/sra/srl)
10352/// shift (binop X, C0), C1
10353/// And want to transform into:
10354/// binop (shift X, C1), (shift C0, C1)
10355 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10356   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10357 
10358   // Do not turn a 'not' into a regular xor.
10359   if (isBitwiseNot(N->getOperand(0)))
10360     return SDValue();
10361 
10362   // The inner binop must be one-use, since we want to replace it.
10363   SDValue LHS = N->getOperand(0);
10364   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10365     return SDValue();
10366 
10367   // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10368   if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
10369     return R;
10370 
10371   // We want to pull some binops through shifts, so that we have (and (shift))
10372   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
10373   // thing happens with address calculations, so it's important to canonicalize
10374   // it.
10375   switch (LHS.getOpcode()) {
10376   default:
10377     return SDValue();
10378   case ISD::OR:
10379   case ISD::XOR:
10380   case ISD::AND:
10381     break;
10382   case ISD::ADD:
10383     if (N->getOpcode() != ISD::SHL)
10384       return SDValue(); // only shl(add) not sr[al](add).
10385     break;
10386   }
10387 
10388   // FIXME: disable this unless the input to the binop is a shift by a constant
10389   // or is copy/select. Enable this in other cases when figure out it's exactly
10390   // profitable.
10391   SDValue BinOpLHSVal = LHS.getOperand(0);
10392   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10393                             BinOpLHSVal.getOpcode() == ISD::SRA ||
10394                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
10395                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
10396   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10397                         BinOpLHSVal.getOpcode() == ISD::SELECT;
10398 
10399   if (!IsShiftByConstant && !IsCopyOrSelect)
10400     return SDValue();
10401 
       // NOTE(review): bailing out when the shift node itself has exactly one
       // use (rather than more than one) is surprising for copy/select inputs;
       // confirm intent against the upstream source before changing.
10402   if (IsCopyOrSelect && N->hasOneUse())
10403     return SDValue();
10404 
10405   // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10406   SDLoc DL(N);
10407   EVT VT = N->getValueType(0);
10408   if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10409           N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
10410     SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
10411                                    N->getOperand(1));
10412     return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
10413   }
10414 
10415   return SDValue();
10416 }
10417
10418SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10419 assert(N->getOpcode() == ISD::TRUNCATE);
10420 assert(N->getOperand(0).getOpcode() == ISD::AND);
10421
10422 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
10423 EVT TruncVT = N->getValueType(0);
10424 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
10425 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
10426 SDValue N01 = N->getOperand(0).getOperand(1);
10427 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
10428 SDLoc DL(N);
10429 SDValue N00 = N->getOperand(0).getOperand(0);
10430 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
10431 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
10432 AddToWorklist(Trunc00.getNode());
10433 AddToWorklist(Trunc01.getNode());
10434 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
10435 }
10436 }
10437
10438 return SDValue();
10439}
10440
10441SDValue DAGCombiner::visitRotate(SDNode *N) {
10442 SDLoc dl(N);
10443 SDValue N0 = N->getOperand(0);
10444 SDValue N1 = N->getOperand(1);
10445 EVT VT = N->getValueType(0);
10446 unsigned Bitsize = VT.getScalarSizeInBits();
10447
10448 // fold (rot x, 0) -> x
10449 if (isNullOrNullSplat(N1))
10450 return N0;
10451
10452 // fold (rot x, c) -> x iff (c % BitSize) == 0
10453 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
10454 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
10455 if (DAG.MaskedValueIsZero(N1, ModuloMask))
10456 return N0;
10457 }
10458
10459 // fold (rot x, c) -> (rot x, c % BitSize)
10460 bool OutOfRange = false;
10461 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
10462 OutOfRange |= C->getAPIntValue().uge(Bitsize);
10463 return true;
10464 };
10465 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
10466 EVT AmtVT = N1.getValueType();
10467 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
10468 if (SDValue Amt =
10469 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
10470 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
10471 }
10472
10473 // rot i16 X, 8 --> bswap X
10474 auto *RotAmtC = isConstOrConstSplat(N1);
10475 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
10476 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
10477 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
10478
10479 // Simplify the operands using demanded-bits information.
10481 return SDValue(N, 0);
10482
10483 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
10484 if (N1.getOpcode() == ISD::TRUNCATE &&
10485 N1.getOperand(0).getOpcode() == ISD::AND) {
10486 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10487 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10488 }
10489
10490 unsigned NextOp = N0.getOpcode();
10491
10492 // fold (rot* (rot* x, c2), c1)
10493 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
10494 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10495 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10497 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10498 EVT ShiftVT = N1.getValueType();
10499 bool SameSide = (N->getOpcode() == NextOp);
10500 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10501 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10502 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10503 {N1, BitsizeC});
10504 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10505 {N0.getOperand(1), BitsizeC});
10506 if (Norm1 && Norm2)
10507 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10508 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10509 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10510 {CombinedShift, BitsizeC});
10511 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10512 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10513 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10514 CombinedShiftNorm);
10515 }
10516 }
10517 }
10518 return SDValue();
10519}
10520
10521SDValue DAGCombiner::visitSHL(SDNode *N) {
10522 SDValue N0 = N->getOperand(0);
10523 SDValue N1 = N->getOperand(1);
10524 if (SDValue V = DAG.simplifyShift(N0, N1))
10525 return V;
10526
10527 SDLoc DL(N);
10528 EVT VT = N0.getValueType();
10529 EVT ShiftVT = N1.getValueType();
10530 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10531
10532 // fold (shl c1, c2) -> c1<<c2
10533 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10534 return C;
10535
10536 // fold vector ops
10537 if (VT.isVector()) {
10538 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10539 return FoldedVOp;
10540
10541 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10542 // If setcc produces all-one true value then:
10543 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10544 if (N1CV && N1CV->isConstant()) {
10545 if (N0.getOpcode() == ISD::AND) {
10546 SDValue N00 = N0->getOperand(0);
10547 SDValue N01 = N0->getOperand(1);
10548 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10549
10550 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10553 if (SDValue C =
10554 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10555 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10556 }
10557 }
10558 }
10559 }
10560
10561 if (SDValue NewSel = foldBinOpIntoSelect(N))
10562 return NewSel;
10563
10564 // if (shl x, c) is known to be zero, return 0
10565 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10566 return DAG.getConstant(0, DL, VT);
10567
10568 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10569 if (N1.getOpcode() == ISD::TRUNCATE &&
10570 N1.getOperand(0).getOpcode() == ISD::AND) {
10571 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10572 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10573 }
10574
10575 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10576 if (N0.getOpcode() == ISD::SHL) {
10577 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10578 ConstantSDNode *RHS) {
10579 APInt c1 = LHS->getAPIntValue();
10580 APInt c2 = RHS->getAPIntValue();
10581 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10582 return (c1 + c2).uge(OpSizeInBits);
10583 };
10584 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10585 return DAG.getConstant(0, DL, VT);
10586
10587 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10588 ConstantSDNode *RHS) {
10589 APInt c1 = LHS->getAPIntValue();
10590 APInt c2 = RHS->getAPIntValue();
10591 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10592 return (c1 + c2).ult(OpSizeInBits);
10593 };
10594 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10595 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10596 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10597 }
10598 }
10599
10600 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10601 // For this to be valid, the second form must not preserve any of the bits
10602 // that are shifted out by the inner shift in the first form. This means
10603 // the outer shift size must be >= the number of bits added by the ext.
10604 // As a corollary, we don't care what kind of ext it is.
10605 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10606 N0.getOpcode() == ISD::ANY_EXTEND ||
10607 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10608 N0.getOperand(0).getOpcode() == ISD::SHL) {
10609 SDValue N0Op0 = N0.getOperand(0);
10610 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10611 EVT InnerVT = N0Op0.getValueType();
10612 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10613
10614 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10615 ConstantSDNode *RHS) {
10616 APInt c1 = LHS->getAPIntValue();
10617 APInt c2 = RHS->getAPIntValue();
10618 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10619 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10620 (c1 + c2).uge(OpSizeInBits);
10621 };
10622 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10623 /*AllowUndefs*/ false,
10624 /*AllowTypeMismatch*/ true))
10625 return DAG.getConstant(0, DL, VT);
10626
10627 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10628 ConstantSDNode *RHS) {
10629 APInt c1 = LHS->getAPIntValue();
10630 APInt c2 = RHS->getAPIntValue();
10631 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10632 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10633 (c1 + c2).ult(OpSizeInBits);
10634 };
10635 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10636 /*AllowUndefs*/ false,
10637 /*AllowTypeMismatch*/ true)) {
10638 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10639 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10640 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10641 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10642 }
10643 }
10644
10645 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10646 // Only fold this if the inner zext has no other uses to avoid increasing
10647 // the total number of instructions.
10648 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10649 N0.getOperand(0).getOpcode() == ISD::SRL) {
10650 SDValue N0Op0 = N0.getOperand(0);
10651 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10652
10653 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10654 APInt c1 = LHS->getAPIntValue();
10655 APInt c2 = RHS->getAPIntValue();
10656 zeroExtendToMatch(c1, c2);
10657 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10658 };
10659 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10660 /*AllowUndefs*/ false,
10661 /*AllowTypeMismatch*/ true)) {
10662 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10663 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10664 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10665 AddToWorklist(NewSHL.getNode());
10666 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10667 }
10668 }
10669
10670 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10671 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10672 ConstantSDNode *RHS) {
10673 const APInt &LHSC = LHS->getAPIntValue();
10674 const APInt &RHSC = RHS->getAPIntValue();
10675 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10676 LHSC.getZExtValue() <= RHSC.getZExtValue();
10677 };
10678
10679 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10680 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10681 if (N0->getFlags().hasExact()) {
10682 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10683 /*AllowUndefs*/ false,
10684 /*AllowTypeMismatch*/ true)) {
10685 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10686 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10687 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10688 }
10689 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10690 /*AllowUndefs*/ false,
10691 /*AllowTypeMismatch*/ true)) {
10692 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10693 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10694 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10695 }
10696 }
10697
10698 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
10699 // (and (srl x, (sub c1, c2), MASK)
10700 // Only fold this if the inner shift has no other uses -- if it does,
10701 // folding this will increase the total number of instructions.
10702 if (N0.getOpcode() == ISD::SRL &&
10703 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10705 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10706 /*AllowUndefs*/ false,
10707 /*AllowTypeMismatch*/ true)) {
10708 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10709 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10710 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10711 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10712 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10713 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10714 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10715 }
10716 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10717 /*AllowUndefs*/ false,
10718 /*AllowTypeMismatch*/ true)) {
10719 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10720 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10721 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10722 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10723 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10724 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10725 }
10726 }
10727 }
10728
10729 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10730 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10731 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10732 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10733 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10734 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10735 }
10736
10737 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10738 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10739 // Variant of version done on multiply, except mul by a power of 2 is turned
10740 // into a shift.
10741 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10742 TLI.isDesirableToCommuteWithShift(N, Level)) {
10743 SDValue N01 = N0.getOperand(1);
10744 if (SDValue Shl1 =
10745 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10746 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10747 AddToWorklist(Shl0.getNode());
10748 SDNodeFlags Flags;
10749 // Preserve the disjoint flag for Or.
10750 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10752 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10753 }
10754 }
10755
10756 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10757 // TODO: Add zext/add_nuw variant with suitable test coverage
10758 // TODO: Should we limit this with isLegalAddImmediate?
10759 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10760 N0.getOperand(0).getOpcode() == ISD::ADD &&
10761 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10762 TLI.isDesirableToCommuteWithShift(N, Level)) {
10763 SDValue Add = N0.getOperand(0);
10764 SDLoc DL(N0);
10765 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10766 {Add.getOperand(1)})) {
10767 if (SDValue ShlC =
10768 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10769 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10770 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10771 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10772 }
10773 }
10774 }
10775
10776 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10777 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10778 SDValue N01 = N0.getOperand(1);
10779 if (SDValue Shl =
10780 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10781 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10782 }
10783
10784 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10785 if (N1C && !N1C->isOpaque())
10786 if (SDValue NewSHL = visitShiftByConstant(N))
10787 return NewSHL;
10788
10789 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10790 // target.
10791 if (((N1.getOpcode() == ISD::CTTZ &&
10792 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10794 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10796 SDValue Y = N1.getOperand(0);
10797 SDLoc DL(N);
10798 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10799 SDValue And =
10800 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10801 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10802 }
10803
10805 return SDValue(N, 0);
10806
10807 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10808 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10809 const APInt &C0 = N0.getConstantOperandAPInt(0);
10810 const APInt &C1 = N1C->getAPIntValue();
10811 return DAG.getVScale(DL, VT, C0 << C1);
10812 }
10813
10814 SDValue X;
10815 APInt VS0;
10816
10817 // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
10818 if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
10819 SDNodeFlags Flags;
10820 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
10821 N0->getFlags().hasNoUnsignedWrap());
10822
10823 SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
10824 return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
10825 }
10826
10827 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10828 APInt ShlVal;
10829 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10830 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10831 const APInt &C0 = N0.getConstantOperandAPInt(0);
10832 if (ShlVal.ult(C0.getBitWidth())) {
10833 APInt NewStep = C0 << ShlVal;
10834 return DAG.getStepVector(DL, VT, NewStep);
10835 }
10836 }
10837
10838 return SDValue();
10839}
10840
10841// Transform a right shift of a multiply into a multiply-high.
10842// Examples:
10843// (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b)
10844// (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b)
10846 const TargetLowering &TLI) {
10847 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10848 "SRL or SRA node is required here!");
10849
10850 // Check the shift amount. Proceed with the transformation if the shift
10851 // amount is constant.
10852 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10853 if (!ShiftAmtSrc)
10854 return SDValue();
10855
10856 // The operation feeding into the shift must be a multiply.
10857 SDValue ShiftOperand = N->getOperand(0);
10858 if (ShiftOperand.getOpcode() != ISD::MUL)
10859 return SDValue();
10860
10861 // Both operands must be equivalent extend nodes.
10862 SDValue LeftOp = ShiftOperand.getOperand(0);
10863 SDValue RightOp = ShiftOperand.getOperand(1);
10864
10865 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10866 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10867
10868 if (!IsSignExt && !IsZeroExt)
10869 return SDValue();
10870
10871 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10872 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10873
10874 // return true if U may use the lower bits of its operands
10875 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10876 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10877 return true;
10878 }
10879 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10880 if (!UShiftAmtSrc) {
10881 return true;
10882 }
10883 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10884 return UShiftAmt < NarrowVTSize;
10885 };
10886
10887 // If the lower part of the MUL is also used and MUL_LOHI is supported
10888 // do not introduce the MULH in favor of MUL_LOHI
10889 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10890 if (!ShiftOperand.hasOneUse() &&
10891 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10892 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10893 return SDValue();
10894 }
10895
10896 SDValue MulhRightOp;
10898 unsigned ActiveBits = IsSignExt
10899 ? Constant->getAPIntValue().getSignificantBits()
10900 : Constant->getAPIntValue().getActiveBits();
10901 if (ActiveBits > NarrowVTSize)
10902 return SDValue();
10903 MulhRightOp = DAG.getConstant(
10904 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10905 NarrowVT);
10906 } else {
10907 if (LeftOp.getOpcode() != RightOp.getOpcode())
10908 return SDValue();
10909 // Check that the two extend nodes are the same type.
10910 if (NarrowVT != RightOp.getOperand(0).getValueType())
10911 return SDValue();
10912 MulhRightOp = RightOp.getOperand(0);
10913 }
10914
10915 EVT WideVT = LeftOp.getValueType();
10916 // Proceed with the transformation if the wide types match.
10917 assert((WideVT == RightOp.getValueType()) &&
10918 "Cannot have a multiply node with two different operand types.");
10919
10920 // Proceed with the transformation if the wide type is twice as large
10921 // as the narrow type.
10922 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10923 return SDValue();
10924
10925 // Check the shift amount with the narrow type size.
10926 // Proceed with the transformation if the shift amount is the width
10927 // of the narrow type.
10928 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10929 if (ShiftAmt != NarrowVTSize)
10930 return SDValue();
10931
10932 // If the operation feeding into the MUL is a sign extend (sext),
10933 // we use mulhs. Othewise, zero extends (zext) use mulhu.
10934 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10935
10936 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10937 // or if it is a vector type then we could transform to an acceptable type and
10938 // rely on legalization to split/combine the result.
10939 EVT TransformVT = NarrowVT;
10940 if (NarrowVT.isVector()) {
10941 TransformVT = TLI.getLegalTypeToTransformTo(*DAG.getContext(), NarrowVT);
10942 if (TransformVT.getScalarType() != NarrowVT.getScalarType())
10943 return SDValue();
10944 }
10945 if (!TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10946 return SDValue();
10947
10948 SDValue Result =
10949 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10950 bool IsSigned = N->getOpcode() == ISD::SRA;
10951 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10952}
10953
10954// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10955// This helper function accept SDNode with opcode ISD::BSWAP and ISD::BITREVERSE
10957 unsigned Opcode = N->getOpcode();
10958 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10959 return SDValue();
10960
10961 SDValue N0 = N->getOperand(0);
10962 EVT VT = N->getValueType(0);
10963 SDLoc DL(N);
10964 SDValue X, Y;
10965
10966 // If both operands are bswap/bitreverse, ignore the multiuse
10968 m_UnaryOp(Opcode, m_Value(Y))))))
10969 return DAG.getNode(N0.getOpcode(), DL, VT, X, Y);
10970
10971 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10973 m_OneUse(m_UnaryOp(Opcode, m_Value(X))), m_Value(Y))))) {
10974 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Y);
10975 return DAG.getNode(N0.getOpcode(), DL, VT, X, NewBitReorder);
10976 }
10977
10978 return SDValue();
10979}
10980
10981SDValue DAGCombiner::visitSRA(SDNode *N) {
10982 SDValue N0 = N->getOperand(0);
10983 SDValue N1 = N->getOperand(1);
10984 if (SDValue V = DAG.simplifyShift(N0, N1))
10985 return V;
10986
10987 SDLoc DL(N);
10988 EVT VT = N0.getValueType();
10989 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10990
10991 // fold (sra c1, c2) -> (sra c1, c2)
10992 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10993 return C;
10994
10995 // Arithmetic shifting an all-sign-bit value is a no-op.
10996 // fold (sra 0, x) -> 0
10997 // fold (sra -1, x) -> -1
10998 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10999 return N0;
11000
11001 // fold vector ops
11002 if (VT.isVector())
11003 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11004 return FoldedVOp;
11005
11006 if (SDValue NewSel = foldBinOpIntoSelect(N))
11007 return NewSel;
11008
11009 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11010
11011 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
11012 // clamp (add c1, c2) to max shift.
11013 if (N0.getOpcode() == ISD::SRA) {
11014 EVT ShiftVT = N1.getValueType();
11015 EVT ShiftSVT = ShiftVT.getScalarType();
11016 SmallVector<SDValue, 16> ShiftValues;
11017
11018 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
11019 APInt c1 = LHS->getAPIntValue();
11020 APInt c2 = RHS->getAPIntValue();
11021 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11022 APInt Sum = c1 + c2;
11023 unsigned ShiftSum =
11024 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
11025 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
11026 return true;
11027 };
11028 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
11029 SDValue ShiftValue;
11030 if (N1.getOpcode() == ISD::BUILD_VECTOR)
11031 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
11032 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
11033 assert(ShiftValues.size() == 1 &&
11034 "Expected matchBinaryPredicate to return one element for "
11035 "SPLAT_VECTORs");
11036 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
11037 } else
11038 ShiftValue = ShiftValues[0];
11039 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
11040 }
11041 }
11042
11043 // fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1)
11044 // This allows merging two arithmetic shifts even when there's a NOT in
11045 // between.
11046 SDValue X;
11047 APInt C1;
11048 if (N1C && sd_match(N0, m_OneUse(m_Not(
11049 m_OneUse(m_Sra(m_Value(X), m_ConstInt(C1))))))) {
11050 APInt C2 = N1C->getAPIntValue();
11051 zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */);
11052 APInt Sum = C1 + C2;
11053 unsigned ShiftSum = Sum.getLimitedValue(OpSizeInBits - 1);
11054 SDValue NewShift = DAG.getNode(
11055 ISD::SRA, DL, VT, X, DAG.getShiftAmountConstant(ShiftSum, VT, DL));
11056 return DAG.getNOT(DL, NewShift, VT);
11057 }
11058
11059 // fold (sra (shl X, m), (sub result_size, n))
11060 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
11061 // result_size - n != m.
11062 // If truncate is free for the target sext(shl) is likely to result in better
11063 // code.
11064 if (N0.getOpcode() == ISD::SHL && N1C) {
11065 // Get the two constants of the shifts, CN0 = m, CN = n.
11066 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
11067 if (N01C) {
11068 LLVMContext &Ctx = *DAG.getContext();
11069 // Determine what the truncate's result bitsize and type would be.
11070 EVT TruncVT = VT.changeElementType(
11071 Ctx, EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()));
11072
11073 // Determine the residual right-shift amount.
11074 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
11075
11076 // If the shift is not a no-op (in which case this should be just a sign
11077 // extend already), the truncated to type is legal, sign_extend is legal
11078 // on that type, and the truncate to that type is both legal and free,
11079 // perform the transform.
11080 if ((ShiftAmt > 0) &&
11083 TLI.isTruncateFree(VT, TruncVT)) {
11084 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
11085 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
11086 N0.getOperand(0), Amt);
11087 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
11088 Shift);
11089 return DAG.getNode(ISD::SIGN_EXTEND, DL,
11090 N->getValueType(0), Trunc);
11091 }
11092 }
11093 }
11094
11095 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
11096 // sra (add (shl X, N1C), AddC), N1C -->
11097 // sext (add (trunc X to (width - N1C)), AddC')
11098 // sra (sub AddC, (shl X, N1C)), N1C -->
11099 // sext (sub AddC1',(trunc X to (width - N1C)))
11100 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
11101 N0.hasOneUse()) {
11102 bool IsAdd = N0.getOpcode() == ISD::ADD;
11103 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
11104 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
11105 Shl.hasOneUse()) {
11106 // TODO: AddC does not need to be a splat.
11107 if (ConstantSDNode *AddC =
11108 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
11109 // Determine what the truncate's type would be and ask the target if
11110 // that is a free operation.
11111 LLVMContext &Ctx = *DAG.getContext();
11112 unsigned ShiftAmt = N1C->getZExtValue();
11113 EVT TruncVT = VT.changeElementType(
11114 Ctx, EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt));
11115
11116 // TODO: The simple type check probably belongs in the default hook
11117 // implementation and/or target-specific overrides (because
11118 // non-simple types likely require masking when legalized), but
11119 // that restriction may conflict with other transforms.
11120 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
11121 TLI.isTruncateFree(VT, TruncVT)) {
11122 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
11123 SDValue ShiftC =
11124 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
11125 TruncVT.getScalarSizeInBits()),
11126 DL, TruncVT);
11127 SDValue Add;
11128 if (IsAdd)
11129 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
11130 else
11131 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
11132 return DAG.getSExtOrTrunc(Add, DL, VT);
11133 }
11134 }
11135 }
11136 }
11137
11138 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
11139 if (N1.getOpcode() == ISD::TRUNCATE &&
11140 N1.getOperand(0).getOpcode() == ISD::AND) {
11141 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11142 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
11143 }
11144
11145 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
11146 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
11147 // if c1 is equal to the number of bits the trunc removes
11148 // TODO - support non-uniform vector shift amounts.
11149 if (N0.getOpcode() == ISD::TRUNCATE &&
11150 (N0.getOperand(0).getOpcode() == ISD::SRL ||
11151 N0.getOperand(0).getOpcode() == ISD::SRA) &&
11152 N0.getOperand(0).hasOneUse() &&
11153 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
11154 SDValue N0Op0 = N0.getOperand(0);
11155 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
11156 EVT LargeVT = N0Op0.getValueType();
11157 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
11158 if (LargeShift->getAPIntValue() == TruncBits) {
11159 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
11160 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
11161 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
11162 DAG.getConstant(TruncBits, DL, LargeShiftVT));
11163 SDValue SRA =
11164 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
11165 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
11166 }
11167 }
11168 }
11169
11170 // Simplify, based on bits shifted out of the LHS.
11172 return SDValue(N, 0);
11173
11174 // If the sign bit is known to be zero, switch this to a SRL.
11175 if (DAG.SignBitIsZero(N0))
11176 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
11177
11178 if (N1C && !N1C->isOpaque())
11179 if (SDValue NewSRA = visitShiftByConstant(N))
11180 return NewSRA;
11181
11182 // Try to transform this shift into a multiply-high if
11183 // it matches the appropriate pattern detected in combineShiftToMULH.
11184 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11185 return MULH;
11186
11187 // Attempt to convert a sra of a load into a narrower sign-extending load.
11188 if (SDValue NarrowLoad = reduceLoadWidth(N))
11189 return NarrowLoad;
11190
11191 if (SDValue AVG = foldShiftToAvg(N, DL))
11192 return AVG;
11193
11194 return SDValue();
11195}
11196
11197SDValue DAGCombiner::visitSRL(SDNode *N) {
11198 SDValue N0 = N->getOperand(0);
11199 SDValue N1 = N->getOperand(1);
11200 if (SDValue V = DAG.simplifyShift(N0, N1))
11201 return V;
11202
11203 SDLoc DL(N);
11204 EVT VT = N0.getValueType();
11205 EVT ShiftVT = N1.getValueType();
11206 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11207
11208 // fold (srl c1, c2) -> c1 >>u c2
11209 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
11210 return C;
11211
11212 // fold vector ops
11213 if (VT.isVector())
11214 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11215 return FoldedVOp;
11216
11217 if (SDValue NewSel = foldBinOpIntoSelect(N))
11218 return NewSel;
11219
11220 // if (srl x, c) is known to be zero, return 0
11221 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11222 if (N1C &&
11223 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
11224 return DAG.getConstant(0, DL, VT);
11225
11226 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
11227 if (N0.getOpcode() == ISD::SRL) {
11228 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
11229 ConstantSDNode *RHS) {
11230 APInt c1 = LHS->getAPIntValue();
11231 APInt c2 = RHS->getAPIntValue();
11232 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11233 return (c1 + c2).uge(OpSizeInBits);
11234 };
11235 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
11236 return DAG.getConstant(0, DL, VT);
11237
11238 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
11239 ConstantSDNode *RHS) {
11240 APInt c1 = LHS->getAPIntValue();
11241 APInt c2 = RHS->getAPIntValue();
11242 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11243 return (c1 + c2).ult(OpSizeInBits);
11244 };
11245 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
11246 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
11247 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
11248 }
11249 }
11250
11251 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
11252 N0.getOperand(0).getOpcode() == ISD::SRL) {
11253 SDValue InnerShift = N0.getOperand(0);
11254 // TODO - support non-uniform vector shift amounts.
11255 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
11256 uint64_t c1 = N001C->getZExtValue();
11257 uint64_t c2 = N1C->getZExtValue();
11258 EVT InnerShiftVT = InnerShift.getValueType();
11259 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
11260 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
11261 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
11262 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
11263 if (c1 + OpSizeInBits == InnerShiftSize) {
11264 if (c1 + c2 >= InnerShiftSize)
11265 return DAG.getConstant(0, DL, VT);
11266 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11267 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11268 InnerShift.getOperand(0), NewShiftAmt);
11269 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
11270 }
11271 // In the more general case, we can clear the high bits after the shift:
11272 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
11273 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
11274 c1 + c2 < InnerShiftSize) {
11275 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11276 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11277 InnerShift.getOperand(0), NewShiftAmt);
11278 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
11279 OpSizeInBits - c2),
11280 DL, InnerShiftVT);
11281 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
11282 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
11283 }
11284 }
11285 }
11286
11287 if (N0.getOpcode() == ISD::SHL) {
11288 // fold (srl (shl nuw x, c), c) -> x
11289 if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
11290 return N0.getOperand(0);
11291
11292 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
11293 // (and (srl x, (sub c2, c1), MASK)
11294 if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
11296 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
11297 ConstantSDNode *RHS) {
11298 const APInt &LHSC = LHS->getAPIntValue();
11299 const APInt &RHSC = RHS->getAPIntValue();
11300 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
11301 LHSC.getZExtValue() <= RHSC.getZExtValue();
11302 };
11303 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
11304 /*AllowUndefs*/ false,
11305 /*AllowTypeMismatch*/ true)) {
11306 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11307 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
11308 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11309 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
11310 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
11311 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
11312 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11313 }
11314 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
11315 /*AllowUndefs*/ false,
11316 /*AllowTypeMismatch*/ true)) {
11317 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11318 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
11319 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11320 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
11321 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
11322 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11323 }
11324 }
11325 }
11326
11327 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
11328 // TODO - support non-uniform vector shift amounts.
11329 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
11330 // Shifting in all undef bits?
11331 EVT SmallVT = N0.getOperand(0).getValueType();
11332 unsigned BitSize = SmallVT.getScalarSizeInBits();
11333 if (N1C->getAPIntValue().uge(BitSize))
11334 return DAG.getUNDEF(VT);
11335
11336 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
11337 uint64_t ShiftAmt = N1C->getZExtValue();
11338 SDLoc DL0(N0);
11339 SDValue SmallShift =
11340 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
11341 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
11342 AddToWorklist(SmallShift.getNode());
11343 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
11344 return DAG.getNode(ISD::AND, DL, VT,
11345 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
11346 DAG.getConstant(Mask, DL, VT));
11347 }
11348 }
11349
11350 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
11351 // bit, which is unmodified by sra.
11352 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
11353 if (N0.getOpcode() == ISD::SRA)
11354 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
11355 }
11356
11357 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
11358 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
11359 if (N1C && N0.getOpcode() == ISD::CTLZ &&
11360 isPowerOf2_32(OpSizeInBits) &&
11361 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
11362 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
11363
11364 // If any of the input bits are KnownOne, then the input couldn't be all
11365 // zeros, thus the result of the srl will always be zero.
11366 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
11367
11368 // If all of the bits input the to ctlz node are known to be zero, then
11369 // the result of the ctlz is "32" and the result of the shift is one.
11370 APInt UnknownBits = ~Known.Zero;
11371 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
11372
11373 // Otherwise, check to see if there is exactly one bit input to the ctlz.
11374 if (UnknownBits.isPowerOf2()) {
11375 // Okay, we know that only that the single bit specified by UnknownBits
11376 // could be set on input to the CTLZ node. If this bit is set, the SRL
11377 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
11378 // to an SRL/XOR pair, which is likely to simplify more.
11379 unsigned ShAmt = UnknownBits.countr_zero();
11380 SDValue Op = N0.getOperand(0);
11381
11382 if (ShAmt) {
11383 SDLoc DL(N0);
11384 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
11385 DAG.getShiftAmountConstant(ShAmt, VT, DL));
11386 AddToWorklist(Op.getNode());
11387 }
11388 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
11389 }
11390 }
11391
11392 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
11393 if (N1.getOpcode() == ISD::TRUNCATE &&
11394 N1.getOperand(0).getOpcode() == ISD::AND) {
11395 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11396 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
11397 }
11398
11399 // fold (srl (logic_op x, (shl (zext y), c1)), c1)
11400 // -> (logic_op (srl x, c1), (zext y))
11401 // c1 <= leadingzeros(zext(y))
11402 SDValue X, ZExtY;
11403 if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
11404 m_Value(X),
11407 m_Specific(N1))))))) {
11408 unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
11410 if (N1C->getZExtValue() <= NumLeadingZeros)
11411 return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
11412 DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
11413 }
11414
11415 // fold operands of srl based on knowledge that the low bits are not
11416 // demanded.
11418 return SDValue(N, 0);
11419
11420 if (N1C && !N1C->isOpaque())
11421 if (SDValue NewSRL = visitShiftByConstant(N))
11422 return NewSRL;
11423
11424 // Attempt to convert a srl of a load into a narrower zero-extending load.
11425 if (SDValue NarrowLoad = reduceLoadWidth(N))
11426 return NarrowLoad;
11427
11428 // Here is a common situation. We want to optimize:
11429 //
11430 // %a = ...
11431 // %b = and i32 %a, 2
11432 // %c = srl i32 %b, 1
11433 // brcond i32 %c ...
11434 //
11435 // into
11436 //
11437 // %a = ...
11438 // %b = and %a, 2
11439 // %c = setcc eq %b, 0
11440 // brcond %c ...
11441 //
11442 // However when after the source operand of SRL is optimized into AND, the SRL
11443 // itself may not be optimized further. Look for it and add the BRCOND into
11444 // the worklist.
11445 //
11446 // This also tends to happen for binary operations when SimplifyDemandedBits
11447 // is involved.
11448 //
11449 // FIXME: This is unnecessary if we process the DAG in topological order,
11450 // which we plan to do. This workaround can be removed once the DAG is
11451 // processed in topological order.
11452 if (N->hasOneUse()) {
11453 SDNode *User = *N->user_begin();
11454
11455 // Look past the truncate.
11456 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
11457 User = *User->user_begin();
11458
11459 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
11460 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
11461 AddToWorklist(User);
11462 }
11463
11464 // Try to transform this shift into a multiply-high if
11465 // it matches the appropriate pattern detected in combineShiftToMULH.
11466 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11467 return MULH;
11468
11469 if (SDValue AVG = foldShiftToAvg(N, DL))
11470 return AVG;
11471
11472 SDValue Y;
11473 if (VT.getScalarSizeInBits() % 2 == 0 && N1C) {
11474 // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y).
11475 unsigned HalfBW = VT.getScalarSizeInBits() / 2;
11476 if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
11477 X.getScalarValueSizeInBits() == HalfBW &&
11478 Y.getScalarValueSizeInBits() == HalfBW) {
11479 if (N1C->getZExtValue() == HalfBW - 1)
11480 return DAG.getNode(
11481 ISD::ZERO_EXTEND, DL, VT,
11482 DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y));
11483 if (N1C->getZExtValue() == HalfBW)
11484 return DAG.getNode(
11485 ISD::ZERO_EXTEND, DL, VT,
11486 DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y));
11487 }
11488 }
11489
11490 // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 ->
11491 // clmulh(x, y).
11492 if (N1C && N1C->getZExtValue() == 1 &&
11494 m_BitReverse(m_Value(Y))))))
11495 return DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
11496
11497 return SDValue();
11498}
11499
11500SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
11501 EVT VT = N->getValueType(0);
11502 SDValue N0 = N->getOperand(0);
11503 SDValue N1 = N->getOperand(1);
11504 SDValue N2 = N->getOperand(2);
11505 bool IsFSHL = N->getOpcode() == ISD::FSHL;
11506 unsigned BitWidth = VT.getScalarSizeInBits();
11507 SDLoc DL(N);
11508
11509 // fold (fshl/fshr C0, C1, C2) -> C3
11510 if (SDValue C =
11511 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
11512 return C;
11513
11514 // fold (fshl N0, N1, 0) -> N0
11515 // fold (fshr N0, N1, 0) -> N1
11517 if (DAG.MaskedValueIsZero(
11518 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
11519 return IsFSHL ? N0 : N1;
11520
11521 auto IsUndefOrZero = [](SDValue V) {
11522 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
11523 };
11524
11525 // TODO - support non-uniform vector shift amounts.
11526 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
11527 EVT ShAmtTy = N2.getValueType();
11528
11529 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
11530 if (Cst->getAPIntValue().uge(BitWidth)) {
11531 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
11532 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
11533 DAG.getConstant(RotAmt, DL, ShAmtTy));
11534 }
11535
11536 unsigned ShAmt = Cst->getZExtValue();
11537 if (ShAmt == 0)
11538 return IsFSHL ? N0 : N1;
11539
11540 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
11541 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
11542 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
11543 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
11544 if (IsUndefOrZero(N0))
11545 return DAG.getNode(
11546 ISD::SRL, DL, VT, N1,
11547 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
11548 if (IsUndefOrZero(N1))
11549 return DAG.getNode(
11550 ISD::SHL, DL, VT, N0,
11551 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
11552
11553 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11554 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11555 // TODO - bigendian support once we have test coverage.
11556 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11557 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11558 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11559 !DAG.getDataLayout().isBigEndian()) {
11560 auto *LHS = dyn_cast<LoadSDNode>(N0);
11561 auto *RHS = dyn_cast<LoadSDNode>(N1);
11562 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11563 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11564 (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11566 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11567 SDLoc DL(RHS);
11568 uint64_t PtrOff =
11569 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11570 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11571 unsigned Fast = 0;
11572 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11573 RHS->getAddressSpace(), NewAlign,
11574 RHS->getMemOperand()->getFlags(), &Fast) &&
11575 Fast) {
11576 SDValue NewPtr = DAG.getMemBasePlusOffset(
11577 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11578 AddToWorklist(NewPtr.getNode());
11579 SDValue Load = DAG.getLoad(
11580 VT, DL, RHS->getChain(), NewPtr,
11581 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11582 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11583 DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11584 DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
11585 return Load;
11586 }
11587 }
11588 }
11589 }
11590 }
11591
11592 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11593 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11594 // iff We know the shift amount is in range.
11595 // TODO: when is it worth doing SUB(BW, N2) as well?
11596 if (isPowerOf2_32(BitWidth)) {
11597 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11598 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11599 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11600 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11601 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11602 }
11603
11604 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11605 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11606 // TODO: Investigate flipping this rotate if only one is legal.
11607 // If funnel shift is legal as well we might be better off avoiding
11608 // non-constant (BW - N2).
11609 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11610 if (N0 == N1 && hasOperation(RotOpc, VT))
11611 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11612
11613 // Simplify, based on bits shifted out of N0/N1.
11615 return SDValue(N, 0);
11616
11617 return SDValue();
11618}
11619
11620SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11621 SDValue N0 = N->getOperand(0);
11622 SDValue N1 = N->getOperand(1);
11623 if (SDValue V = DAG.simplifyShift(N0, N1))
11624 return V;
11625
11626 SDLoc DL(N);
11627 EVT VT = N0.getValueType();
11628
11629 // fold (*shlsat c1, c2) -> c1<<c2
11630 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11631 return C;
11632
11633 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11634
11635 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11636 // fold (sshlsat x, c) -> (shl x, c)
11637 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11638 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11639 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11640
11641 // fold (ushlsat x, c) -> (shl x, c)
11642 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11643 N1C->getAPIntValue().ule(
11645 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11646 }
11647
11648 return SDValue();
11649}
11650
11651// Given a ABS node, detect the following patterns:
11652// (ABS (SUB (EXTEND a), (EXTEND b))).
11653// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11654// Generates UABD/SABD instruction.
11655 SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
// SrcVT is the type the caller expects back. If N is a TRUNCATE we step
// inside it below, so every successful fold re-narrows via getZExtOrTrunc.
11656 EVT SrcVT = N->getValueType(0);
11657
// Peek through a truncate so the (TRUNC (ABS (SUB ...))) form is handled.
11658 if (N->getOpcode() == ISD::TRUNCATE)
11659 N = N->getOperand(0).getNode();
11660
11661 EVT VT = N->getValueType(0);
11662 SDValue Op0, Op1;
11663
// Only (abs (sub x, y)) is an ABD candidate; bail out on anything else.
11664 if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
11665 return SDValue();
11666
// AbsOp0 is the SUB node itself (needed for its wrap flags below);
// Op0/Op1 are the subtraction's operands.
11667 SDValue AbsOp0 = N->getOperand(0);
11668 unsigned Opc0 = Op0.getOpcode();
11669
11670 // Check if the operands of the sub are (zero|sign)-extended, otherwise
11671 // fallback to ValueTracking.
11672 if (Opc0 != Op1.getOpcode() ||
11673 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11674 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11675 // fold (abs (sub nsw x, y)) -> abds(x, y)
11676 // Don't fold this for unsupported types as we lose the NSW handling.
11677 if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11678 (AbsOp0->getFlags().hasNoSignedWrap() ||
11679 DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
11680 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11681 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11682 }
11683 // fold (abs (sub x, y)) -> abdu(x, y)
// Clear sign bits on both operands make the unsigned difference valid.
11684 if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
11685 DAG.SignBitIsZero(Op1)) {
11686 SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
11687 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11688 }
11689 return SDValue();
11690 }
11691
// Both operands use the same extension opcode. Recover the pre-extension
// types: SIGN_EXTEND_INREG carries its narrow type as operand 1, the plain
// extends carry it as the source operand's type.
11692 EVT VT0, VT1;
11693 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11694 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11695 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11696 } else {
11697 VT0 = Op0.getOperand(0).getValueType();
11698 VT1 = Op1.getOperand(0).getValueType();
11699 }
// Zero-extended operands give an unsigned difference, otherwise signed.
11700 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11701
11702 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11703 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
// Prefer computing the ABD at the wider of the two narrow types; the
// hasOneUse checks avoid duplicating an extend that has other users.
11704 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11705 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11706 (VT1 == MaxVT || Op1->hasOneUse()) &&
11707 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11708 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11709 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11710 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
// The absolute difference always fits the narrow type, so zero-extension
// back to VT is correct for both the signed and unsigned case.
11711 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11712 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11713 }
11714
11715 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11716 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
// Fallback: form the ABD at the wide type without stripping the extends.
11717 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11718 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11719 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11720 }
11721
11722 return SDValue();
11723 }
11724
11725SDValue DAGCombiner::visitABS(SDNode *N) {
11726 SDValue N0 = N->getOperand(0);
11727 EVT VT = N->getValueType(0);
11728 SDLoc DL(N);
11729
11730 // fold (abs c1) -> c2
11731 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11732 return C;
11733 // fold (abs (abs x)) -> (abs x)
11734 if (N0.getOpcode() == ISD::ABS)
11735 return N0;
11736 // fold (abs x) -> x iff not-negative
11737 if (DAG.SignBitIsZero(N0))
11738 return N0;
11739
11740 if (SDValue ABD = foldABSToABD(N, DL))
11741 return ABD;
11742
11743 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11744 // iff zero_extend/truncate are free.
11745 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11746 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11747 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11748 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11749 hasOperation(ISD::ABS, ExtVT)) {
11750 return DAG.getNode(
11751 ISD::ZERO_EXTEND, DL, VT,
11752 DAG.getNode(ISD::ABS, DL, ExtVT,
11753 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11754 }
11755 }
11756
11757 return SDValue();
11758}
11759
11760 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11761 SDValue N0 = N->getOperand(0);
11762 EVT VT = N->getValueType(0);
11763 SDLoc DL(N);
11764
11765 // fold (bswap c1) -> c2
11766 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11767 return C;
11768 // fold (bswap (bswap x)) -> x
11769 if (N0.getOpcode() == ISD::BSWAP)
11770 return N0.getOperand(0);
11771
11772 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11773 // isn't supported, it will be expanded to bswap followed by a manual reversal
11774 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11775 // the two bswaps if the bitreverse gets expanded.
11776 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11777 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11778 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11779 }
11780
11781 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11782 // iff x >= bw/2 (i.e. lower half is known zero)
// Shift amount must keep the low half zero (c >= BW/2) and stay byte/word
// aligned (multiple of 16 bits) so the narrow bswap stays meaningful.
11783 unsigned BW = VT.getScalarSizeInBits();
11784 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11785 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11786 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11787 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11788 ShAmt->getZExtValue() >= (BW / 2) &&
11789 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11790 TLI.isTruncateFree(VT, HalfVT) &&
11791 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11792 SDValue Res = N0.getOperand(0);
// Only the residual shift (c - BW/2) is still needed before narrowing;
// a residual of zero means no shift at all.
11793 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11794 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11795 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11796 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11797 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11798 return DAG.getZExtOrTrunc(Res, DL, VT);
11799 }
11800 }
11801
11802 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11803 // inverse-shift-of-bswap:
11804 // bswap (X u<< C) --> (bswap X) u>> C
11805 // bswap (X u>> C) --> (bswap X) u<< C
// A whole-byte shift commutes with bswap provided the shift direction is
// flipped, since bswap permutes bytes without mixing bits across them.
11806 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11807 N0.hasOneUse()) {
11808 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11809 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11810 ShAmt->getZExtValue() % 8 == 0) {
11811 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11812 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11813 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11814 }
11815 }
11816
// Last resort: push the byte reversal through a logic op if profitable.
11817 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11818 return V;
11819
11820 return SDValue();
11821 }
11822
11823SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11824 SDValue N0 = N->getOperand(0);
11825 EVT VT = N->getValueType(0);
11826 SDLoc DL(N);
11827
11828 // fold (bitreverse c1) -> c2
11829 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11830 return C;
11831
11832 // fold (bitreverse (bitreverse x)) -> x
11833 if (N0.getOpcode() == ISD::BITREVERSE)
11834 return N0.getOperand(0);
11835
11836 SDValue X, Y;
11837
11838 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11839 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11841 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11842
11843 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11844 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11846 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11847
11848 // fold bitreverse(clmul(bitreverse(x), bitreverse(y))) -> clmulr(x, y)
11850 return DAG.getNode(ISD::CLMULR, DL, VT, X, Y);
11851
11852 return SDValue();
11853}
11854
11855// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
11856 // Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1)) -> (ctls x)
11857SDValue DAGCombiner::foldCTLZToCTLS(SDValue Src, const SDLoc &DL) {
11858 EVT VT = Src.getValueType();
11859
11860 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
11861 if ((LK.first != TargetLoweringBase::TypeLegal &&
11863 !TLI.isOperationLegalOrCustom(ISD::CTLS, LK.second))
11864 return SDValue();
11865
11866 unsigned BitWidth = VT.getScalarSizeInBits();
11867
11868 bool NeedAdd = true;
11869
11870 SDValue X;
11872 m_SpecificInt(1))))) {
11873 NeedAdd = false;
11874 Src = X;
11875 }
11876
11877 if (!sd_match(Src,
11880 m_SpecificInt(BitWidth - 1)))))))
11881 return SDValue();
11882
11883 SDValue Res = DAG.getNode(ISD::CTLS, DL, VT, X);
11884 if (!NeedAdd)
11885 return Res;
11886
11887 return DAG.getNode(ISD::ADD, DL, VT, Res, DAG.getConstant(1, DL, VT));
11888}
11889
11890SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11891 SDValue N0 = N->getOperand(0);
11892 EVT VT = N->getValueType(0);
11893 SDLoc DL(N);
11894
11895 // fold (ctlz c1) -> c2
11896 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11897 return C;
11898
11899 // If the value is known never to be zero, switch to the undef version.
11900 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11901 if (DAG.isKnownNeverZero(N0))
11902 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11903
11904 if (SDValue V = foldCTLZToCTLS(N0, DL))
11905 return V;
11906
11907 return SDValue();
11908}
11909
11910SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11911 SDValue N0 = N->getOperand(0);
11912 EVT VT = N->getValueType(0);
11913 SDLoc DL(N);
11914
11915 // fold (ctlz_zero_undef c1) -> c2
11916 if (SDValue C =
11918 return C;
11919
11920 if (SDValue V = foldCTLZToCTLS(N0, DL))
11921 return V;
11922
11923 return SDValue();
11924}
11925
11926SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11927 SDValue N0 = N->getOperand(0);
11928 EVT VT = N->getValueType(0);
11929 SDLoc DL(N);
11930
11931 // fold (cttz c1) -> c2
11932 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11933 return C;
11934
11935 // If the value is known never to be zero, switch to the undef version.
11936 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11937 if (DAG.isKnownNeverZero(N0))
11938 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11939
11940 return SDValue();
11941}
11942
11943SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11944 SDValue N0 = N->getOperand(0);
11945 EVT VT = N->getValueType(0);
11946 SDLoc DL(N);
11947
11948 // fold (cttz_zero_undef c1) -> c2
11949 if (SDValue C =
11951 return C;
11952 return SDValue();
11953}
11954
11955SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11956 SDValue N0 = N->getOperand(0);
11957 EVT VT = N->getValueType(0);
11958 unsigned NumBits = VT.getScalarSizeInBits();
11959 SDLoc DL(N);
11960
11961 // fold (ctpop c1) -> c2
11962 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11963 return C;
11964
11965 // If the source is being shifted, but doesn't affect any active bits,
11966 // then we can call CTPOP on the shift source directly.
11967 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11968 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11969 const APInt &Amt = AmtC->getAPIntValue();
11970 if (Amt.ult(NumBits)) {
11971 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11972 if ((N0.getOpcode() == ISD::SRL &&
11973 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11974 (N0.getOpcode() == ISD::SHL &&
11975 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11976 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11977 }
11978 }
11979 }
11980 }
11981
11982 // If the upper bits are known to be zero, then see if its profitable to
11983 // only count the lower bits.
11984 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11985 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11986 if (hasOperation(ISD::CTPOP, HalfVT) &&
11987 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11988 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11989 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11990 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11991 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11992 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11993 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11994 }
11995 }
11996 }
11997
11998 return SDValue();
11999}
12000
12002 SDValue RHS, const SDNodeFlags Flags,
12003 const TargetLowering &TLI) {
12004 EVT VT = LHS.getValueType();
12005 if (!VT.isFloatingPoint())
12006 return false;
12007
12008 return Flags.hasNoSignedZeros() &&
12010 (Flags.hasNoNaNs() ||
12011 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
12012}
12013
// Shared worker for DAGCombiner::combineMinNumMaxNum: given a
// select-of-setcc whose true/false operands are the compare operands
// (LHS==True/RHS==False or swapped), emit the matching
// FMINNUM(_IEEE)/FMAXNUM(_IEEE) node when the target supports it.
// NOTE(review): the declaration line (original line 12014) is missing
// from this extracted view -- confirm the full signature upstream.
12015 SDValue RHS, SDValue True, SDValue False,
12016 ISD::CondCode CC,
12017 const TargetLowering &TLI,
12018 SelectionDAG &DAG) {
12019 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
12020 switch (CC) {
12021 case ISD::SETOLT:
12022 case ISD::SETOLE:
12023 case ISD::SETLT:
12024 case ISD::SETLE:
12025 case ISD::SETULT:
12026 case ISD::SETULE: {
12027 // Since it's known never nan to get here already, either fminnum or
12028 // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
12029 // expanded in terms of it.
12030 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
12031 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
12032 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
12033
// Fall back to the non-IEEE flavor, checked against the legalized type.
12034 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
12035 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
12036 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
12037 return SDValue();
12038 }
12039 case ISD::SETOGT:
12040 case ISD::SETOGE:
12041 case ISD::SETGT:
12042 case ISD::SETGE:
12043 case ISD::SETUGT:
12044 case ISD::SETUGE: {
// Mirror of the less-than cases: prefer the IEEE flavor first.
12045 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
12046 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
12047 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
12048
12049 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
12050 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
12051 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
12052 return SDValue();
12053 }
12054 default:
12055 return SDValue();
12056 }
12057}
12058
12059// Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
// An arithmetic shift pairs with a signed (nsw) add, a logical shift with
// an unsigned (nuw) add; the wrap flag is what makes (x+y)>>1 exact.
// NOTE(review): original line 12073 (the m_Add capture pattern inside
// sd_match, binding A, B and Add) was dropped by the extraction.
12060SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
12061 const unsigned Opcode = N->getOpcode();
12062 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
12063 return SDValue();
12064
12065 EVT VT = N->getValueType(0);
12066 bool IsUnsigned = Opcode == ISD::SRL;
12067
12068 // Captured values.
12069 SDValue A, B, Add;
12070
12071 // Match floor average as it is common to both floor/ceil avgs.
12072 if (sd_match(N, m_BinOp(Opcode,
12074 m_One()))) {
12075 // Decide whether signed or unsigned.
12076 unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
12077 if (!hasOperation(FloorISD, VT))
12078 return SDValue();
12079
12080 // Can't optimize adds that may wrap.
12081 if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
12082 (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
12083 return SDValue();
12084
12085 return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
12086 }
12087
12088 return SDValue();
12089}
12090
// Rewrite a bitwise op whose second operand contains a negation so the
// negation becomes a NOT of a simpler sub/add, which other combines can
// then fold further.
// NOTE(review): the two sd_match pattern lines (original 12095 and 12099)
// were dropped by the extraction, so the exact matched shapes cannot be
// confirmed from this view -- check upstream DAGCombiner.cpp.
12091SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
12092 unsigned Opc = N->getOpcode();
12093 SDValue X, Y, Z;
12094 if (sd_match(
12096 return DAG.getNode(Opc, DL, VT, X,
12097 DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
12098
12100 m_Value(Z)))))
12101 return DAG.getNode(Opc, DL, VT, X,
12102 DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
12103
12104 return SDValue();
12105}
12106
12107/// Generate Min/Max node
/// Entry point: if the select operands directly mirror the compare operands,
/// defer to combineMinNumMaxNumImpl.  Otherwise try to pull an fneg out of
/// the true value (and the compare RHS) so the negated form matches.
/// NOTE(review): original lines 12116 and 12132 (presumably the
/// getNegatedExpression/getCheaperNegatedExpression calls producing NegTrue
/// and NegRHS) were dropped by the extraction.
12108SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
12109 SDValue RHS, SDValue True,
12110 SDValue False, ISD::CondCode CC) {
12111 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
12112 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
12113
12114 // If we can't directly match this, try to see if we can pull an fneg out of
12115 // the select.
12117 True, DAG, LegalOperations, ForCodeSize);
12118 if (!NegTrue)
12119 return SDValue();
12120
// Keep the freshly built node alive across further DAG mutation.
12121 HandleSDNode NegTrueHandle(NegTrue);
12122
12123 // Try to unfold an fneg from the select if we are comparing the negated
12124 // constant.
12125 //
12126 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
12127 //
12128 // TODO: Handle fabs
12129 if (LHS == NegTrue) {
12130 // If we can't directly match this, try to see if we can pull an fneg out of
12131 // the select.
12133 RHS, DAG, LegalOperations, ForCodeSize);
12134 if (NegRHS) {
12135 HandleSDNode NegRHSHandle(NegRHS);
12136 if (NegRHS == False) {
12137 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
12138 False, CC, TLI, DAG);
12139 if (Combined)
12140 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
12141 }
12142 }
12143 }
12144
12145 return SDValue();
12146}
12147
12148/// If a (v)select has a condition value that is a sign-bit test, try to smear
12149/// the condition operand sign-bit across the value width and use it as a mask.
/// The smeared sign bit (X >>s BW-1, i.e. all-zeros or all-ones) replaces the
/// select with a single OR/AND against the surviving constant arm.
/// NOTE(review): the declaration line (original 12150) plus lines 12155 and
/// 12169 were dropped by the extraction -- 12155 likely screens the select
/// arms for constants; confirm against upstream DAGCombiner.cpp.
12151 SelectionDAG &DAG) {
12152 SDValue Cond = N->getOperand(0);
12153 SDValue C1 = N->getOperand(1);
12154 SDValue C2 = N->getOperand(2);
12156 return SDValue();
12157
// The transform only works when the compare operates on the same type as
// the select result, so the shifted mask lines up bit-for-bit.
12158 EVT VT = N->getValueType(0);
12159 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
12160 VT != Cond.getOperand(0).getValueType())
12161 return SDValue();
12162
12163 // The inverted-condition + commuted-select variants of these patterns are
12164 // canonicalized to these forms in IR.
12165 SDValue X = Cond.getOperand(0);
12166 SDValue CondC = Cond.getOperand(1);
12167 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12168 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
12170 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
12171 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12172 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12173 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
12174 }
12175 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
12176 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
12177 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12178 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12179 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
12180 }
12181 return SDValue();
12182}
12183
// Heuristic predicate: should (select Cond, C1, C2) be rewritten into
// arithmetic (ext/add/shift) instead of being kept as a select?  Defers
// first to the target hook, then allows the transform for conditions that
// are not single-use setcc sign-bit tests.
// NOTE(review): the declaration line (original 12184) and line 12191 were
// dropped by the extraction -- confirm the full guard sequence upstream.
12185 const TargetLowering &TLI) {
12186 if (!TLI.convertSelectOfConstantsToMath(VT))
12187 return false;
12188
12189 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
12190 return true;
12192 return true;
12193
// A sign-bit test (X < 0 or X > -1) is better served by the dedicated
// sign-bit smear fold, so report "convertible" for those too.
12194 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12195 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
12196 return true;
12197 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
12198 return true;
12199
12200 return false;
12201}
12202
// Fold (select Cond, C1, C2) with integer constants C1/C2 into boolean
// math: xor/zext/sext/add/shl/or forms, depending on the constant values.
// NOTE(review): original lines 12231, 12233, 12275 and 12316 were dropped
// by the extraction (12231/12233 are the getBooleanContents comparison
// values; 12275/12316 are guarded calls whose conditions are missing).
12203SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
12204 SDValue Cond = N->getOperand(0);
12205 SDValue N1 = N->getOperand(1);
12206 SDValue N2 = N->getOperand(2);
12207 EVT VT = N->getValueType(0);
12208 EVT CondVT = Cond.getValueType();
12209 SDLoc DL(N);
12210
12211 if (!VT.isInteger())
12212 return SDValue();
12213
// Both select arms must be constants for any of the folds below.
12214 auto *C1 = dyn_cast<ConstantSDNode>(N1);
12215 auto *C2 = dyn_cast<ConstantSDNode>(N2);
12216 if (!C1 || !C2)
12217 return SDValue();
12218
12219 if (CondVT != MVT::i1 || LegalOperations) {
12220 // fold (select Cond, 0, 1) -> (xor Cond, 1)
12221 // We can't do this reliably if integer based booleans have different contents
12222 // to floating point based booleans. This is because we can't tell whether we
12223 // have an integer-based boolean or a floating-point-based boolean unless we
12224 // can find the SETCC that produced it and inspect its operands. This is
12225 // fairly easy if C is the SETCC node, but it can potentially be
12226 // undiscoverable (or not reasonably discoverable). For example, it could be
12227 // in another basic block or it could require searching a complicated
12228 // expression.
12229 if (CondVT.isInteger() &&
12230 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
12232 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
12234 C1->isZero() && C2->isOne()) {
12235 SDValue NotCond =
12236 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
12237 if (VT.bitsEq(CondVT))
12238 return NotCond;
12239 return DAG.getZExtOrTrunc(NotCond, DL, VT);
12240 }
12241
12242 return SDValue();
12243 }
12244
12245 // Only do this before legalization to avoid conflicting with target-specific
12246 // transforms in the other direction (create a select from a zext/sext). There
12247 // is also a target-independent combine here in DAGCombiner in the other
12248 // direction for (select Cond, -1, 0) when the condition is not i1.
12249 assert(CondVT == MVT::i1 && !LegalOperations);
12250
12251 // select Cond, 1, 0 --> zext (Cond)
12252 if (C1->isOne() && C2->isZero())
12253 return DAG.getZExtOrTrunc(Cond, DL, VT);
12254
12255 // select Cond, -1, 0 --> sext (Cond)
12256 if (C1->isAllOnes() && C2->isZero())
12257 return DAG.getSExtOrTrunc(Cond, DL, VT);
12258
12259 // select Cond, 0, 1 --> zext (!Cond)
12260 if (C1->isZero() && C2->isOne()) {
12261 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12262 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
12263 return NotCond;
12264 }
12265
12266 // select Cond, 0, -1 --> sext (!Cond)
12267 if (C1->isZero() && C2->isAllOnes()) {
12268 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12269 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12270 return NotCond;
12271 }
12272
12273 // Use a target hook because some targets may prefer to transform in the
12274 // other direction.
// NOTE(review): the guard on the line below (original 12275, presumably a
// call to shouldConvertSelectOfConstantsToMath) is missing from this view.
12276 return SDValue();
12277
12278 // For any constants that differ by 1, we can transform the select into
12279 // an extend and add.
12280 const APInt &C1Val = C1->getAPIntValue();
12281 const APInt &C2Val = C2->getAPIntValue();
12282
12283 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12284 if (C1Val - 1 == C2Val) {
12285 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12286 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12287 }
12288
12289 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12290 if (C1Val + 1 == C2Val) {
12291 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12292 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12293 }
12294
12295 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12296 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12297 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12298 SDValue ShAmtC =
12299 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
12300 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
12301 }
12302
12303 // select Cond, -1, C --> or (sext Cond), C
12304 if (C1->isAllOnes()) {
12305 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12306 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
12307 }
12308
12309 // select Cond, C, -1 --> or (sext (not Cond)), C
12310 if (C2->isAllOnes()) {
12311 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12312 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12313 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
12314 }
12315
// NOTE(review): the binding for V (original 12316) is missing; it likely
// tries the sign-bit SRA fold declared above.
12317 return V;
12318
12319 return SDValue();
12320}
12321
12322template <class MatchContextClass>
// Fold a boolean-typed (v)(vp.)select into plain logic: when the result
// type equals the i1(-vector) condition type, a select is just AND/OR/XOR
// of the condition with the (frozen) arms.  Freeze is required because a
// select only propagates poison from the chosen arm, while AND/OR would
// propagate it from either.
// NOTE(review): the declaration line (original 12323) is missing from this
// extracted view.
12324 SelectionDAG &DAG) {
12325 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12326 N->getOpcode() == ISD::VP_SELECT) &&
12327 "Expected a (v)(vp.)select");
12328 SDValue Cond = N->getOperand(0);
12329 SDValue T = N->getOperand(1), F = N->getOperand(2);
12330 EVT VT = N->getValueType(0);
12331 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12332 MatchContextClass matcher(DAG, TLI, N);
12333
12334 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12335 return SDValue();
12336
12337 // select Cond, Cond, F --> or Cond, freeze(F)
12338 // select Cond, 1, F --> or Cond, freeze(F)
12339 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
12340 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
12341
12342 // select Cond, T, Cond --> and Cond, freeze(T)
12343 // select Cond, T, 0 --> and Cond, freeze(T)
12344 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12345 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
12346
12347 // select Cond, T, 1 --> or (not Cond), freeze(T)
12348 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
12349 SDValue NotCond =
12350 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12351 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
12352 }
12353
12354 // select Cond, 0, F --> and (not Cond), freeze(F)
12355 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12356 SDValue NotCond =
12357 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12358 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
12359 }
12360
12361 return SDValue();
12362}
12363
// Fold a select whose condition is a sign-bit test of a same-typed value
// into a sign-splat mask: (Cond0 s>> BW-1) is all-ones when negative and
// all-zeros otherwise, so AND/OR with the (frozen) arm replaces the select.
// NOTE(review): the declaration line (original 12364) is missing from this
// extracted view -- confirm the function name/signature upstream.
12365 SDValue N0 = N->getOperand(0);
12366 SDValue N1 = N->getOperand(1);
12367 SDValue N2 = N->getOperand(2);
12368 EVT VT = N->getValueType(0);
12369 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12370
12371 SDValue Cond0, Cond1;
12372 ISD::CondCode CC;
12373 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
12374 m_CondCode(CC)))) ||
12375 VT != Cond0.getValueType())
12376 return SDValue();
12377
12378 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12379 // compare is inverted from that pattern ("Cond0 s> -1").
12380 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
12381 ; // This is the pattern we are looking for.
12382 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
12383 std::swap(N1, N2);
12384 else
12385 return SDValue();
12386
12387 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
12388 if (isNullOrNullSplat(N2)) {
12389 SDLoc DL(N);
12390 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12391 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12392 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
12393 }
12394
12395 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12396 if (isAllOnesOrAllOnesSplat(N1)) {
12397 SDLoc DL(N);
12398 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12399 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12400 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
12401 }
12402
12403 // If we have to invert the sign bit mask, only do that transform if the
12404 // target has a bitwise 'and not' instruction (the invert is free).
12405 // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12406 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12407 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
12408 SDLoc DL(N);
12409 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12410 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12411 SDValue Not = DAG.getNOT(DL, Sra, VT);
12412 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
12413 }
12414
12415 // TODO: There's another pattern in this family, but it may require
12416 // implementing hasOrNot() to check for profitability:
12417 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12418
12419 return SDValue();
12420}
12421
12422// Match SELECTs with absolute difference patterns.
12423// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12424// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12425// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12426// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
// NOTE(review): the sd_match pattern lines inside both switch arms
// (original 12442-12445, 12447-12450, 12458-12461 and 12463-12466) were
// dropped by the extraction; only the actions survive in this view.
12427SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
12428 SDValue False, ISD::CondCode CC,
12429 const SDLoc &DL) {
// Signed compares map to ABDS, unsigned to ABDU.
12430 bool IsSigned = isSignedIntSetCC(CC);
12431 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12432 EVT VT = LHS.getValueType();
12433
12434 if (LegalOperations && !hasOperation(ABDOpc, VT))
12435 return SDValue();
12436
12437 switch (CC) {
12438 case ISD::SETGT:
12439 case ISD::SETGE:
12440 case ISD::SETUGT:
12441 case ISD::SETUGE:
12446 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12451 hasOperation(ABDOpc, VT))
12452 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12453 break;
12454 case ISD::SETLT:
12455 case ISD::SETLE:
12456 case ISD::SETULT:
12457 case ISD::SETULE:
12462 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12467 hasOperation(ABDOpc, VT))
12468 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12469 break;
12470 default:
12471 break;
12472 }
12473
12474 return SDValue();
12475}
12476
12477// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12478// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12479SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12480 SDValue False, ISD::CondCode CC,
12481 const SDLoc &DL) {
12482 APInt C;
12483 EVT VT = True.getValueType();
12484 if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
12485 if (CC == ISD::SETUGT && LHS == False &&
12486 sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
12487 SDValue AddC = DAG.getConstant(~C, DL, VT);
12488 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
12489 return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
12490 }
12491 if (CC == ISD::SETULT && LHS == True &&
12492 sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
12493 SDValue AddC = DAG.getConstant(-C, DL, VT);
12494 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
12495 return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
12496 }
12497 }
12498 return SDValue();
12499}
12500
// Main combine entry point for ISD::SELECT: simplify trivial selects,
// normalize boolean-condition and/or chains, fold setcc-based selects into
// min/max/abd/umin/saturating-add forms, and finally try binop hoisting
// and ext-and conversion.
// NOTE(review): original lines 12513, 12537, 12626 and 12668 were dropped
// by the extraction (12513: a guarded fold producing V; 12537: the
// shouldNormalizeToSelectSequence() call; 12626: the CondCode extraction
// for CC; 12668: part of the SELECT_CC legality condition).
12501SDValue DAGCombiner::visitSELECT(SDNode *N) {
12502 SDValue N0 = N->getOperand(0);
12503 SDValue N1 = N->getOperand(1);
12504 SDValue N2 = N->getOperand(2);
12505 EVT VT = N->getValueType(0);
12506 EVT VT0 = N0.getValueType();
12507 SDLoc DL(N);
12508 SDNodeFlags Flags = N->getFlags();
12509
12510 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12511 return V;
12512
12514 return V;
12515
12516 // select (not Cond), N1, N2 -> select Cond, N2, N1
12517 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12518 return DAG.getSelect(DL, VT, F, N2, N1, Flags);
12519
12520 if (SDValue V = foldSelectOfConstants(N))
12521 return V;
12522
12523 // If we can fold this based on the true/false value, do so.
12524 if (SimplifySelectOps(N, N1, N2))
12525 return SDValue(N, 0); // Don't revisit N.
12526
12527 if (VT0 == MVT::i1) {
12528 // The code in this block deals with the following 2 equivalences:
12529 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12530 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12531 // The target can specify its preferred form with the
12532 // shouldNormalizeToSelectSequence() callback. However we always transform
12533 // to the right anyway if we find the inner select exists in the DAG anyway
12534 // and we always transform to the left side if we know that we can further
12535 // optimize the combination of the conditions.
12536 bool normalizeToSequence =
12538 // select (and Cond0, Cond1), X, Y
12539 // -> select Cond0, (select Cond1, X, Y), Y
12540 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12541 SDValue Cond0 = N0->getOperand(0);
12542 SDValue Cond1 = N0->getOperand(1);
12543 SDValue InnerSelect =
12544 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
12545 if (normalizeToSequence || !InnerSelect.use_empty())
12546 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
12547 InnerSelect, N2, Flags);
12548 // Cleanup on failure.
12549 if (InnerSelect.use_empty())
12550 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12551 }
12552 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12553 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12554 SDValue Cond0 = N0->getOperand(0);
12555 SDValue Cond1 = N0->getOperand(1);
12556 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
12557 Cond1, N1, N2, Flags);
12558 if (normalizeToSequence || !InnerSelect.use_empty())
12559 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
12560 InnerSelect, Flags);
12561 // Cleanup on failure.
12562 if (InnerSelect.use_empty())
12563 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12564 }
12565
12566 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12567 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12568 SDValue N1_0 = N1->getOperand(0);
12569 SDValue N1_1 = N1->getOperand(1);
12570 SDValue N1_2 = N1->getOperand(2);
12571 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
12572 // Create the actual and node if we can generate good code for it.
12573 if (!normalizeToSequence) {
12574 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
12575 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
12576 N2, Flags);
12577 }
12578 // Otherwise see if we can optimize the "and" to a better pattern.
12579 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
12580 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
12581 N2, Flags);
12582 }
12583 }
12584 }
12585 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
12586 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
12587 SDValue N2_0 = N2->getOperand(0);
12588 SDValue N2_1 = N2->getOperand(1);
12589 SDValue N2_2 = N2->getOperand(2);
12590 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
12591 // Create the actual or node if we can generate good code for it.
12592 if (!normalizeToSequence) {
12593 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
12594 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
12595 N2_2, Flags);
12596 }
12597 // Otherwise see if we can optimize to a better pattern.
12598 if (SDValue Combined = visitORLike(N0, N2_0, DL))
12599 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
12600 N2_2, Flags);
12601 }
12602 }
12603
12604 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
12605 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12606 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
12607 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12608 N2.getOperand(1) == N1.getOperand(0) &&
12609 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12610 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
12611
12612 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
12613 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12614 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
12615 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12616 N2.getOperand(1) == N1.getOperand(0) &&
12617 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12618 return DAG.getNegative(
12619 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
12620 DL, VT);
12621 }
12622
12623 // Fold selects based on a setcc into other things, such as min/max/abs.
12624 if (N0.getOpcode() == ISD::SETCC) {
12625 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
12627
12628 // select (fcmp lt x, y), x, y -> fminnum x, y
12629 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12630 //
12631 // This is OK if we don't care what happens if either operand is a NaN.
12632 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12633 if (SDValue FMinMax =
12634 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12635 return FMinMax;
12636
12637 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12638 // This is conservatively limited to pre-legal-operations to give targets
12639 // a chance to reverse the transform if they want to do that. Also, it is
12640 // unlikely that the pattern would be formed late, so it's probably not
12641 // worth going through the other checks.
12642 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12643 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12644 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12645 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12646 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12647 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12648 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12649 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12650 //
12651 // The IR equivalent of this transform would have this form:
12652 // %a = add %x, C
12653 // %c = icmp ugt %x, ~C
12654 // %r = select %c, -1, %a
12655 // =>
12656 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12657 // %u0 = extractvalue %u, 0
12658 // %u1 = extractvalue %u, 1
12659 // %r = select %u1, -1, %u0
12660 SDVTList VTs = DAG.getVTList(VT, VT0);
12661 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12662 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12663 }
12664 }
12665
12666 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12667 (!LegalOperations &&
12669 // Any flags available in a select/setcc fold will be on the setcc as they
12670 // migrated from fcmp
12671 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
12672 N0.getOperand(2), N0->getFlags());
12673 }
12674
12675 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12676 return ABD;
12677
12678 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12679 return NewSel;
12680
12681 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12682 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12683 if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
12684 return UMin;
12685 }
12686
12687 if (!VT.isVector())
12688 if (SDValue BinOp = foldSelectOfBinops(N))
12689 return BinOp;
12690
12691 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12692 return R;
12693
12694 return SDValue();
12695}
12696
12697// This function assumes all the vselect's arguments are CONCAT_VECTOR
12698// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
// If each half of the condition build_vector is uniform (ignoring undefs),
// the vselect reduces to a concat of the chosen half from LHS or RHS.
// NOTE(review): the declaration line (original 12699) and line 12747 (the
// first arguments of the final getNode call, presumably
// ISD::CONCAT_VECTORS with DL/VT) are missing from this extracted view.
12700 SDLoc DL(N);
12701 SDValue Cond = N->getOperand(0);
12702 SDValue LHS = N->getOperand(1);
12703 SDValue RHS = N->getOperand(2);
12704 EVT VT = N->getValueType(0);
12705 int NumElems = VT.getVectorNumElements();
12706 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12707 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12708 Cond.getOpcode() == ISD::BUILD_VECTOR);
12709
12710 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
12711 // binary ones here.
12712 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12713 return SDValue();
12714
12715 // We're sure we have an even number of elements due to the
12716 // concat_vectors we have as arguments to vselect.
12717 // Skip BV elements until we find one that's not an UNDEF
12718 // After we find an UNDEF element, keep looping until we get to half the
12719 // length of the BV and see if all the non-undef nodes are the same.
12720 ConstantSDNode *BottomHalf = nullptr;
12721 for (int i = 0; i < NumElems / 2; ++i) {
12722 if (Cond->getOperand(i)->isUndef())
12723 continue;
12724
12725 if (BottomHalf == nullptr)
12726 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12727 else if (Cond->getOperand(i).getNode() != BottomHalf)
12728 return SDValue();
12729 }
12730
12731 // Do the same for the second half of the BuildVector
12732 ConstantSDNode *TopHalf = nullptr;
12733 for (int i = NumElems / 2; i < NumElems; ++i) {
12734 if (Cond->getOperand(i)->isUndef())
12735 continue;
12736
12737 if (TopHalf == nullptr)
12738 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12739 else if (Cond->getOperand(i).getNode() != TopHalf)
12740 return SDValue();
12741 }
12742
12743 assert(TopHalf && BottomHalf &&
12744 "One half of the selector was all UNDEFs and the other was all the "
12745 "same value. This should have been addressed before this function.");
12746 return DAG.getNode(
12748 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12749 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12750}
12751
12752bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12753 SelectionDAG &DAG, const SDLoc &DL) {
12754
12755 // Only perform the transformation when existing operands can be reused.
12756 if (IndexIsScaled)
12757 return false;
12758
12759 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12760 return false;
12761
12762 EVT VT = BasePtr.getValueType();
12763
12764 if (SDValue SplatVal = DAG.getSplatValue(Index);
12765 SplatVal && !isNullConstant(SplatVal) &&
12766 SplatVal.getValueType() == VT) {
12767 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12768 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12769 return true;
12770 }
12771
12772 if (Index.getOpcode() != ISD::ADD)
12773 return false;
12774
12775 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12776 SplatVal && SplatVal.getValueType() == VT) {
12777 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12778 Index = Index.getOperand(1);
12779 return true;
12780 }
12781 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12782 SplatVal && SplatVal.getValueType() == VT) {
12783 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12784 Index = Index.getOperand(0);
12785 return true;
12786 }
12787 return false;
12788}
12789
12790// Fold sext/zext of index into index type.
12791bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12792 SelectionDAG &DAG) {
12793 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12794
12795 // It's always safe to look through zero extends.
12796 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12797 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12798 IndexType = ISD::UNSIGNED_SCALED;
12799 Index = Index.getOperand(0);
12800 return true;
12801 }
12802 if (ISD::isIndexTypeSigned(IndexType)) {
12803 IndexType = ISD::UNSIGNED_SCALED;
12804 return true;
12805 }
12806 }
12807
12808 // It's only safe to look through sign extends when Index is signed.
12809 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12810 ISD::isIndexTypeSigned(IndexType) &&
12811 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12812 Index = Index.getOperand(0);
12813 return true;
12814 }
12815
12816 return false;
12817}
12818
// Combine a VP (vector-predicated) scatter: drop scatters with an
// all-false mask, and refine the base pointer / index type when possible.
// NOTE(review): the condition of the zero-mask check (original line 12832,
// presumably ISD::isConstantSplatVectorAllZeros(Mask.getNode())) is
// missing from this extracted view.
12819SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12820 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12821 SDValue Mask = MSC->getMask();
12822 SDValue Chain = MSC->getChain();
12823 SDValue Index = MSC->getIndex();
12824 SDValue Scale = MSC->getScale();
12825 SDValue StoreVal = MSC->getValue();
12826 SDValue BasePtr = MSC->getBasePtr();
12827 SDValue VL = MSC->getVectorLength();
12828 ISD::MemIndexType IndexType = MSC->getIndexType();
12829 SDLoc DL(N);
12830
12831 // Zap scatters with a zero mask.
12833 return Chain;
12834
// Rebuild the node if either the uniform-base or the index-type
// refinement succeeded (both mutate their reference parameters).
12835 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12836 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12837 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12838 DL, Ops, MSC->getMemOperand(), IndexType);
12839 }
12840
12841 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12842 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12843 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12844 DL, Ops, MSC->getMemOperand(), IndexType);
12845 }
12846
12847 return SDValue();
12848}
12849
// Combine a masked scatter: drop scatters with an all-false mask, and
// refine the base pointer / index type when possible (mirrors
// visitVPSCATTER but with the masked-scatter operand order).
// NOTE(review): the condition of the zero-mask check (original line 12862)
// is missing from this extracted view.
12850SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12851 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12852 SDValue Mask = MSC->getMask();
12853 SDValue Chain = MSC->getChain();
12854 SDValue Index = MSC->getIndex();
12855 SDValue Scale = MSC->getScale();
12856 SDValue StoreVal = MSC->getValue();
12857 SDValue BasePtr = MSC->getBasePtr();
12858 ISD::MemIndexType IndexType = MSC->getIndexType();
12859 SDLoc DL(N);
12860
12861 // Zap scatters with a zero mask.
12863 return Chain;
12864
12865 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12866 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12867 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12868 DL, Ops, MSC->getMemOperand(), IndexType,
12869 MSC->isTruncatingStore());
12870 }
12871
12872 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12873 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12874 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12875 DL, Ops, MSC->getMemOperand(), IndexType,
12876 MSC->isTruncatingStore());
12877 }
12878
12879 return SDValue();
12880}
12881
12882SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12883 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12884 SDValue Mask = MST->getMask();
12885 SDValue Chain = MST->getChain();
12886 SDValue Value = MST->getValue();
12887 SDValue Ptr = MST->getBasePtr();
12888
12889 // Zap masked stores with a zero mask.
12891 return Chain;
12892
12893 // Remove a masked store if base pointers and masks are equal.
12894 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12895 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12896 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12897 !MST->getBasePtr().isUndef() &&
12898 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12899 MST1->getMemoryVT().getStoreSize()) ||
12901 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12902 MST->getMemoryVT().getStoreSize())) {
12903 CombineTo(MST1, MST1->getChain());
12904 if (N->getOpcode() != ISD::DELETED_NODE)
12905 AddToWorklist(N);
12906 return SDValue(N, 0);
12907 }
12908 }
12909
12910 // If this is a masked load with an all ones mask, we can use a unmasked load.
12911 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12912 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12913 !MST->isCompressingStore() && !MST->isTruncatingStore())
12914 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12915 MST->getBasePtr(), MST->getPointerInfo(),
12916 MST->getBaseAlign(), MST->getMemOperand()->getFlags(),
12917 MST->getAAInfo());
12918
12919 // Try transforming N to an indexed store.
12920 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12921 return SDValue(N, 0);
12922
12923 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12924 Value.getValueType().isInteger() &&
12926 !cast<ConstantSDNode>(Value)->isOpaque())) {
12927 APInt TruncDemandedBits =
12928 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12930
12931 // See if we can simplify the operation with
12932 // SimplifyDemandedBits, which only works if the value has a single use.
12933 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12934 // Re-visit the store if anything changed and the store hasn't been merged
12935 // with another node (N is deleted) SimplifyDemandedBits will add Value's
12936 // node back to the worklist if necessary, but we also need to re-visit
12937 // the Store node itself.
12938 if (N->getOpcode() != ISD::DELETED_NODE)
12939 AddToWorklist(N);
12940 return SDValue(N, 0);
12941 }
12942 }
12943
12944 // If this is a TRUNC followed by a masked store, fold this into a masked
12945 // truncating store. We can do this even if this is already a masked
12946 // truncstore.
12947 // TODO: Try combine to masked compress store if possiable.
12948 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12949 MST->isUnindexed() && !MST->isCompressingStore() &&
12950 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12951 MST->getMemoryVT(), LegalOperations)) {
12952 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12953 Value.getOperand(0).getValueType());
12954 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12955 MST->getOffset(), Mask, MST->getMemoryVT(),
12956 MST->getMemOperand(), MST->getAddressingMode(),
12957 /*IsTruncating=*/true);
12958 }
12959
12960 return SDValue();
12961}
12962
12963SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12964 auto *SST = cast<VPStridedStoreSDNode>(N);
12965 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12966 // Combine strided stores with unit-stride to a regular VP store.
12967 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12968 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12969 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12970 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12971 SST->getVectorLength(), SST->getMemoryVT(),
12972 SST->getMemOperand(), SST->getAddressingMode(),
12973 SST->isTruncatingStore(), SST->isCompressingStore());
12974 }
12975 return SDValue();
12976}
12977
12978SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12979 SDLoc DL(N);
12980 SDValue Vec = N->getOperand(0);
12981 SDValue Mask = N->getOperand(1);
12982 SDValue Passthru = N->getOperand(2);
12983 EVT VecVT = Vec.getValueType();
12984
12985 bool HasPassthru = !Passthru.isUndef();
12986
12987 APInt SplatVal;
12988 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12989 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12990
12991 if (Vec.isUndef() || Mask.isUndef())
12992 return Passthru;
12993
12994 // No need for potentially expensive compress if the mask is constant.
12997 EVT ScalarVT = VecVT.getVectorElementType();
12998 unsigned NumSelected = 0;
12999 unsigned NumElmts = VecVT.getVectorNumElements();
13000 for (unsigned I = 0; I < NumElmts; ++I) {
13001 SDValue MaskI = Mask.getOperand(I);
13002 // We treat undef mask entries as "false".
13003 if (MaskI.isUndef())
13004 continue;
13005
13006 if (TLI.isConstTrueVal(MaskI)) {
13007 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
13008 DAG.getVectorIdxConstant(I, DL));
13009 Ops.push_back(VecI);
13010 NumSelected++;
13011 }
13012 }
13013 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
13014 SDValue Val =
13015 HasPassthru
13016 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
13017 DAG.getVectorIdxConstant(Rest, DL))
13018 : DAG.getUNDEF(ScalarVT);
13019 Ops.push_back(Val);
13020 }
13021 return DAG.getBuildVector(VecVT, DL, Ops);
13022 }
13023
13024 return SDValue();
13025}
13026
13027SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
13028 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
13029 SDValue Mask = MGT->getMask();
13030 SDValue Chain = MGT->getChain();
13031 SDValue Index = MGT->getIndex();
13032 SDValue Scale = MGT->getScale();
13033 SDValue BasePtr = MGT->getBasePtr();
13034 SDValue VL = MGT->getVectorLength();
13035 ISD::MemIndexType IndexType = MGT->getIndexType();
13036 SDLoc DL(N);
13037
13038 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
13039 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
13040 return DAG.getGatherVP(
13041 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
13042 Ops, MGT->getMemOperand(), IndexType);
13043 }
13044
13045 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
13046 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
13047 return DAG.getGatherVP(
13048 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
13049 Ops, MGT->getMemOperand(), IndexType);
13050 }
13051
13052 return SDValue();
13053}
13054
13055SDValue DAGCombiner::visitMGATHER(SDNode *N) {
13056 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
13057 SDValue Mask = MGT->getMask();
13058 SDValue Chain = MGT->getChain();
13059 SDValue Index = MGT->getIndex();
13060 SDValue Scale = MGT->getScale();
13061 SDValue PassThru = MGT->getPassThru();
13062 SDValue BasePtr = MGT->getBasePtr();
13063 ISD::MemIndexType IndexType = MGT->getIndexType();
13064 SDLoc DL(N);
13065
13066 // Zap gathers with a zero mask.
13068 return CombineTo(N, PassThru, MGT->getChain());
13069
13070 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
13071 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
13072 return DAG.getMaskedGather(
13073 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
13074 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
13075 }
13076
13077 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
13078 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
13079 return DAG.getMaskedGather(
13080 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
13081 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
13082 }
13083
13084 return SDValue();
13085}
13086
13087SDValue DAGCombiner::visitMLOAD(SDNode *N) {
13088 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
13089 SDValue Mask = MLD->getMask();
13090
13091 // Zap masked loads with a zero mask.
13093 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
13094
13095 // If this is a masked load with an all ones mask, we can use a unmasked load.
13096 // FIXME: Can we do this for indexed, expanding, or extending loads?
13097 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
13098 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
13099 SDValue NewLd = DAG.getLoad(
13100 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
13101 MLD->getPointerInfo(), MLD->getBaseAlign(),
13102 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
13103 return CombineTo(N, NewLd, NewLd.getValue(1));
13104 }
13105
13106 // Try transforming N to an indexed load.
13107 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13108 return SDValue(N, 0);
13109
13110 return SDValue();
13111}
13112
13113SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
13114 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
13115 SDValue Chain = HG->getChain();
13116 SDValue Inc = HG->getInc();
13117 SDValue Mask = HG->getMask();
13118 SDValue BasePtr = HG->getBasePtr();
13119 SDValue Index = HG->getIndex();
13120 SDLoc DL(HG);
13121
13122 EVT MemVT = HG->getMemoryVT();
13123 EVT DataVT = Index.getValueType();
13124 MachineMemOperand *MMO = HG->getMemOperand();
13125 ISD::MemIndexType IndexType = HG->getIndexType();
13126
13128 return Chain;
13129
13130 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
13131 refineIndexType(Index, IndexType, DataVT, DAG)) {
13132 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
13133 HG->getScale(), HG->getIntID()};
13134 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
13135 MMO, IndexType);
13136 }
13137
13138 return SDValue();
13139}
13140
13141SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
13142 if (SDValue Res = foldPartialReduceMLAMulOp(N))
13143 return Res;
13144 if (SDValue Res = foldPartialReduceAdd(N))
13145 return Res;
13146 return SDValue();
13147}
13148
13149// partial_reduce_*mla(acc, mul(*ext(a), *ext(b)), splat(1))
13150// -> partial_reduce_*mla(acc, a, b)
13151//
13152// partial_reduce_*mla(acc, mul(*ext(x), splat(C)), splat(1))
13153// -> partial_reduce_*mla(acc, x, splat(C))
13154//
13155// partial_reduce_*mla(acc, sel(p, mul(*ext(a), *ext(b)), splat(0)), splat(1))
13156// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), b)
13157//
13158// partial_reduce_*mla(acc, sel(p, mul(*ext(a), splat(C)), splat(0)), splat(1))
13159// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), splat(C))
13160SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
13161 SDLoc DL(N);
13162 auto *Context = DAG.getContext();
13163 SDValue Acc = N->getOperand(0);
13164 SDValue Op1 = N->getOperand(1);
13165 SDValue Op2 = N->getOperand(2);
13166 unsigned Opc = Op1->getOpcode();
13167
13168 // Handle predication by moving the SELECT into the operand of the MUL.
13169 SDValue Pred;
13170 if (Opc == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) ||
13171 isZeroOrZeroSplatFP(Op1->getOperand(2)))) {
13172 Pred = Op1->getOperand(0);
13173 Op1 = Op1->getOperand(1);
13174 Opc = Op1->getOpcode();
13175 }
13176
13177 if (Opc != ISD::MUL && Opc != ISD::FMUL && Opc != ISD::SHL)
13178 return SDValue();
13179
13180 SDValue LHS = Op1->getOperand(0);
13181 SDValue RHS = Op1->getOperand(1);
13182
13183 // Try to treat (shl %a, %c) as (mul %a, (1 << %c)) for constant %c.
13184 if (Opc == ISD::SHL) {
13185 APInt C;
13186 if (!ISD::isConstantSplatVector(RHS.getNode(), C))
13187 return SDValue();
13188
13189 RHS =
13190 DAG.getSplatVector(RHS.getValueType(), DL,
13191 DAG.getConstant(APInt(C.getBitWidth(), 1).shl(C), DL,
13192 RHS.getValueType().getScalarType()));
13193 Opc = ISD::MUL;
13194 }
13195
13196 if (!(Opc == ISD::MUL && llvm::isOneOrOneSplat(Op2)) &&
13198 return SDValue();
13199
13200 auto IsIntOrFPExtOpcode = [](unsigned int Opcode) {
13201 return (ISD::isExtOpcode(Opcode) || Opcode == ISD::FP_EXTEND);
13202 };
13203
13204 unsigned LHSOpcode = LHS->getOpcode();
13205 if (!IsIntOrFPExtOpcode(LHSOpcode))
13206 return SDValue();
13207
13208 SDValue LHSExtOp = LHS->getOperand(0);
13209 EVT LHSExtOpVT = LHSExtOp.getValueType();
13210
13211 // When Pred is non-zero, set Op = select(Pred, Op, splat(0)) and freeze
13212 // OtherOp to keep the same semantics when moving the selects into the MUL
13213 // operands.
13214 auto ApplyPredicate = [&](SDValue &Op, SDValue &OtherOp) {
13215 if (Pred) {
13216 EVT OpVT = Op.getValueType();
13217 SDValue Zero = OpVT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, OpVT)
13218 : DAG.getConstant(0, DL, OpVT);
13219 Op = DAG.getSelect(DL, OpVT, Pred, Op, Zero);
13220 OtherOp = DAG.getFreeze(OtherOp);
13221 }
13222 };
13223
13224 // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
13225 // -> partial_reduce_*mla(acc, x, C)
13226 APInt C;
13227 if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
13228 // TODO: Make use of partial_reduce_sumla here
13229 APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
13230 unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
13231 if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(LHSBits) != C) &&
13232 (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(LHSBits) != C))
13233 return SDValue();
13234
13235 unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND
13238
13239 // Only perform these combines if the target supports folding
13240 // the extends into the operation.
13242 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13243 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13244 return SDValue();
13245
13246 SDValue C = DAG.getConstant(CTrunc, DL, LHSExtOpVT);
13247 ApplyPredicate(C, LHSExtOp);
13248 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp, C);
13249 }
13250
13251 unsigned RHSOpcode = RHS->getOpcode();
13252 if (!IsIntOrFPExtOpcode(RHSOpcode))
13253 return SDValue();
13254
13255 SDValue RHSExtOp = RHS->getOperand(0);
13256 if (LHSExtOpVT != RHSExtOp.getValueType())
13257 return SDValue();
13258
13259 unsigned NewOpc;
13260 if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
13261 NewOpc = ISD::PARTIAL_REDUCE_SMLA;
13262 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13263 NewOpc = ISD::PARTIAL_REDUCE_UMLA;
13264 else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13266 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
13268 std::swap(LHSExtOp, RHSExtOp);
13269 } else if (LHSOpcode == ISD::FP_EXTEND && RHSOpcode == ISD::FP_EXTEND) {
13270 NewOpc = ISD::PARTIAL_REDUCE_FMLA;
13271 } else
13272 return SDValue();
13273 // For a 2-stage extend the signedness of both of the extends must match
13274 // If the mul has the same type, there is no outer extend, and thus we
13275 // can simply use the inner extends to pick the result node.
13276 // TODO: extend to handle nonneg zext as sext
13277 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13278 if (Op1.getValueType().getVectorElementType() != AccElemVT &&
13279 NewOpc != N->getOpcode())
13280 return SDValue();
13281
13282 // Only perform these combines if the target supports folding
13283 // the extends into the operation.
13285 NewOpc, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13286 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13287 return SDValue();
13288
13289 ApplyPredicate(RHSExtOp, LHSExtOp);
13290 return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp);
13291}
13292
13293// partial.reduce.*mla(acc, *ext(op), splat(1))
13294// -> partial.reduce.*mla(acc, op, splat(trunc(1)))
13295// partial.reduce.sumla(acc, sext(op), splat(1))
13296// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13297//
13298// partial.reduce.*mla(acc, sel(p, *ext(op), splat(0)), splat(1))
13299// -> partial.reduce.*mla(acc, sel(p, op, splat(0)), splat(trunc(1)))
13300SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
13301 SDLoc DL(N);
13302 SDValue Acc = N->getOperand(0);
13303 SDValue Op1 = N->getOperand(1);
13304 SDValue Op2 = N->getOperand(2);
13305
13307 return SDValue();
13308
13309 SDValue Pred;
13310 unsigned Op1Opcode = Op1.getOpcode();
13311 if (Op1Opcode == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) ||
13312 isZeroOrZeroSplatFP(Op1->getOperand(2)))) {
13313 Pred = Op1->getOperand(0);
13314 Op1 = Op1->getOperand(1);
13315 Op1Opcode = Op1->getOpcode();
13316 }
13317
13318 if (!ISD::isExtOpcode(Op1Opcode) && Op1Opcode != ISD::FP_EXTEND)
13319 return SDValue();
13320
13321 bool Op1IsSigned =
13322 Op1Opcode == ISD::SIGN_EXTEND || Op1Opcode == ISD::FP_EXTEND;
13323 bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
13324 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13325 if (Op1IsSigned != NodeIsSigned &&
13326 Op1.getValueType().getVectorElementType() != AccElemVT)
13327 return SDValue();
13328
13329 unsigned NewOpcode = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13331 : Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA
13333
13334 SDValue UnextOp1 = Op1.getOperand(0);
13335 EVT UnextOp1VT = UnextOp1.getValueType();
13336 auto *Context = DAG.getContext();
13338 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13339 TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
13340 return SDValue();
13341
13342 SDValue Constant = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13343 ? DAG.getConstantFP(1, DL, UnextOp1VT)
13344 : DAG.getConstant(1, DL, UnextOp1VT);
13345
13346 if (Pred) {
13347 SDValue Zero = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13348 ? DAG.getConstantFP(0, DL, UnextOp1VT)
13349 : DAG.getConstant(0, DL, UnextOp1VT);
13350 Constant = DAG.getSelect(DL, UnextOp1VT, Pred, Constant, Zero);
13351 }
13352 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
13353 Constant);
13354}
13355
13356SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13357 auto *SLD = cast<VPStridedLoadSDNode>(N);
13358 EVT EltVT = SLD->getValueType(0).getVectorElementType();
13359 // Combine strided loads with unit-stride to a regular VP load.
13360 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
13361 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13362 SDValue NewLd = DAG.getLoadVP(
13363 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
13364 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
13365 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
13366 SLD->getMemOperand(), SLD->isExpandingLoad());
13367 return CombineTo(N, NewLd, NewLd.getValue(1));
13368 }
13369 return SDValue();
13370}
13371
13372/// A vector select of 2 constant vectors can be simplified to math/logic to
13373/// avoid a variable select instruction and possibly avoid constant loads.
13374SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
13375 SDValue Cond = N->getOperand(0);
13376 SDValue N1 = N->getOperand(1);
13377 SDValue N2 = N->getOperand(2);
13378 EVT VT = N->getValueType(0);
13379 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
13383 return SDValue();
13384
13385 // Check if we can use the condition value to increment/decrement a single
13386 // constant value. This simplifies a select to an add and removes a constant
13387 // load/materialization from the general case.
13388 bool AllAddOne = true;
13389 bool AllSubOne = true;
13390 unsigned Elts = VT.getVectorNumElements();
13391 for (unsigned i = 0; i != Elts; ++i) {
13392 SDValue N1Elt = N1.getOperand(i);
13393 SDValue N2Elt = N2.getOperand(i);
13394 if (N1Elt.isUndef())
13395 continue;
13396 // N2 should not contain undef values since it will be reused in the fold.
13397 if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
13398 AllAddOne = false;
13399 AllSubOne = false;
13400 break;
13401 }
13402
13403 const APInt &C1 = N1Elt->getAsAPIntVal();
13404 const APInt &C2 = N2Elt->getAsAPIntVal();
13405 if (C1 != C2 + 1)
13406 AllAddOne = false;
13407 if (C1 != C2 - 1)
13408 AllSubOne = false;
13409 }
13410
13411 // Further simplifications for the extra-special cases where the constants are
13412 // all 0 or all -1 should be implemented as folds of these patterns.
13413 SDLoc DL(N);
13414 if (AllAddOne || AllSubOne) {
13415 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
13416 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
13417 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
13418 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
13419 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
13420 }
13421
13422 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
13423 APInt Pow2C;
13424 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
13425 isNullOrNullSplat(N2)) {
13426 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
13427 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
13428 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
13429 }
13430
13432 return V;
13433
13434 // The general case for select-of-constants:
13435 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
13436 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
13437 // leave that to a machine-specific pass.
13438 return SDValue();
13439}
13440
13441SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
13442 SDValue N0 = N->getOperand(0);
13443 SDValue N1 = N->getOperand(1);
13444 SDValue N2 = N->getOperand(2);
13445 SDLoc DL(N);
13446
13447 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13448 return V;
13449
13451 return V;
13452
13453 return SDValue();
13454}
13455
13457 SDValue FVal,
13458 const TargetLowering &TLI,
13459 SelectionDAG &DAG,
13460 const SDLoc &DL) {
13461 EVT VT = TVal.getValueType();
13462 if (!TLI.isTypeLegal(VT))
13463 return SDValue();
13464
13465 EVT CondVT = Cond.getValueType();
13466 assert(CondVT.isVector() && "Vector select expects a vector selector!");
13467
13468 bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
13469 bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
13470 bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
13471 bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
13472
13473 // no vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1), return
13474 if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
13475 return SDValue();
13476
13477 // select Cond, 0, 0 → 0
13478 if (IsTAllZero && IsFAllZero) {
13479 return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, VT)
13480 : DAG.getConstant(0, DL, VT);
13481 }
13482
13483 // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
13484 APInt TValAPInt;
13485 if (Cond.getOpcode() == ISD::SETCC &&
13486 Cond.getOperand(2) == DAG.getCondCode(ISD::SETGT) &&
13487 Cond.getOperand(0).getValueType() == VT && VT.isSimple() &&
13488 ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
13489 TValAPInt.isOne() &&
13490 ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
13492 return SDValue();
13493 }
13494
13495 // To use the condition operand as a bitwise mask, it must have elements that
13496 // are the same size as the select elements. i.e, the condition operand must
13497 // have already been promoted from the IR select condition type <N x i1>.
13498 // Don't check if the types themselves are equal because that excludes
13499 // vector floating-point selects.
13500 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
13501 return SDValue();
13502
13503 // Cond value must be 'sign splat' to be converted to a logical op.
13504 if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
13505 return SDValue();
13506
13507 // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
13508 if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
13509 Cond.getOpcode() == ISD::SETCC &&
13510 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
13511 CondVT) {
13512 if (IsTAllZero || IsFAllOne) {
13513 SDValue CC = Cond.getOperand(2);
13515 cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
13516 Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
13517 InverseCC);
13518 std::swap(TVal, FVal);
13519 std::swap(IsTAllOne, IsFAllOne);
13520 std::swap(IsTAllZero, IsFAllZero);
13521 }
13522 }
13523
13525 "Select condition no longer all-sign bits");
13526
13527 // select Cond, -1, 0 → bitcast Cond
13528 if (IsTAllOne && IsFAllZero)
13529 return DAG.getBitcast(VT, Cond);
13530
13531 // select Cond, -1, x → or Cond, x
13532 if (IsTAllOne) {
13533 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13534 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
13535 return DAG.getBitcast(VT, Or);
13536 }
13537
13538 // select Cond, x, 0 → and Cond, x
13539 if (IsFAllZero) {
13540 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
13541 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
13542 return DAG.getBitcast(VT, And);
13543 }
13544
13545 // select Cond, 0, x -> and not(Cond), x
13546 if (IsTAllZero &&
13548 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13549 SDValue And =
13550 DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
13551 return DAG.getBitcast(VT, And);
13552 }
13553
13554 return SDValue();
13555}
13556
13557SDValue DAGCombiner::visitVSELECT(SDNode *N) {
13558 SDValue N0 = N->getOperand(0);
13559 SDValue N1 = N->getOperand(1);
13560 SDValue N2 = N->getOperand(2);
13561 EVT VT = N->getValueType(0);
13562 SDLoc DL(N);
13563
13564 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13565 return V;
13566
13568 return V;
13569
13570 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
13571 if (!TLI.isTargetCanonicalSelect(N))
13572 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
13573 return DAG.getSelect(DL, VT, F, N2, N1);
13574
13575 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
13576 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
13579 TLI.getBooleanContents(N0.getValueType()) ==
13581 return DAG.getNode(
13582 ISD::ADD, DL, N1.getValueType(), N2,
13583 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
13584 }
13585
13586 // Canonicalize integer abs.
13587 // vselect (setg[te] X, 0), X, -X ->
13588 // vselect (setgt X, -1), X, -X ->
13589 // vselect (setl[te] X, 0), -X, X ->
13590 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
13591 if (N0.getOpcode() == ISD::SETCC) {
13592 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
13594 bool isAbs = false;
13595 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
13596
13597 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
13598 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
13599 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
13601 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
13602 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
13604
13605 if (isAbs) {
13607 return DAG.getNode(ISD::ABS, DL, VT, LHS);
13608
13609 SDValue Shift = DAG.getNode(
13610 ISD::SRA, DL, VT, LHS,
13611 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
13612 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
13613 AddToWorklist(Shift.getNode());
13614 AddToWorklist(Add.getNode());
13615 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
13616 }
13617
13618 // vselect x, y (fcmp lt x, y) -> fminnum x, y
13619 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
13620 //
13621 // This is OK if we don't care about what happens if either operand is a
13622 // NaN.
13623 //
13624 if (N0.hasOneUse() &&
13625 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
13626 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
13627 return FMinMax;
13628 }
13629
13630 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13631 return S;
13632 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13633 return S;
13634
13635 // If this select has a condition (setcc) with narrower operands than the
13636 // select, try to widen the compare to match the select width.
13637 // TODO: This should be extended to handle any constant.
13638 // TODO: This could be extended to handle non-loading patterns, but that
13639 // requires thorough testing to avoid regressions.
13640 if (isNullOrNullSplat(RHS)) {
13641 EVT NarrowVT = LHS.getValueType();
13643 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
13644 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
13645 unsigned WideWidth = WideVT.getScalarSizeInBits();
13646 bool IsSigned = isSignedIntSetCC(CC);
13647 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13648 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
13649 SetCCWidth != 1 && SetCCWidth < WideWidth &&
13650 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
13651 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
13652 // Both compare operands can be widened for free. The LHS can use an
13653 // extended load, and the RHS is a constant:
13654 // vselect (ext (setcc load(X), C)), N1, N2 -->
13655 // vselect (setcc extload(X), C'), N1, N2
13656 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13657 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
13658 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
13659 EVT WideSetCCVT = getSetCCResultType(WideVT);
13660 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
13661 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
13662 }
13663 }
13664
13665 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
13666 return ABD;
13667
13668 // Match VSELECTs into add with unsigned saturation.
13669 if (hasOperation(ISD::UADDSAT, VT)) {
13670 // Check if one of the arms of the VSELECT is vector with all bits set.
13671 // If it's on the left side invert the predicate to simplify logic below.
13672 SDValue Other;
13673 ISD::CondCode SatCC = CC;
13675 Other = N2;
13676 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13677 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
13678 Other = N1;
13679 }
13680
13681 if (Other && Other.getOpcode() == ISD::ADD) {
13682 SDValue CondLHS = LHS, CondRHS = RHS;
13683 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13684
13685 // Canonicalize condition operands.
13686 if (SatCC == ISD::SETUGE) {
13687 std::swap(CondLHS, CondRHS);
13688 SatCC = ISD::SETULE;
13689 }
13690
13691 // We can test against either of the addition operands.
13692 // x <= x+y ? x+y : ~0 --> uaddsat x, y
13693 // x+y >= x ? x+y : ~0 --> uaddsat x, y
13694 if (SatCC == ISD::SETULE && Other == CondRHS &&
13695 (OpLHS == CondLHS || OpRHS == CondLHS))
13696 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13697
13698 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
13699 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13700 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
13701 CondLHS == OpLHS) {
13702 // If the RHS is a constant we have to reverse the const
13703 // canonicalization.
13704 // x >= ~C ? x+C : ~0 --> uaddsat x, C
13705 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13706 return Cond->getAPIntValue() == ~Op->getAPIntValue();
13707 };
13708 if (SatCC == ISD::SETULE &&
13709 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
13710 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13711 }
13712 }
13713 }
13714
13715 // Match VSELECTs into sub with unsigned saturation.
13716 if (hasOperation(ISD::USUBSAT, VT)) {
13717 // Check if one of the arms of the VSELECT is a zero vector. If it's on
13718 // the left side invert the predicate to simplify logic below.
13719 SDValue Other;
13720 ISD::CondCode SatCC = CC;
13722 Other = N2;
13723 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13725 Other = N1;
13726 }
13727
13728 // zext(x) >= y ? trunc(zext(x) - y) : 0
13729 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13730 // zext(x) > y ? trunc(zext(x) - y) : 0
13731 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13732 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
13733 Other.getOperand(0).getOpcode() == ISD::SUB &&
13734 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
13735 SDValue OpLHS = Other.getOperand(0).getOperand(0);
13736 SDValue OpRHS = Other.getOperand(0).getOperand(1);
13737 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
13738 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
13739 DAG, DL))
13740 return R;
13741 }
13742
13743 if (Other && Other.getNumOperands() == 2) {
13744 SDValue CondRHS = RHS;
13745 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13746
13747 if (OpLHS == LHS) {
13748 // Look for a general sub with unsigned saturation first.
13749 // x >= y ? x-y : 0 --> usubsat x, y
13750 // x > y ? x-y : 0 --> usubsat x, y
13751 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
13752 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
13753 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13754
13755 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13756 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13757 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
13758 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13759 // If the RHS is a constant we have to reverse the const
13760 // canonicalization.
13761 // x > C-1 ? x+-C : 0 --> usubsat x, C
13762 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13763 return (!Op && !Cond) ||
13764 (Op && Cond &&
13765 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
13766 };
13767 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
13768 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
13769 /*AllowUndefs*/ true)) {
13770 OpRHS = DAG.getNegative(OpRHS, DL, VT);
13771 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13772 }
13773
13774 // Another special case: If C was a sign bit, the sub has been
13775 // canonicalized into a xor.
13776 // FIXME: Would it be better to use computeKnownBits to
13777 // determine whether it's safe to decanonicalize the xor?
13778 // x s< 0 ? x^C : 0 --> usubsat x, C
13779 APInt SplatValue;
13780 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
13781 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
13783 SplatValue.isSignMask()) {
13784 // Note that we have to rebuild the RHS constant here to
13785 // ensure we don't rely on particular values of undef lanes.
13786 OpRHS = DAG.getConstant(SplatValue, DL, VT);
13787 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13788 }
13789 }
13790 }
13791 }
13792 }
13793 }
13794
13795 // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
13796 // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
13797 if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
13798 return UMin;
13799 }
13800
13801 if (SimplifySelectOps(N, N1, N2))
13802 return SDValue(N, 0); // Don't revisit N.
13803
13804 // Fold (vselect all_ones, N1, N2) -> N1
13806 return N1;
13807 // Fold (vselect all_zeros, N1, N2) -> N2
13809 return N2;
13810
13811 // The ConvertSelectToConcatVector function is assuming both the above
13812 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
13813 // and addressed.
13814 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13817 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
13818 return CV;
13819 }
13820
13821 if (SDValue V = foldVSelectOfConstants(N))
13822 return V;
13823
13824 if (hasOperation(ISD::SRA, VT))
13826 return V;
13827
13829 return SDValue(N, 0);
13830
13831 if (SDValue V = combineVSelectWithAllOnesOrZeros(N0, N1, N2, TLI, DAG, DL))
13832 return V;
13833
13834 return SDValue();
13835}
13836
13837SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
13838 SDValue N0 = N->getOperand(0);
13839 SDValue N1 = N->getOperand(1);
13840 SDValue N2 = N->getOperand(2);
13841 SDValue N3 = N->getOperand(3);
13842 SDValue N4 = N->getOperand(4);
13843 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
13844 SDLoc DL(N);
13845
13846 // fold select_cc lhs, rhs, x, x, cc -> x
13847 if (N2 == N3)
13848 return N2;
13849
13850 // select_cc bool, 0, x, y, seteq -> select bool, y, x
13851 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
13852 isNullConstant(N1))
13853 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
13854
13855 // Determine if the condition we're dealing with is constant
13856 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
13857 CC, DL, false)) {
13858 AddToWorklist(SCC.getNode());
13859
13860 // cond always true -> true val
13861 // cond always false -> false val
13862 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
13863 return SCCC->isZero() ? N3 : N2;
13864
13865 // When the condition is UNDEF, just return the first operand. This is
13866 // coherent the DAG creation, no setcc node is created in this case
13867 if (SCC->isUndef())
13868 return N2;
13869
13870 // Fold to a simpler select_cc
13871 if (SCC.getOpcode() == ISD::SETCC) {
13872 return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(),
13873 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
13874 SCC.getOperand(2), SCC->getFlags());
13875 }
13876 }
13877
13878 // If we can fold this based on the true/false value, do so.
13879 if (SimplifySelectOps(N, N2, N3))
13880 return SDValue(N, 0); // Don't revisit N.
13881
13882 // fold select_cc into other things, such as min/max/abs
13883 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
13884}
13885
/// Combine a SETCC node, preferring to keep it as a SETCC when its single
/// user is a BRCOND (brcond-related combines expect a setcc operand).
SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desired
  // we keep the argument to a brcond as a setcc as much as possible.
  bool PreferSetCC =
      N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;

  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  SDLoc DL(N);

  if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
    // If we prefer to have a setcc, and we don't, we'll try our best to
    // recreate one using rebuildSetCC.
    if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
      SDValue NewSetCC = rebuildSetCC(Combined);

      // We don't have anything interesting to combine to.
      if (NewSetCC.getNode() == N)
        return SDValue();

      if (NewSetCC)
        return NewSetCC;
    }
    return Combined;
  }

  // Optimize
  //    1) (icmp eq/ne (and X, C0), (shift X, C1))
  // or
  //    2) (icmp eq/ne X, (rotate X, C1))
  // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
  // remaining bits (i.e something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
  // Then:
  // If C1 is a power of 2, then the rotate and shift+and versions are
  // equivalent, so we can interchange them depending on target preference.
  // Otherwise, if we have the shift+and version we can interchange srl/shl
  // which in turn affects the constant C0. We can use this to get better
  // constants again determined by target preference.
  if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
    // (and X, C0) compared against (srl/shl X, C1) of the same X.
    auto IsAndWithShift = [](SDValue A, SDValue B) {
      return A.getOpcode() == ISD::AND &&
             (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
             A.getOperand(0) == B.getOperand(0);
    };
    // X compared against (rotl/rotr X, C1).
    auto IsRotateWithOp = [](SDValue A, SDValue B) {
      return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
             B.getOperand(0) == A;
    };
    SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
    bool IsRotate = false;

    // Find either shift+and or rotate pattern.
    if (IsAndWithShift(N0, N1)) {
      AndOrOp = N0;
      ShiftOrRotate = N1;
    } else if (IsAndWithShift(N1, N0)) {
      AndOrOp = N1;
      ShiftOrRotate = N0;
    } else if (IsRotateWithOp(N0, N1)) {
      IsRotate = true;
      AndOrOp = N0;
      ShiftOrRotate = N1;
    } else if (IsRotateWithOp(N1, N0)) {
      IsRotate = true;
      AndOrOp = N1;
      ShiftOrRotate = N0;
    }

    // Only transform when the intermediate nodes have no other users.
    if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
        (IsRotate || AndOrOp.hasOneUse())) {
      EVT OpVT = N0.getValueType();
      // Get constant shift/rotate amount and possibly mask (if it's the
      // shift+and variant).
      auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
        ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
                                                    /*AllowTrunc*/ false);
        if (CNode == nullptr)
          return std::nullopt;
        return CNode->getAPIntValue();
      };
      std::optional<APInt> AndCMask =
          IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
      std::optional<APInt> ShiftCAmt =
          GetAPIntValue(ShiftOrRotate.getOperand(1));
      unsigned NumBits = OpVT.getScalarSizeInBits();

      // We found constants.
      if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
        unsigned ShiftOpc = ShiftOrRotate.getOpcode();
        // Check that the constants meet the constraints.
        bool CanTransform = IsRotate;
        if (!CanTransform) {
          // Check that mask and shift complement each other
          CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
          // Check that we are comparing all bits
          CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
          // Check that the and mask is correct for the shift
          CanTransform &=
              ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
        }

        // See if target prefers another shift/rotate opcode.
        unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
            OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
        // Transform is valid and we have a new preference.
        if (CanTransform && NewShiftOpc != ShiftOpc) {
          SDValue NewShiftOrRotate =
              DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
                          ShiftOrRotate.getOperand(1));
          SDValue NewAndOrOp = SDValue();

          if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
            // Rebuild the mask to select the bits NOT covered by the new
            // shift direction.
            APInt NewMask =
                NewShiftOpc == ISD::SHL
                    ? APInt::getHighBitsSet(NumBits,
                                            NumBits - ShiftCAmt->getZExtValue())
                    : APInt::getLowBitsSet(NumBits,
                                           NumBits - ShiftCAmt->getZExtValue());
            NewAndOrOp =
                DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
                            DAG.getConstant(NewMask, DL, OpVT));
          } else {
            // Rotate form: compare the unmasked operand directly.
            NewAndOrOp = ShiftOrRotate.getOperand(0);
          }

          return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
        }
      }
    }
  }
  return SDValue();
}
14020
14021SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
14022 SDValue LHS = N->getOperand(0);
14023 SDValue RHS = N->getOperand(1);
14024 SDValue Carry = N->getOperand(2);
14025 SDValue Cond = N->getOperand(3);
14026
14027 // If Carry is false, fold to a regular SETCC.
14028 if (isNullConstant(Carry))
14029 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
14030
14031 return SDValue();
14032}
14033
14034/// Check if N satisfies:
14035/// N is used once.
14036/// N is a Load.
14037/// The load is compatible with ExtOpcode. It means
14038/// If load has explicit zero/sign extension, ExpOpcode must have the same
14039/// extension.
14040/// Otherwise returns true.
14041static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
14042 if (!N.hasOneUse())
14043 return false;
14044
14045 if (!isa<LoadSDNode>(N))
14046 return false;
14047
14048 LoadSDNode *Load = cast<LoadSDNode>(N);
14049 ISD::LoadExtType LoadExt = Load->getExtensionType();
14050 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
14051 return true;
14052
14053 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
14054 // extension.
14055 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
14056 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
14057 return false;
14058
14059 return true;
14060}
14061
/// Fold
/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
// NOTE(review): the first line of this function's signature was dropped from
// this listing during extraction; only the trailing parameters are visible.
                                        SelectionDAG &DAG, const SDLoc &DL,
                                        CombineLevel Level) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
          Opcode == ISD::ANY_EXTEND) &&
         "Expected EXTEND dag node in input!");

  // Match (select Cond, Op1, Op2).
  // NOTE(review): the sd_match(...) line preceding m_Value(Op2) was dropped
  // from this listing during extraction.
  SDValue Cond, Op1, Op2;
                        m_Value(Op2)))))
    return SDValue();

  // Both select arms must be once-used loads whose existing extension (if
  // any) agrees with Opcode; see isCompatibleLoad above.
  if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
    return SDValue();

  // Map the extend opcode onto the matching extending-load kind; ANY_EXTEND
  // keeps the default EXTLOAD.
  auto ExtLoadOpcode = ISD::EXTLOAD;
  if (Opcode == ISD::SIGN_EXTEND)
    ExtLoadOpcode = ISD::SEXTLOAD;
  else if (Opcode == ISD::ZERO_EXTEND)
    ExtLoadOpcode = ISD::ZEXTLOAD;

  // An illegal VSELECT may fail ISel if it is created after legalization
  // (DAG Combine2), so we conservatively check the OperationAction here.
  // NOTE(review): one condition line of the check below was dropped from
  // this listing during extraction.
  LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
  LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
  if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
      !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
      (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
    return SDValue();

  // Push the extend into both arms and rebuild the select at the wide type.
  SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
  SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
  return DAG.getSelect(DL, VT, Cond, Ext1, Ext2);
}
14106
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
// NOTE(review): the first line of this function's signature was dropped from
// this listing during extraction.
                                         const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
         "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  // getNode constant-folds an extend of a ConstantSDNode.
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, DL, VT, N0);

  // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
  // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  if (N0->getOpcode() == ISD::SELECT) {
    SDValue Op1 = N0->getOperand(1);
    SDValue Op2 = N0->getOperand(2);
    if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
        (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
      // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg, i.e.
      //
      // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
      // t2: i64 = any_extend t1
      // -->
      // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
      // -->
      // t4: i64 = sign_extend_inreg t3
      unsigned FoldOpc = Opcode;
      if (FoldOpc == ISD::ANY_EXTEND)
        FoldOpc = ISD::SIGN_EXTEND;
      return DAG.getSelect(DL, VT, N0->getOperand(0),
                           DAG.getNode(FoldOpc, DL, VT, Op1),
                           DAG.getNode(FoldOpc, DL, VT, Op2));
    }
  }

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  EVT SVT = VT.getScalarType();
  // NOTE(review): the second half of this condition (checking that N0 is a
  // build vector of constants) was dropped from this listing.
  if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
    return SDValue();

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  // NOTE(review): the declaration of the Elts vector used below was dropped
  // from this listing during extraction.
  unsigned NumElts = VT.getVectorNumElements();

  for (unsigned i = 0; i != NumElts; ++i) {
    SDValue Op = N0.getOperand(i);
    if (Op.isUndef()) {
      // Undef lanes: any-extends may stay undef; sign/zero extends must
      // produce a defined zero.
      if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
        Elts.push_back(DAG.getUNDEF(SVT));
      else
        Elts.push_back(DAG.getConstant(0, DL, SVT));
      continue;
    }

    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts);
}
14191
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension are possible and the above
// mentioned transformation is profitable.
// NOTE(review): the first line of this function's signature was dropped from
// this listing during extraction.
                                    unsigned ExtOpc,
                                    SmallVectorImpl<SDNode *> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
  // Walk every user of the load value (skipping the extend N itself and
  // uses of other result values).
  for (SDUse &Use : N0->uses()) {
    SDNode *User = Use.getUser();
    if (User == N)
      continue;
    if (Use.getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
      // NOTE(review): the line extracting the user's condition code (CC) was
      // dropped from this listing during extraction.
      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
        // Sign bits will be lost after a zext.
        return false;
      // Only record the setcc as extendable if its other operand is a
      // constant (that operand can be re-extended freely).
      bool Add = false;
      for (unsigned i = 0; i != 2; ++i) {
        SDValue UseOp = User->getOperand(i);
        if (UseOp == N0)
          continue;
        if (!isa<ConstantSDNode>(UseOp))
          return false;
        Add = true;
      }
      if (Add)
        ExtendNodes.push_back(User);
      continue;
    }
    // If truncates aren't free and there are users we can't
    // extend, it isn't worthwhile.
    if (!isTruncFree)
      return false;
    // Remember if this value is live-out.
    if (User->getOpcode() == ISD::CopyToReg)
      HasCopyToRegUses = true;
  }

  if (HasCopyToRegUses) {
    bool BothLiveOut = false;
    for (SDUse &Use : N->uses()) {
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
        BothLiveOut = true;
        break;
      }
    }
    if (BothLiveOut)
      // Both unextended and extended values are live out. There had better be
      // a good reason for the transformation.
      return !ExtendNodes.empty();
  }
  return true;
}
14251
/// Rewrite the recorded SETCC users of OrigLoad so they compare against
/// ExtLoad instead: the load operand becomes the extended load, and every
/// other operand is extended with ExtType to match.
void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                                  SDValue OrigLoad, SDValue ExtLoad,
                                  ISD::NodeType ExtType) {
  // Extend SetCC uses if necessary.
  SDLoc DL(ExtLoad);
  for (SDNode *SetCC : SetCCs) {
    // NOTE(review): the declaration of the Ops operand vector was dropped
    // from this listing during extraction.

    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == OrigLoad)
        Ops.push_back(ExtLoad);
      else
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }

    // Keep the original condition code.
    Ops.push_back(SetCC->getOperand(2));
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
  }
}
14272
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Restrict to simple, unindexed, non-extending, once-used loads producing
  // a power-of-2-element vector.
  // NOTE(review): one condition line of this check was dropped from this
  // listing during extraction.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || !LN0->isSimple() ||
      !DstVT.isVector() || !DstVT.isPow2VectorType() ||
    return SDValue();

  // NOTE(review): the declaration of the SetCCs vector was dropped from this
  // listing during extraction.
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  // NOTE(review): the declarations of the Loads and Chains vectors were
  // dropped from this listing during extraction.

  // Emit one extending load per split, advancing the pointer by Stride
  // bytes each time.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;

    // NOTE(review): the line declaring SplitLoad as the result of this call
    // was dropped from this listing during extraction.
        DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
                       BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
                       SplitSrcVT, LN0->getBaseAlign(),
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
14371
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);
  EVT OrigVT = N->getOperand(0).getValueType();
  // Bail out when the zero-extend is free on this target.
  if (TLI.isZExtFree(OrigVT, VT))
    return SDValue();

  // and/or/xor
  SDValue N0 = N->getOperand(0);
  if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();


  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // NOTE(review): the declaration of the SetCCs vector was dropped from this
  // listing during extraction.
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  // Re-create the shift on the extended value.
  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  // Re-apply the logic op with its constant zero-extended to the wide type.
  APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  if (SDValue(Load, 0).hasOneUse()) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    // Other users of the load remain: hand them a truncate of the extended
    // load and thread the new chain through.
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }

  // N0 is dead at this point.
  recursivelyDeleteUnusedNodes(N0.getNode());

  return SDValue(N,0); // Return N so it doesn't get rechecked!
}
14447
14448/// If we're narrowing or widening the result of a vector select and the final
14449/// size is the same size as a setcc (compare) feeding the select, then try to
14450/// apply the cast operation to the select's operands because matching vector
14451/// sizes for a select condition and other operands should be more efficient.
14452SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14453 unsigned CastOpcode = Cast->getOpcode();
14454 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14455 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14456 CastOpcode == ISD::FP_ROUND) &&
14457 "Unexpected opcode for vector select narrowing/widening");
14458
14459 // We only do this transform before legal ops because the pattern may be
14460 // obfuscated by target-specific operations after legalization. Do not create
14461 // an illegal select op, however, because that may be difficult to lower.
14462 EVT VT = Cast->getValueType(0);
14463 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
14464 return SDValue();
14465
14466 SDValue VSel = Cast->getOperand(0);
14467 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14468 VSel.getOperand(0).getOpcode() != ISD::SETCC)
14469 return SDValue();
14470
14471 // Does the setcc have the same vector size as the casted select?
14472 SDValue SetCC = VSel.getOperand(0);
14473 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
14474 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14475 return SDValue();
14476
14477 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14478 SDValue A = VSel.getOperand(1);
14479 SDValue B = VSel.getOperand(2);
14480 SDValue CastA, CastB;
14481 SDLoc DL(Cast);
14482 if (CastOpcode == ISD::FP_ROUND) {
14483 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14484 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
14485 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
14486 } else {
14487 CastA = DAG.getNode(CastOpcode, DL, VT, A);
14488 CastB = DAG.getNode(CastOpcode, DL, VT, B);
14489 }
14490 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
14491}
14492
// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// NOTE(review): the first line of this function's signature was dropped from
// this listing during extraction.
                                     const TargetLowering &TLI, EVT VT,
                                     bool LegalOperations, SDNode *N,
                                     SDValue N0, ISD::LoadExtType ExtLoadType) {
  SDNode *N0Node = N0.getNode();
  // Accept a load that already extends the right way (sext for SEXTLOAD,
  // zext for ZEXTLOAD), or a plain any-extending EXTLOAD we can strengthen.
  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
                                                   : ISD::isZEXTLoad(N0Node);
  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
      !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  EVT MemVT = LN0->getMemoryVT();
  if ((LegalOperations || !LN0->isSimple() ||
       VT.isVector()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
    return SDValue();

  // Build the wide extending load and replace both the value and the chain
  // of the original load.
  SDValue ExtLoad =
      DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                     LN0->getBasePtr(), MemVT, LN0->getMemOperand());
  Combiner.CombineTo(N, ExtLoad);
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  if (LN0->use_empty())
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
14522
// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target. NonNegZExt can be set to true if a zero
// extend has the nonneg flag to allow use of sextload if profitable.
// NOTE(review): the first line of this function's signature was dropped from
// this listing during extraction.
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc,
                                  bool NonNegZExt = false) {
  // NOTE(review): the condition guarding this early return was dropped from
  // this listing during extraction.
    return {};

  // If this is zext nneg, see if it would make sense to treat it as a sext.
  if (NonNegZExt) {
    assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
           "Unexpected load type or opcode");
    // A signed compare among the load's users suggests sign extension is
    // the more profitable form.
    for (SDNode *User : N0->users()) {
      if (User->getOpcode() == ISD::SETCC) {
        // NOTE(review): the line extracting the user's condition code (CC)
        // was dropped from this listing during extraction.
        if (ISD::isSignedIntSetCC(CC)) {
          ExtLoadType = ISD::SEXTLOAD;
          ExtOpc = ISD::SIGN_EXTEND;
          break;
        }
      }
    }
  }

  // TODO: isFixedLengthVector() should be removed and any negative effects on
  // code generation being the result of that target's implementation of
  // isVectorLoadExtDesirable().
  if ((LegalOperations || VT.isFixedLengthVector() ||
       !cast<LoadSDNode>(N0)->isSimple()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
    return {};

  bool DoXform = true;
  // NOTE(review): the declaration of the SetCCs vector was dropped from this
  // listing during extraction.
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  } else {
    // Other users of the load remain: hand them a truncate of the extended
    // load instead.
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
14587
// Fold ([s|z]ext (masked_load x)) into a single extending masked load when
// the extension is legal/custom and desirable, extending the pass-through
// value to the wider type and rewiring the chain.
// NOTE(review): this rendered view elides the rest of the signature (14589)
// and the declaration of `Ld` (14595, presumably a dyn_cast to
// MaskedLoadSDNode) — confirm against the upstream file.
14588static SDValue
14590                         bool LegalOperations, SDNode *N, SDValue N0,
14591                         ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
14592  if (!N0.hasOneUse())
14593    return SDValue();
14594
// Only non-extending masked loads can absorb the extension.
14596  if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
14597    return SDValue();
14598
14599  if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
14600      !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
14601    return SDValue();
14602
14603  if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14604    return SDValue();
14605
14606  SDLoc dl(Ld);
// The pass-through operand must be widened with the same extension so
// masked-off lanes keep the same (extended) values.
14607  SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
14608  SDValue NewLoad = DAG.getMaskedLoad(
14609      VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
14610      PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
14611      ExtLoadType, Ld->isExpandingLoad());
// Forward the old load's chain users to the new load's chain result.
14612  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
14613  return NewLoad;
14614}
14615
// NOTE(review): this rendered view elides the first signature line (14617)
// and the start of the ReplaceAllUsesOfValueWith call (14638) that replaces
// the old atomic load's value result — confirm against the upstream file.
14616// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
14618                                         const TargetLowering &TLI, EVT VT,
14619                                         SDValue N0,
14620                                         ISD::LoadExtType ExtLoadType) {
14621  auto *ALoad = dyn_cast<AtomicSDNode>(N0);
14622  if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
14623    return {};
14624  EVT MemoryVT = ALoad->getMemoryVT();
14625  if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
14626    return {};
14627  // Can't fold into ALoad if it is already extending differently.
14628  ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
14629  if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
14630      (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
14631    return {};
14632
14633  EVT OrigVT = ALoad->getValueType(0);
14634  assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
// Build the wide extending atomic load, then give the old load's value
// users a truncate of the wide result (the combine on N removes it again).
14635  auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomicLoad(
14636      ExtLoadType, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
14637      ALoad->getBasePtr(), ALoad->getMemOperand()));
14639      SDValue(ALoad, 0),
14640      DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
14641  // Update the chain uses.
14642  DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
14643  return SDValue(NewALoad, 0);
14644}
14645
// Fold an extended sign-bit test into a not+shift:
//   sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
//   zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
// NOTE(review): the opening signature line (14646, presumably
// `static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,`)
// is elided in this rendered view — confirm against the upstream file.
14647                                        bool LegalOperations) {
14648  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14649          N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
14650
// Only handle a single-use i1 setcc feeding the extend, pre-legalization.
14651  SDValue SetCC = N->getOperand(0);
14652  if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
14653      !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
14654    return SDValue();
14655
14656  SDValue X = SetCC.getOperand(0);
14657  SDValue Ones = SetCC.getOperand(1);
14658  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
14659  EVT VT = N->getValueType(0);
14660  EVT XVT = X.getValueType();
14661  // setge X, C is canonicalized to setgt, so we do not need to match that
14662  // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
14663  // not require the 'not' op.
14664  if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
14665    // Invert and smear/shift the sign bit:
14666    // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
14667    // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
14668    SDLoc DL(N);
14669    unsigned ShCt = VT.getSizeInBits() - 1;
14670    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14671    if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
14672      SDValue NotX = DAG.getNOT(DL, X, VT);
14673      SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
// sra replicates the sign bit (for sext's all-ones result); srl produces
// the 0/1 result required by zext.
14674      auto ShiftOpcode =
14675          N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
14676      return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
14677    }
14678  }
14679  return SDValue();
14680}
14681
// Try to fold (sext (setcc x, y, cc)) into a setcc of matching width, a
// setcc on extended operands, or a select of constants.
// NOTE(review): this rendered view elides line 14689 (the condition-code
// extraction into `CC`), line 14702 (the ZeroOrNegativeOneBooleanContent
// comparison operand + opening brace) and line 14727 (the
// `!isOperationLegalOrCustom(ISD::SETCC, N00VT)` clause + brace) — confirm
// against the upstream file.
14682SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
14683  SDValue N0 = N->getOperand(0);
14684  if (N0.getOpcode() != ISD::SETCC)
14685    return SDValue();
14686
14687  SDValue N00 = N0.getOperand(0);
14688  SDValue N01 = N0.getOperand(1);
14690  EVT VT = N->getValueType(0);
14691  EVT N00VT = N00.getValueType();
14692  SDLoc DL(N);
14693
14694  // Propagate fast-math-flags.
14695  SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14696
14697  // On some architectures (such as SSE/NEON/etc) the SETCC result type is
14698  // the same size as the compared operands. Try to optimize sext(setcc())
14699  // if this is the case.
14700  if (VT.isVector() && !LegalOperations &&
14701      TLI.getBooleanContents(N00VT) ==
14703    EVT SVT = getSetCCResultType(N00VT);
14704
14705    // If we already have the desired type, don't change it.
14706    if (SVT != N0.getValueType()) {
14707      // We know that the # elements of the results is the same as the
14708      // # elements of the compare (and the # elements of the compare result
14709      // for that matter). Check to see that they are the same size. If so,
14710      // we know that the element size of the sext'd result matches the
14711      // element size of the compare operands.
14712      if (VT.getSizeInBits() == SVT.getSizeInBits())
14713        return DAG.getSetCC(DL, VT, N00, N01, CC);
14714
14715      // If the desired elements are smaller or larger than the source
14716      // elements, we can use a matching integer vector type and then
14717      // truncate/sign extend.
14718      EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
14719      if (SVT == MatchingVecType) {
14720        SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
14721        return DAG.getSExtOrTrunc(VsetCC, DL, VT);
14722      }
14723    }
14724
14725    // Try to eliminate the sext of a setcc by zexting the compare operands.
14726    if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
// For a signed compare the operands must be sign extended; otherwise zero
// extension preserves the comparison.
14728      bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
14729      unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14730      unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14731
14732      // We have an unsupported narrow vector compare op that would be legal
14733      // if extended to the destination type. See if the compare operands
14734      // can be freely extended to the destination type.
14735      auto IsFreeToExtend = [&](SDValue V) {
14736        if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
14737          return true;
14738        // Match a simple, non-extended load that can be converted to a
14739        // legal {z/s}ext-load.
14740        // TODO: Allow widening of an existing {z/s}ext-load?
14741        if (!(ISD::isNON_EXTLoad(V.getNode()) &&
14742              ISD::isUNINDEXEDLoad(V.getNode()) &&
14743              cast<LoadSDNode>(V)->isSimple() &&
14744              TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
14745          return false;
14746
14747        // Non-chain users of this value must either be the setcc in this
14748        // sequence or extends that can be folded into the new {z/s}ext-load.
14749        for (SDUse &Use : V->uses()) {
14750          // Skip uses of the chain and the setcc.
14751          SDNode *User = Use.getUser();
14752          if (Use.getResNo() != 0 || User == N0.getNode())
14753            continue;
14754          // Extra users must have exactly the same cast we are about to create.
14755          // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
14756          // is enhanced similarly.
14757          if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
14758            return false;
14759        }
14760        return true;
14761      };
14762
14763      if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
14764        SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
14765        SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
14766        return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
14767      }
14768    }
14769  }
14770
14771  // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
14772  // Here, T can be 1 or -1, depending on the type of the setcc and
14773  // getBooleanContents().
14774  unsigned SetCCWidth = N0.getScalarValueSizeInBits();
14775
14776  // To determine the "true" side of the select, we need to know the high bit
14777  // of the value returned by the setcc if it evaluates to true.
14778  // If the type of the setcc is i1, then the true case of the select is just
14779  // sext(i1 1), that is, -1.
14780  // If the type of the setcc is larger (say, i8) then the value of the high
14781  // bit depends on getBooleanContents(), so ask TLI for a real "true" value
14782  // of the appropriate width.
14783  SDValue ExtTrueVal = (SetCCWidth == 1)
14784                           ? DAG.getAllOnesConstant(DL, VT)
14785                           : DAG.getBoolConstant(true, DL, VT, N00VT);
14786  SDValue Zero = DAG.getConstant(0, DL, VT);
14787  if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
14788    return SCC;
14789
14790  if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
14791    EVT SetCCVT = getSetCCResultType(N00VT);
14792    // Don't do this transform for i1 because there's a select transform
14793    // that would reverse it.
14794    // TODO: We should not do this transform at all without a target hook
14795    // because a sext is likely cheaper than a select?
14796    if (SetCCVT.getScalarSizeInBits() != 1 &&
14797        (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
14798      SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
14799      return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
14800    }
14801  }
14802
14803  return SDValue();
14804}
14805
// Combine visitor for ISD::SIGN_EXTEND. Applies, in order: constant folding,
// ext-of-ext collapsing, sext_inreg folds, trunc folds, load folds (plain,
// masked, split, extload, atomic), logic-of-load folds, sign-bit tests,
// setcc folds, sext->zext when the sign bit is known zero, and negation /
// decrement / not-of-i1 rewrites.
// NOTE(review): this rendered view elides several lines (e.g. the
// *_EXTEND_VECTOR_INREG opcode checks around 14829-14830, the extra
// arguments to tryToFoldExtOfLoad/tryToFoldExtOfMaskedLoad at 14910/14915,
// the SetCCs declaration at 14943, the truncate construction at 14961, and
// the pattern-match conditions at 14994-14996 / 15003-15005) — confirm
// against the upstream file before relying on this copy.
14806SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
14807  SDValue N0 = N->getOperand(0);
14808  EVT VT = N->getValueType(0);
14809  SDLoc DL(N);
14810
14811  if (VT.isVector())
14812    if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14813      return FoldedVOp;
14814
14815  // sext(undef) = 0 because the top bit will all be the same.
14816  if (N0.isUndef())
14817    return DAG.getConstant(0, DL, VT);
14818
14819  if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14820    return Res;
14821
14822  // fold (sext (sext x)) -> (sext x)
14823  // fold (sext (aext x)) -> (sext x)
14824  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
14825    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
14826
14827  // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14828  // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14831    return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
14832                       N0.getOperand(0));
14833
14834  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
14835    SDValue N00 = N0.getOperand(0);
14836    EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
14837    if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
14838      // fold (sext (sext_inreg x)) -> (sext (trunc x))
14839      if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
14840        SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
14841        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
14842      }
14843
14844      // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
14845      if (!LegalTypes || TLI.isTypeLegal(VT)) {
14846        SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
14847        return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
14848                           N0->getOperand(1));
14849      }
14850    }
14851  }
14852
14853  if (N0.getOpcode() == ISD::TRUNCATE) {
14854    // fold (sext (truncate (load x))) -> (sext (smaller load x))
14855    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
14856    if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14857      SDNode *oye = N0.getOperand(0).getNode();
14858      if (NarrowLoad.getNode() != N0.getNode()) {
14859        CombineTo(N0.getNode(), NarrowLoad);
14860        // CombineTo deleted the truncate, if needed, but not what's under it.
14861        AddToWorklist(oye);
14862      }
14863      return SDValue(N, 0); // Return N so it doesn't get rechecked!
14864    }
14865
14866    // See if the value being truncated is already sign extended. If so, just
14867    // eliminate the trunc/sext pair.
14868    SDValue Op = N0.getOperand(0);
14869    unsigned OpBits = Op.getScalarValueSizeInBits();
14870    unsigned MidBits = N0.getScalarValueSizeInBits();
14871    unsigned DestBits = VT.getScalarSizeInBits();
14872
// nsw on the truncate, or enough known sign bits, guarantees the truncated
// bits were all copies of the sign bit.
14873    if (N0->getFlags().hasNoSignedWrap() ||
14874        DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14875      if (OpBits == DestBits) {
14876        // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14877        // bits, it is already ready.
14878        return Op;
14879      }
14880
14881      if (OpBits < DestBits) {
14882        // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14883        // bits, just sext from i32.
14884        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14885      }
14886
14887      // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14888      // bits, just truncate to i32.
14889      SDNodeFlags Flags;
14890      Flags.setNoSignedWrap(true);
14891      Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
14892      return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14893    }
14894
14895    // fold (sext (truncate x)) -> (sextinreg x).
14896    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
14897                                                 N0.getValueType())) {
14898      if (OpBits < DestBits)
14899        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
14900      else if (OpBits > DestBits)
14901        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
14902      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
14903                         DAG.getValueType(N0.getValueType()));
14904    }
14905  }
14906
14907  // Try to simplify (sext (load x)).
14908  if (SDValue foldedExt =
14909          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14911    return foldedExt;
14912
14913  if (SDValue foldedExt =
14914          tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14916    return foldedExt;
14917
14918  // fold (sext (load x)) to multiple smaller sextloads.
14919  // Only on illegal but splittable vectors.
14920  if (SDValue ExtLoad = CombineExtLoad(N))
14921    return ExtLoad;
14922
14923  // Try to simplify (sext (sextload x)).
14924  if (SDValue foldedExt = tryToFoldExtOfExtload(
14925          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
14926    return foldedExt;
14927
14928  // Try to simplify (sext (atomic_load x)).
14929  if (SDValue foldedExt =
14930          tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
14931    return foldedExt;
14932
14933  // fold (sext (and/or/xor (load x), cst)) ->
14934  //      (and/or/xor (sextload x), (sext cst))
14935  if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
14936      isa<LoadSDNode>(N0.getOperand(0)) &&
14937      N0.getOperand(1).getOpcode() == ISD::Constant &&
14938      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14939    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14940    EVT MemVT = LN00->getMemoryVT();
14941    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
14942        LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
14944      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14945                                             ISD::SIGN_EXTEND, SetCCs, TLI);
14946      if (DoXform) {
14947        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
14948                                         LN00->getChain(), LN00->getBasePtr(),
14949                                         LN00->getMemoryVT(),
14950                                         LN00->getMemOperand());
// The logic-op constant must be sign extended to match the widened load.
14951        APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
14952        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14953                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
14954        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
14955        bool NoReplaceTruncAnd = !N0.hasOneUse();
14956        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14957        CombineTo(N, And);
14958        // If N0 has multiple uses, change other uses as well.
14959        if (NoReplaceTruncAnd) {
14960          SDValue TruncAnd =
14962          CombineTo(N0.getNode(), TruncAnd);
14963        }
14964        if (NoReplaceTrunc) {
14965          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14966        } else {
14967          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14968                                      LN00->getValueType(0), ExtLoad);
14969          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14970        }
14971        return SDValue(N,0); // Return N so it doesn't get rechecked!
14972      }
14973    }
14974  }
14975
14976  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14977    return V;
14978
14979  if (SDValue V = foldSextSetcc(N))
14980    return V;
14981
14982  // fold (sext x) -> (zext x) if the sign bit is known zero.
14983  if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14984      (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14985      DAG.SignBitIsZero(N0))
14986    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14987
14988  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14989    return NewVSel;
14990
14991  // Eliminate this sign extend by doing a negation in the destination type:
14992  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14993  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14997    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14998    return DAG.getNegative(Zext, DL, VT);
14999  }
15000  // Eliminate this sign extend by doing a decrement in the destination type:
15001  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
15002  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
15006    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
15007    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
15008  }
15009
15010  // fold sext (not i1 X) -> add (zext i1 X), -1
15011  // TODO: This could be extended to handle bool vectors.
15012  if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
15013      (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
15014                            TLI.isOperationLegal(ISD::ADD, VT)))) {
15015    // If we can eliminate the 'not', the sext form should be better
15016    if (SDValue NewXor = visitXOR(N0.getNode())) {
15017      // Returning N0 is a form of in-visit replacement that may have
15018      // invalidated N0.
15019      if (NewXor.getNode() == N0.getNode()) {
15020        // Return SDValue here as the xor should have already been replaced in
15021        // this sext.
15022        return SDValue();
15023      }
15024
15025      // Return a new sext with the new xor.
15026      return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
15027    }
15028
15029    SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
15030    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
15031  }
15032
15033  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15034    return Res;
15035
15036  return SDValue();
15037}
15038
// NOTE(review): the legality condition (lines 15053-15054, presumably a
// CTPOP legal-in-VT / not-legal-in-source check per the function comment)
// is elided in this rendered view — confirm against the upstream file.
15039/// Given an extending node with a pop-count operand, if the target does not
15040/// support a pop-count in the narrow source type but does support it in the
15041/// destination type, widen the pop-count to the destination type.
15042static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
15043  assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
15044          Extend->getOpcode() == ISD::ANY_EXTEND) &&
15045         "Expected extend op");
15046
15047  SDValue CtPop = Extend->getOperand(0);
15048  if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
15049    return SDValue();
15050
15051  EVT VT = Extend->getValueType(0);
15052  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15055    return SDValue();
15056
15057  // zext (ctpop X) --> ctpop (zext X)
15058  SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
15059  return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
15060}
15061
// NOTE(review): the right-hand side of the getTypeAction comparison (line
// 15078, presumably `TargetLowering::TypePromoteInteger)`) is elided in this
// rendered view — confirm against the upstream file.
15062// If we have (zext (abs X)) where X is a type that will be promoted by type
15063// legalization, convert to (abs (sext X)). But don't extend past a legal type.
15064static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
15065  assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
15066
// Scalar only: vector abs widening is not handled here.
15067  EVT VT = Extend->getValueType(0);
15068  if (VT.isVector())
15069    return SDValue();
15070
15071  SDValue Abs = Extend->getOperand(0);
15072  if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
15073    return SDValue();
15074
15075  EVT AbsVT = Abs.getValueType();
15076  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15077  if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
15079    return SDValue();
15080
// Widen only up to the type legalization target, not all the way to VT.
15081  EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
15082
15083  SDValue SExt =
15084      DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
15085  SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
15086  return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
15087}
15088
// Combine visitor for ISD::ZERO_EXTEND. Applies, in order: constant folding,
// ext-of-ext collapsing, trunc folds (known-zero bits, masking), load folds
// (plain, masked, split, atomic, extload), logic-of-load folds, sign-bit
// tests, setcc folds, shift folds, ctpop/abs widening, and nneg-zext CSE
// with an existing sext.
// NOTE(review): this rendered view elides several lines (e.g. the
// *_EXTEND_VECTOR_INREG opcode checks around 15116-15117, the AND-legality
// clause at 15192, an argument line of tryToFoldExtOfMaskedLoad at 15236,
// the SetCCs declaration at 15262, and the truncate construction at 15290)
// — confirm against the upstream file before relying on this copy.
15089SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
15090  SDValue N0 = N->getOperand(0);
15091  EVT VT = N->getValueType(0);
15092  SDLoc DL(N);
15093
15094  if (VT.isVector())
15095    if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
15096      return FoldedVOp;
15097
15098  // zext(undef) = 0
15099  if (N0.isUndef())
15100    return DAG.getConstant(0, DL, VT);
15101
15102  if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15103    return Res;
15104
15105  // fold (zext (zext x)) -> (zext x)
15106  // fold (zext (aext x)) -> (zext x)
15107  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
// The nneg flag may only be preserved when the inner node is also a zext.
15108    SDNodeFlags Flags;
15109    if (N0.getOpcode() == ISD::ZERO_EXTEND)
15110      Flags.setNonNeg(N0->getFlags().hasNonNeg());
15111    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
15112  }
15113
15114  // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15115  // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15118    return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
15119
15120  // fold (zext (truncate x)) -> (zext x) or
15121  //      (zext (truncate x)) -> (truncate x)
15122  // This is valid when the truncated bits of x are already zero.
15123  SDValue Op;
15124  KnownBits Known;
15125  if (isTruncateOf(DAG, N0, Op, Known)) {
15126    APInt TruncatedBits =
15127      (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
15128      APInt(Op.getScalarValueSizeInBits(), 0) :
15129      APInt::getBitsSet(Op.getScalarValueSizeInBits(),
15130                        N0.getScalarValueSizeInBits(),
15131                        std::min(Op.getScalarValueSizeInBits(),
15132                                 VT.getScalarSizeInBits()));
15133    if (TruncatedBits.isSubsetOf(Known.Zero)) {
15134      SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
15135      DAG.salvageDebugInfo(*N0.getNode());
15136
15137      return ZExtOrTrunc;
15138    }
15139  }
15140
15141  // fold (zext (truncate x)) -> (and x, mask)
15142  if (N0.getOpcode() == ISD::TRUNCATE) {
15143    // fold (zext (truncate (load x))) -> (zext (smaller load x))
15144    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
15145    if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15146      SDNode *oye = N0.getOperand(0).getNode();
15147      if (NarrowLoad.getNode() != N0.getNode()) {
15148        CombineTo(N0.getNode(), NarrowLoad);
15149        // CombineTo deleted the truncate, if needed, but not what's under it.
15150        AddToWorklist(oye);
15151      }
15152      return SDValue(N, 0); // Return N so it doesn't get rechecked!
15153    }
15154
15155    EVT SrcVT = N0.getOperand(0).getValueType();
15156    EVT MinVT = N0.getValueType();
15157
15158    if (N->getFlags().hasNonNeg()) {
15159      SDValue Op = N0.getOperand(0);
15160      unsigned OpBits = SrcVT.getScalarSizeInBits();
15161      unsigned MidBits = MinVT.getScalarSizeInBits();
15162      unsigned DestBits = VT.getScalarSizeInBits();
15163
// Same sign-bit reasoning as the sext case: with nneg, the value is
// non-negative, so sign extension and zero extension agree.
15164      if (N0->getFlags().hasNoSignedWrap() ||
15165          DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
15166        if (OpBits == DestBits) {
15167          // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
15168          // bits, it is already ready.
15169          return Op;
15170        }
15171
15172        if (OpBits < DestBits) {
15173          // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
15174          // bits, just sext from i32.
15175          // FIXME: This can probably be ZERO_EXTEND nneg?
15176          return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
15177        }
15178
15179        // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
15180        // bits, just truncate to i32.
15181        SDNodeFlags Flags;
15182        Flags.setNoSignedWrap(true);
15183        Flags.setNoUnsignedWrap(true);
15184        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
15185      }
15186    }
15187
15188    // Try to mask before the extension to avoid having to generate a larger mask,
15189    // possibly over several sub-vectors.
15190    if (SrcVT.bitsLT(VT) && VT.isVector()) {
15191      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
15193        SDValue Op = N0.getOperand(0);
15194        Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
15195        AddToWorklist(Op.getNode());
15196        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
15197        // Transfer the debug info; the new node is equivalent to N0.
15198        DAG.transferDbgValues(N0, ZExtOrTrunc);
15199        return ZExtOrTrunc;
15200      }
15201    }
15202
15203    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
15204      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15205      AddToWorklist(Op.getNode());
15206      SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
15207      // We may safely transfer the debug info describing the truncate node over
15208      // to the equivalent and operation.
15209      DAG.transferDbgValues(N0, And);
15210      return And;
15211    }
15212  }
15213
15214  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
15215  // if either of the casts is not free.
15216  if (N0.getOpcode() == ISD::AND &&
15217      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15218      N0.getOperand(1).getOpcode() == ISD::Constant &&
15219      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
15220       !TLI.isZExtFree(N0.getValueType(), VT))) {
15221    SDValue X = N0.getOperand(0).getOperand(0);
15222    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
15223    APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15224    return DAG.getNode(ISD::AND, DL, VT,
15225                       X, DAG.getConstant(Mask, DL, VT));
15226  }
15227
15228  // Try to simplify (zext (load x)).
15229  if (SDValue foldedExt = tryToFoldExtOfLoad(
15230          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
15231          ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
15232    return foldedExt;
15233
15234  if (SDValue foldedExt =
15235          tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
15237    return foldedExt;
15238
15239  // fold (zext (load x)) to multiple smaller zextloads.
15240  // Only on illegal but splittable vectors.
15241  if (SDValue ExtLoad = CombineExtLoad(N))
15242    return ExtLoad;
15243
15244  // Try to simplify (zext (atomic_load x)).
15245  if (SDValue foldedExt =
15246          tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
15247    return foldedExt;
15248
15249  // fold (zext (and/or/xor (load x), cst)) ->
15250  //      (and/or/xor (zextload x), (zext cst))
15251  // Unless (and (load x) cst) will match as a zextload already and has
15252  // additional users, or the zext is already free.
15253  if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
15254      isa<LoadSDNode>(N0.getOperand(0)) &&
15255      N0.getOperand(1).getOpcode() == ISD::Constant &&
15256      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
15257    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
15258    EVT MemVT = LN00->getMemoryVT();
15259    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
15260        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
15261      bool DoXform = true;
15263      if (!N0.hasOneUse()) {
// If the and/load pair already matches a zextload with other users, do
// not rewrite it — the narrow form is the better match.
15264        if (N0.getOpcode() == ISD::AND) {
15265          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
15266          EVT LoadResultTy = AndC->getValueType(0);
15267          EVT ExtVT;
15268          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
15269            DoXform = false;
15270        }
15271      }
15272      if (DoXform)
15273        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
15274                                          ISD::ZERO_EXTEND, SetCCs, TLI);
15275      if (DoXform) {
15276        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
15277                                         LN00->getChain(), LN00->getBasePtr(),
15278                                         LN00->getMemoryVT(),
15279                                         LN00->getMemOperand());
// The logic-op constant must be zero extended to match the widened load.
15280        APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15281        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
15282                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
15283        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
15284        bool NoReplaceTruncAnd = !N0.hasOneUse();
15285        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
15286        CombineTo(N, And);
15287        // If N0 has multiple uses, change other uses as well.
15288        if (NoReplaceTruncAnd) {
15289          SDValue TruncAnd =
15291          CombineTo(N0.getNode(), TruncAnd);
15292        }
15293        if (NoReplaceTrunc) {
15294          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
15295        } else {
15296          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
15297                                      LN00->getValueType(0), ExtLoad);
15298          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
15299        }
15300        return SDValue(N,0); // Return N so it doesn't get rechecked!
15301      }
15302    }
15303  }
15304
15305  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
15306  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
15307  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
15308    return ZExtLoad;
15309
15310  // Try to simplify (zext (zextload x)).
15311  if (SDValue foldedExt = tryToFoldExtOfExtload(
15312          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
15313    return foldedExt;
15314
15315  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15316    return V;
15317
15318  if (N0.getOpcode() == ISD::SETCC) {
15319    // Propagate fast-math-flags.
15320    SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15321
15322    // Only do this before legalize for now.
15323    if (!LegalOperations && VT.isVector() &&
15324        N0.getValueType().getVectorElementType() == MVT::i1) {
15325      EVT N00VT = N0.getOperand(0).getValueType();
15326      if (getSetCCResultType(N00VT) == N0.getValueType())
15327        return SDValue();
15328
15329      // We know that the # elements of the results is the same as the #
15330      // elements of the compare (and the # elements of the compare result for
15331      // that matter). Check to see that they are the same size. If so, we know
15332      // that the element size of the sext'd result matches the element size of
15333      // the compare operands.
15334      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
15335        // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
15336        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
15337                                     N0.getOperand(1), N0.getOperand(2));
15338        return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
15339      }
15340
15341      // If the desired elements are smaller or larger than the source
15342      // elements we can use a matching integer vector type and then
15343      // truncate/any extend followed by zext_in_reg.
15344      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15345      SDValue VsetCC =
15346          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
15347                      N0.getOperand(1), N0.getOperand(2));
15348      return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
15349                                    N0.getValueType());
15350    }
15351
15352    // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15353    EVT N0VT = N0.getValueType();
15354    EVT N00VT = N0.getOperand(0).getValueType();
15355    if (SDValue SCC = SimplifySelectCC(
15356            DL, N0.getOperand(0), N0.getOperand(1),
15357            DAG.getBoolConstant(true, DL, N0VT, N00VT),
15358            DAG.getBoolConstant(false, DL, N0VT, N00VT),
15359            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15360      return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
15361  }
15362
15363  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
15364  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15365      !TLI.isZExtFree(N0, VT)) {
15366    SDValue ShVal = N0.getOperand(0);
15367    SDValue ShAmt = N0.getOperand(1);
15368    if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
15369      if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15370        if (N0.getOpcode() == ISD::SHL) {
15371          // If the original shl may be shifting out bits, do not perform this
15372          // transformation.
15373          unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15374                                   ShVal.getOperand(0).getValueSizeInBits();
15375          if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
15376            // If the shift is too large, then see if we can deduce that the
15377            // shift is safe anyway.
15378
15379            // Check if the bits being shifted out are known to be zero.
15380            KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
15381            if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
15382              return SDValue();
15383          }
15384        }
15385
15386        // Ensure that the shift amount is wide enough for the shifted value.
15387        if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
15388          ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
15389
15390        return DAG.getNode(N0.getOpcode(), DL, VT,
15391                           DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
15392      }
15393    }
15394  }
15395
15396  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15397    return NewVSel;
15398
15399  if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15400    return NewCtPop;
15401
15402  if (SDValue V = widenAbs(N, DAG))
15403    return V;
15404
15405  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15406    return Res;
15407
15408  // CSE zext nneg with sext if the zext is not free.
15409  if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
15410    SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
15411    if (CSENode)
15412      return SDValue(CSENode, 0);
15413  }
15414
15415  return SDValue();
15416}
15417
15418SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
15419 SDValue N0 = N->getOperand(0);
15420 EVT VT = N->getValueType(0);
15421 SDLoc DL(N);
15422
15423 // aext(undef) = undef
15424 if (N0.isUndef())
15425 return DAG.getUNDEF(VT);
15426
15427 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15428 return Res;
15429
15430 // fold (aext (aext x)) -> (aext x)
15431 // fold (aext (zext x)) -> (zext x)
15432 // fold (aext (sext x)) -> (sext x)
15433 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
15434 N0.getOpcode() == ISD::SIGN_EXTEND) {
15435 SDNodeFlags Flags;
15436 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15437 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15438 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
15439 }
15440
15441 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
15442 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15443 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
15447 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15448
15449 // fold (aext (truncate (load x))) -> (aext (smaller load x))
15450 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
15451 if (N0.getOpcode() == ISD::TRUNCATE) {
15452 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15453 SDNode *oye = N0.getOperand(0).getNode();
15454 if (NarrowLoad.getNode() != N0.getNode()) {
15455 CombineTo(N0.getNode(), NarrowLoad);
15456 // CombineTo deleted the truncate, if needed, but not what's under it.
15457 AddToWorklist(oye);
15458 }
15459 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15460 }
15461 }
15462
15463 // fold (aext (truncate x))
15464 if (N0.getOpcode() == ISD::TRUNCATE)
15465 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15466
15467 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
15468 // if the trunc is not free.
15469 if (N0.getOpcode() == ISD::AND &&
15470 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15471 N0.getOperand(1).getOpcode() == ISD::Constant &&
15472 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
15473 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
15474 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
15475 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
15476 return DAG.getNode(ISD::AND, DL, VT, X, Y);
15477 }
15478
15479 // fold (aext (load x)) -> (aext (truncate (extload x)))
15480 // None of the supported targets knows how to perform load and any_ext
15481 // on vectors in one instruction, so attempt to fold to zext instead.
15482 if (VT.isVector()) {
15483 // Try to simplify (zext (load x)).
15484 if (SDValue foldedExt =
15485 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
15487 return foldedExt;
15488 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
15491 bool DoXform = true;
15493 if (!N0.hasOneUse())
15494 DoXform =
15495 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
15496 if (DoXform) {
15497 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15498 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
15499 LN0->getBasePtr(), N0.getValueType(),
15500 LN0->getMemOperand());
15501 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
15502 // If the load value is used only by N, replace it via CombineTo N.
15503 bool NoReplaceTrunc = N0.hasOneUse();
15504 CombineTo(N, ExtLoad);
15505 if (NoReplaceTrunc) {
15506 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15507 recursivelyDeleteUnusedNodes(LN0);
15508 } else {
15509 SDValue Trunc =
15510 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
15511 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
15512 }
15513 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15514 }
15515 }
15516
15517 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
15518 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
15519 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
15520 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
15521 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
15522 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15523 ISD::LoadExtType ExtType = LN0->getExtensionType();
15524 EVT MemVT = LN0->getMemoryVT();
15525 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
15526 SDValue ExtLoad =
15527 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
15528 MemVT, LN0->getMemOperand());
15529 CombineTo(N, ExtLoad);
15530 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15531 recursivelyDeleteUnusedNodes(LN0);
15532 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15533 }
15534 }
15535
15536 if (N0.getOpcode() == ISD::SETCC) {
15537 // Propagate fast-math-flags.
15538 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15539
15540 // For vectors:
15541 // aext(setcc) -> vsetcc
15542 // aext(setcc) -> truncate(vsetcc)
15543 // aext(setcc) -> aext(vsetcc)
15544 // Only do this before legalize for now.
15545 if (VT.isVector() && !LegalOperations) {
15546 EVT N00VT = N0.getOperand(0).getValueType();
15547 if (getSetCCResultType(N00VT) == N0.getValueType())
15548 return SDValue();
15549
15550 // We know that the # elements of the results is the same as the
15551 // # elements of the compare (and the # elements of the compare result
15552 // for that matter). Check to see that they are the same size. If so,
15553 // we know that the element size of the sext'd result matches the
15554 // element size of the compare operands.
15555 if (VT.getSizeInBits() == N00VT.getSizeInBits())
15556 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
15557 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15558
15559 // If the desired elements are smaller or larger than the source
15560 // elements we can use a matching integer vector type and then
15561 // truncate/any extend
15562 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15563 SDValue VsetCC = DAG.getSetCC(
15564 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
15565 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15566 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
15567 }
15568
15569 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
15570 if (SDValue SCC = SimplifySelectCC(
15571 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
15572 DAG.getConstant(0, DL, VT),
15573 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15574 return SCC;
15575 }
15576
15577 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15578 return NewCtPop;
15579
15580 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15581 return Res;
15582
15583 return SDValue();
15584}
15585
15586SDValue DAGCombiner::visitAssertExt(SDNode *N) {
15587 unsigned Opcode = N->getOpcode();
15588 SDValue N0 = N->getOperand(0);
15589 SDValue N1 = N->getOperand(1);
15590 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
15591
15592 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
15593 if (N0.getOpcode() == Opcode &&
15594 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
15595 return N0;
15596
15597 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15598 N0.getOperand(0).getOpcode() == Opcode) {
15599 // We have an assert, truncate, assert sandwich. Make one stronger assert
15600 // by asserting on the smallest asserted type to the larger source type.
15601 // This eliminates the later assert:
15602 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
15603 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
15604 SDLoc DL(N);
15605 SDValue BigA = N0.getOperand(0);
15606 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15607 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
15608 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
15609 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15610 BigA.getOperand(0), MinAssertVTVal);
15611 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15612 }
15613
15614 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
15615 // than X. Just move the AssertZext in front of the truncate and drop the
15616 // AssertSExt.
15617 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15619 Opcode == ISD::AssertZext) {
15620 SDValue BigA = N0.getOperand(0);
15621 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15622 if (AssertVT.bitsLT(BigA_AssertVT)) {
15623 SDLoc DL(N);
15624 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15625 BigA.getOperand(0), N1);
15626 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15627 }
15628 }
15629
15630 if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
15632 const APInt &Mask = N0.getConstantOperandAPInt(1);
15633
15634 // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15635 // than X, and the And doesn't change the lower iX bits, we can move the
15636 // AssertZext in front of the And and drop the AssertSext.
15637 if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
15638 SDValue BigA = N0.getOperand(0);
15639 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15640 if (AssertVT.bitsLT(BigA_AssertVT) &&
15641 Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15642 SDLoc DL(N);
15643 SDValue NewAssert =
15644 DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
15645 return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
15646 N0.getOperand(1));
15647 }
15648 }
15649
15650 // Remove AssertZext entirely if the mask guarantees the assertion cannot
15651 // fail.
15652 // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
15653 if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
15654 return N0;
15655 }
15656
15657 return SDValue();
15658}
15659
15660SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
15661 SDLoc DL(N);
15662
15663 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
15664 SDValue N0 = N->getOperand(0);
15665
15666 // Fold (assertalign (assertalign x, AL0), AL1) ->
15667 // (assertalign x, max(AL0, AL1))
15668 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
15669 return DAG.getAssertAlign(DL, N0.getOperand(0),
15670 std::max(AL, AAN->getAlign()));
15671
15672 // In rare cases, there are trivial arithmetic ops in source operands. Sink
15673 // this assert down to source operands so that those arithmetic ops could be
15674 // exposed to the DAG combining.
15675 switch (N0.getOpcode()) {
15676 default:
15677 break;
15678 case ISD::ADD:
15679 case ISD::PTRADD:
15680 case ISD::SUB: {
15681 unsigned AlignShift = Log2(AL);
15682 SDValue LHS = N0.getOperand(0);
15683 SDValue RHS = N0.getOperand(1);
15684 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
15685 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
15686 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
15687 if (LHSAlignShift < AlignShift)
15688 LHS = DAG.getAssertAlign(DL, LHS, AL);
15689 if (RHSAlignShift < AlignShift)
15690 RHS = DAG.getAssertAlign(DL, RHS, AL);
15691 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
15692 }
15693 break;
15694 }
15695 }
15696
15697 return SDValue();
15698}
15699
15700/// If the result of a load is shifted/masked/truncated to an effectively
15701/// narrower type, try to transform the load to a narrower type and/or
15702/// use an extending load.
15703SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
15704 unsigned Opc = N->getOpcode();
15705
15707 SDValue N0 = N->getOperand(0);
15708 EVT VT = N->getValueType(0);
15709 EVT ExtVT = VT;
15710
15711 // This transformation isn't valid for vector loads.
15712 if (VT.isVector())
15713 return SDValue();
15714
15715 // The ShAmt variable is used to indicate that we've consumed a right
15716 // shift. I.e. we want to narrow the width of the load by skipping to load the
15717 // ShAmt least significant bits.
15718 unsigned ShAmt = 0;
15719 // A special case is when the least significant bits from the load are masked
15720 // away, but using an AND rather than a right shift. HasShiftedOffset is used
15721 // to indicate that the narrowed load should be left-shifted ShAmt bits to get
15722 // the result.
15723 unsigned ShiftedOffset = 0;
15724 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
15725 // extended to VT.
15726 if (Opc == ISD::SIGN_EXTEND_INREG) {
15727 ExtType = ISD::SEXTLOAD;
15728 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15729 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
15730 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
15731 // value, or it may be shifting a higher subword, half or byte into the
15732 // lowest bits.
15733
15734 // Only handle shift with constant shift amount, and the shiftee must be a
15735 // load.
15736 auto *LN = dyn_cast<LoadSDNode>(N0);
15737 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15738 if (!N1C || !LN)
15739 return SDValue();
15740 // If the shift amount is larger than the memory type then we're not
15741 // accessing any of the loaded bytes.
15742 ShAmt = N1C->getZExtValue();
15743 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
15744 if (MemoryWidth <= ShAmt)
15745 return SDValue();
15746 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
15747 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
15748 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15749 // If original load is a SEXTLOAD then we can't simply replace it by a
15750 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
15751 // followed by a ZEXT, but that is not handled at the moment). Similarly if
15752 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
15753 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
15754 LN->getExtensionType() == ISD::ZEXTLOAD) &&
15755 LN->getExtensionType() != ExtType)
15756 return SDValue();
15757 } else if (Opc == ISD::AND) {
15758 // An AND with a constant mask is the same as a truncate + zero-extend.
15759 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
15760 if (!AndC)
15761 return SDValue();
15762
15763 const APInt &Mask = AndC->getAPIntValue();
15764 unsigned ActiveBits = 0;
15765 if (Mask.isMask()) {
15766 ActiveBits = Mask.countr_one();
15767 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
15768 ShiftedOffset = ShAmt;
15769 } else {
15770 return SDValue();
15771 }
15772
15773 ExtType = ISD::ZEXTLOAD;
15774 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15775 }
15776
15777 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
15778 // a right shift. Here we redo some of those checks, to possibly adjust the
15779 // ExtVT even further based on "a masking AND". We could also end up here for
15780 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
15781 // need to be done here as well.
15782 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
15783 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
15784 // Bail out when the SRL has more than one use. This is done for historical
15785 // (undocumented) reasons. Maybe intent was to guard the AND-masking below
15786 // check below? And maybe it could be non-profitable to do the transform in
15787 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
15788 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
15789 if (!SRL.hasOneUse())
15790 return SDValue();
15791
15792 // Only handle shift with constant shift amount, and the shiftee must be a
15793 // load.
15794 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
15795 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
15796 if (!SRL1C || !LN)
15797 return SDValue();
15798
15799 // If the shift amount is larger than the input type then we're not
15800 // accessing any of the loaded bytes. If the load was a zextload/extload
15801 // then the result of the shift+trunc is zero/undef (handled elsewhere).
15802 ShAmt = SRL1C->getZExtValue();
15803 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
15804 if (ShAmt >= MemoryWidth)
15805 return SDValue();
15806
15807 // Because a SRL must be assumed to *need* to zero-extend the high bits
15808 // (as opposed to anyext the high bits), we can't combine the zextload
15809 // lowering of SRL and an sextload.
15810 if (LN->getExtensionType() == ISD::SEXTLOAD)
15811 return SDValue();
15812
15813 // Avoid reading outside the memory accessed by the original load (could
15814 // happened if we only adjust the load base pointer by ShAmt). Instead we
15815 // try to narrow the load even further. The typical scenario here is:
15816 // (i64 (truncate (i96 (srl (load x), 64)))) ->
15817 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
15818 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
15819 // Don't replace sextload by zextload.
15820 if (ExtType == ISD::SEXTLOAD)
15821 return SDValue();
15822 // Narrow the load.
15823 ExtType = ISD::ZEXTLOAD;
15824 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15825 }
15826
15827 // If the SRL is only used by a masking AND, we may be able to adjust
15828 // the ExtVT to make the AND redundant.
15829 SDNode *Mask = *(SRL->user_begin());
15830 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
15831 isa<ConstantSDNode>(Mask->getOperand(1))) {
15832 unsigned Offset, ActiveBits;
15833 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
15834 if (ShiftMask.isMask()) {
15835 EVT MaskedVT =
15836 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
15837 // If the mask is smaller, recompute the type.
15838 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
15839 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
15840 ExtVT = MaskedVT;
15841 } else if (ExtType == ISD::ZEXTLOAD &&
15842 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
15843 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
15844 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15845 // If the mask is shifted we can use a narrower load and a shl to insert
15846 // the trailing zeros.
15847 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
15848 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
15849 ExtVT = MaskedVT;
15850 ShAmt = Offset + ShAmt;
15851 ShiftedOffset = Offset;
15852 }
15853 }
15854 }
15855
15856 N0 = SRL.getOperand(0);
15857 }
15858
15859 // If the load is shifted left (and the result isn't shifted back right), we
15860 // can fold a truncate through the shift. The typical scenario is that N
15861 // points at a TRUNCATE here so the attempted fold is:
15862 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
15863 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
15864 unsigned ShLeftAmt = 0;
15865 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15866 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
15867 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
15868 ShLeftAmt = N01->getZExtValue();
15869 N0 = N0.getOperand(0);
15870 }
15871 }
15872
15873 // If we haven't found a load, we can't narrow it.
15874 if (!isa<LoadSDNode>(N0))
15875 return SDValue();
15876
15877 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15878 // Reducing the width of a volatile load is illegal. For atomics, we may be
15879 // able to reduce the width provided we never widen again. (see D66309)
15880 if (!LN0->isSimple() ||
15881 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
15882 return SDValue();
15883
15884 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
15885 unsigned LVTStoreBits =
15887 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
15888 return LVTStoreBits - EVTStoreBits - ShAmt;
15889 };
15890
15891 // We need to adjust the pointer to the load by ShAmt bits in order to load
15892 // the correct bytes.
15893 unsigned PtrAdjustmentInBits =
15894 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
15895
15896 uint64_t PtrOff = PtrAdjustmentInBits / 8;
15897 SDLoc DL(LN0);
15898 // The original load itself didn't wrap, so an offset within it doesn't.
15899 SDValue NewPtr =
15902 AddToWorklist(NewPtr.getNode());
15903
15904 SDValue Load;
15905 if (ExtType == ISD::NON_EXTLOAD) {
15906 const MDNode *OldRanges = LN0->getRanges();
15907 const MDNode *NewRanges = nullptr;
15908 // If LSBs are loaded and the truncated ConstantRange for the OldRanges
15909 // metadata is not the full-set for the new width then create a NewRanges
15910 // metadata for the truncated load
15911 if (ShAmt == 0 && OldRanges) {
15912 ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
15913 unsigned BitSize = VT.getScalarSizeInBits();
15914
15915 // It is possible for an 8-bit extending load with 8-bit range
15916 // metadata to be narrowed to an 8-bit load. This guard is necessary to
15917 // ensure that truncation is strictly smaller.
15918 if (CR.getBitWidth() > BitSize) {
15919 ConstantRange TruncatedCR = CR.truncate(BitSize);
15920 if (!TruncatedCR.isFullSet()) {
15921 Metadata *Bounds[2] = {
15923 ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
15925 ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
15926 NewRanges = MDNode::get(*DAG.getContext(), Bounds);
15927 }
15928 } else if (CR.getBitWidth() == BitSize)
15929 NewRanges = OldRanges;
15930 }
15931 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
15932 LN0->getPointerInfo().getWithOffset(PtrOff),
15933 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15934 LN0->getAAInfo(), NewRanges);
15935 } else
15936 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
15937 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
15938 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15939 LN0->getAAInfo());
15940
15941 // Replace the old load's chain with the new load's chain.
15942 WorklistRemover DeadNodes(*this);
15943 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15944
15945 // Shift the result left, if we've swallowed a left shift.
15947 if (ShLeftAmt != 0) {
15948 // If the shift amount is as large as the result size (but, presumably,
15949 // no larger than the source) then the useful bits of the result are
15950 // zero; we can't simply return the shortened shift, because the result
15951 // of that operation is undefined.
15952 if (ShLeftAmt >= VT.getScalarSizeInBits())
15953 Result = DAG.getConstant(0, DL, VT);
15954 else
15955 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
15956 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
15957 }
15958
15959 if (ShiftedOffset != 0) {
15960 // We're using a shifted mask, so the load now has an offset. This means
15961 // that data has been loaded into the lower bytes than it would have been
15962 // before, so we need to shl the loaded data into the correct position in the
15963 // register.
15964 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
15965 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
15966 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
15967 }
15968
15969 // Return the new loaded value.
15970 return Result;
15971}
15972
15973SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
15974 SDValue N0 = N->getOperand(0);
15975 SDValue N1 = N->getOperand(1);
15976 EVT VT = N->getValueType(0);
15977 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
15978 unsigned VTBits = VT.getScalarSizeInBits();
15979 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
15980 SDLoc DL(N);
15981
15982 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
15983 if (N0.isUndef())
15984 return DAG.getConstant(0, DL, VT);
15985
15986 // fold (sext_in_reg c1) -> c1
15987 if (SDValue C =
15989 return C;
15990
15991 // If the input is already sign extended, just drop the extension.
15992 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
15993 return N0;
15994
15995 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
15996 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15997 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
15998 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
15999
16000 // fold (sext_in_reg (sext x)) -> (sext x)
16001 // fold (sext_in_reg (aext x)) -> (sext x)
16002 // if x is small enough or if we know that x has more than 1 sign bit and the
16003 // sign_extend_inreg is extending from one of them.
16004 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
16005 SDValue N00 = N0.getOperand(0);
16006 unsigned N00Bits = N00.getScalarValueSizeInBits();
16007 if ((N00Bits <= ExtVTBits ||
16008 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
16009 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
16010 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
16011 }
16012
16013 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
16014 // if x is small enough or if we know that x has more than 1 sign bit and the
16015 // sign_extend_inreg is extending from one of them.
16017 SDValue N00 = N0.getOperand(0);
16018 unsigned N00Bits = N00.getScalarValueSizeInBits();
16019 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
16020 if ((N00Bits == ExtVTBits ||
16021 (!IsZext && (N00Bits < ExtVTBits ||
16022 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
16023 (!LegalOperations ||
16025 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
16026 }
16027
16028 // fold (sext_in_reg (zext x)) -> (sext x)
16029 // iff we are extending the source sign bit.
16030 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
16031 SDValue N00 = N0.getOperand(0);
16032 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
16033 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
16034 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
16035 }
16036
16037 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
16038 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
16039 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
16040
16041 // fold operands of sext_in_reg based on knowledge that the top bits are not
16042 // demanded.
16044 return SDValue(N, 0);
16045
16046 // fold (sext_in_reg (load x)) -> (smaller sextload x)
16047 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
16048 if (SDValue NarrowLoad = reduceLoadWidth(N))
16049 return NarrowLoad;
16050
16051 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
16052 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
16053 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
16054 if (N0.getOpcode() == ISD::SRL) {
16055 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
16056 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
16057 // We can turn this into an SRA iff the input to the SRL is already sign
16058 // extended enough.
16059 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
16060 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
16061 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
16062 N0.getOperand(1));
16063 }
16064 }
16065
16066 // fold (sext_inreg (extload x)) -> (sextload x)
16067 // If sextload is not supported by target, we can only do the combine when
16068 // load has one use. Doing otherwise can block folding the extload with other
16069 // extends that the target does support.
16071 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
16072 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
16073 N0.hasOneUse()) ||
16074 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
16075 auto *LN0 = cast<LoadSDNode>(N0);
16076 SDValue ExtLoad =
16077 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
16078 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
16079 CombineTo(N, ExtLoad);
16080 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
16081 AddToWorklist(ExtLoad.getNode());
16082 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16083 }
16084
16085 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
16087 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
16088 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
16089 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
16090 auto *LN0 = cast<LoadSDNode>(N0);
16091 SDValue ExtLoad =
16092 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
16093 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
16094 CombineTo(N, ExtLoad);
16095 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
16096 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16097 }
16098
16099 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
16100 // ignore it if the masked load is already sign extended
16101 bool Frozen = N0.getOpcode() == ISD::FREEZE && N0.hasOneUse();
16102 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(Frozen ? N0.getOperand(0) : N0)) {
16103 if (ExtVT == Ld->getMemoryVT() && Ld->hasNUsesOfValue(1, 0) &&
16104 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
16105 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
16106 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
16107 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
16108 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
16109 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
16110 CombineTo(N, Frozen ? N0 : ExtMaskedLoad);
16111 CombineTo(Ld, ExtMaskedLoad, ExtMaskedLoad.getValue(1));
16112 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16113 }
16114 }
16115
16116 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
16117 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
16118 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
16120 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
16121 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
16122
16123 SDValue ExtLoad = DAG.getMaskedGather(
16124 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
16125 GN0->getIndexType(), ISD::SEXTLOAD);
16126
16127 CombineTo(N, ExtLoad);
16128 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
16129 AddToWorklist(ExtLoad.getNode());
16130 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16131 }
16132 }
16133
16134 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
16135 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
16136 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
16137 N0.getOperand(1), false))
16138 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
16139 }
16140
16141 // Fold (iM_signext_inreg
16142 // (extract_subvector (zext|anyext|sext iN_v to _) _)
16143 // from iN)
16144 // -> (extract_subvector (signext iN_v to iM))
16145 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
16147 SDValue InnerExt = N0.getOperand(0);
16148 EVT InnerExtVT = InnerExt->getValueType(0);
16149 SDValue Extendee = InnerExt->getOperand(0);
16150
16151 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
16152 (!LegalOperations ||
16153 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
16154 SDValue SignExtExtendee =
16155 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
16156 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
16157 N0.getOperand(1));
16158 }
16159 }
16160
16161 return SDValue();
16162}
16163
// NOTE(review): the line declaring this helper's name (file line 16164,
// presumably "static SDValue foldExtendVectorInregOfConcat(") was elided by
// the extraction; confirm against the full source. The visible body folds
// {s,z,any}ext_vector_inreg (concat_vectors x, ...) -> ({s,z,any}ext x)
// when only the first concat operand is actually extended.
16165 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
16166 bool LegalOperations) {
// InregOpcode must be one of the *_EXTEND_VECTOR_INREG opcodes (asserted
// below); Opcode is the corresponding whole-vector *_EXTEND opcode.
16167 unsigned InregOpcode = N->getOpcode();
16168 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
16169
16170 SDValue Src = N->getOperand(0);
16171 EVT VT = N->getValueType(0);
// SrcVT: the result type with the source's (narrower) element type — the
// type a concat operand must have to be a valid whole-vector extend input.
16172 EVT SrcVT = VT.changeVectorElementType(
16173 *DAG.getContext(), Src.getValueType().getVectorElementType())
16174
16175 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
16176 "Expected EXTEND_VECTOR_INREG dag node in input!");
16177
16178 // Profitability check: our operand must be an one-use CONCAT_VECTORS.
16179 // FIXME: one-use check may be overly restrictive
16180 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
16181 return SDValue();
16182
16183 // Profitability check: we must be extending exactly one of it's operands.
16184 // FIXME: this is probably overly restrictive.
16185 Src = Src.getOperand(0)
16186 if (Src.getValueType() != SrcVT)
16187 return SDValue();
16188
16189 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
16190 return SDValue();
16191
16192 return DAG.getNode(Opcode, DL, VT, Src);
16193 }
16194
// Combine step for the ANY/SIGN/ZERO_EXTEND_VECTOR_INREG opcodes.
// NOTE(review): numbering gaps (file lines 16211 and 16214) mark lines
// elided by the extraction — presumably a SimplifyDemandedVectorElts guard
// and the call into the concat-folding helper whose result is bound to R;
// confirm against the full source.
16195 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
16196 SDValue N0 = N->getOperand(0);
16197 EVT VT = N->getValueType(0);
16198 SDLoc DL(N);
16199
16200 if (N0.isUndef()) {
16201 // aext_vector_inreg(undef) = undef because the top bits are undefined.
16202 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
16203 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
16204 ? DAG.getUNDEF(VT)
16205 : DAG.getConstant(0, DL, VT);
16206 }
16207
16208 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
16209 return Res;
16210
16212 return SDValue(N, 0);
16213
16215 LegalOperations))
16216 return R;
16217
16218 return SDValue();
16219 }
16220
// Combine step for ISD::TRUNCATE_USAT_U: rewrites a saturating truncate of
// (fp_to_uint x) as a single FP_TO_UINT_SAT node.
// NOTE(review): file line 16227 was elided by the extraction — presumably a
// TLI legality query guarding the FP_TO_UINT_SAT conversion; confirm
// against the full source.
16221 SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
16222 EVT VT = N->getValueType(0);
16223 SDValue N0 = N->getOperand(0);
16224
16225 SDValue FPVal;
16226 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
16228 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
16229 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
16230 DAG.getValueType(VT.getScalarType()));
16231
16232 return SDValue();
16233 }
16234
16235 /// Detect patterns of truncation with unsigned saturation:
16236 ///
16237 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
16238 /// Return the source value x to be truncated or SDValue() if the pattern was
16239 /// not matched.
16240 ///
// NOTE(review): file line 16241 (the signature, presumably
// "static SDValue detectUSatUPattern(SDValue In, EVT VT) {") was elided by
// the extraction; confirm against the full source.
16242 unsigned NumDstBits = VT.getScalarSizeInBits();
16243 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16244 // Saturation with truncation. We truncate from InVT to VT.
16245 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16246
16247 SDValue Min;
// The umin bound, widened to the source width, must be exactly the
// destination type's unsigned max.
16248 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16249 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
16250 return Min;
16251
16252 return SDValue();
16253 }
16254
16255 /// Detect patterns of truncation with signed saturation:
16256 /// (truncate (smin (smax (x, signed_min_of_dest_type),
16257 /// signed_max_of_dest_type)) to dest_type)
16258 /// or:
16259 /// (truncate (smax (smin (x, signed_max_of_dest_type),
16260 /// signed_min_of_dest_type)) to dest_type).
16261 ///
16262 /// Return the source value to be truncated or SDValue() if the pattern was not
16263 /// matched.
// NOTE(review): file line 16264 (the signature, presumably
// "static SDValue detectSSatSPattern(SDValue In, EVT VT) {") was elided by
// the extraction; confirm against the full source.
16265 unsigned NumDstBits = VT.getScalarSizeInBits();
16266 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16267 // Saturation with truncation. We truncate from InVT to VT.
16268 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16269
16270 SDValue Val;
// The clamp bounds, sign-extended to the source width, must be exactly the
// destination type's signed min/max.
16271 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16272 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16273
// Either nesting order of the smin/smax clamp is accepted.
16274 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
16275 m_SpecificInt(SignedMax))))
16276 return Val;
16277
16278 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
16279 m_SpecificInt(SignedMin))))
16280 return Val;
16281
16282 return SDValue();
16283 }
16284
16285 /// Detect patterns of truncation with unsigned saturation:
// NOTE(review): file line 16286 (the first signature line, presumably
// "static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,")
// was elided by the extraction; confirm against the full source.
16287 const SDLoc &DL) {
16288 unsigned NumDstBits = VT.getScalarSizeInBits();
16289 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16290 // Saturation with truncation. We truncate from InVT to VT.
16291 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16292
16293 SDValue Val;
16294 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16295 // Min == 0, Max is unsigned max of destination type.
// Three equivalent shapes of the clamp to [0, unsigned_max(dst)] are
// recognized: smin-inside-smax, smax-inside-smin, and smax-inside-umin.
16296 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
16297 m_Zero())))
16298 return Val;
16299
16300 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
16301 m_SpecificInt(UnsignedMax))))
16302 return Val;
16303
16304 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
16305 m_SpecificInt(UnsignedMax))))
16306 return Val;
16307
16308 return SDValue();
16309 }
16310
16311static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
16312 SDLoc &DL, const TargetLowering &TLI,
16313 SelectionDAG &DAG) {
16314 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
16315 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
16316 TLI.isTypeDesirableForOp(Opc, VT));
16317 };
16318
16319 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
16320 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
16321 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
16322 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
16323 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16324 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16325 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16326 } else if (Src.getOpcode() == ISD::UMIN) {
16327 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16328 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16329 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16330 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
16331 if (SDValue USatVal = detectUSatUPattern(Src, VT))
16332 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
16333 }
16334
16335 return SDValue();
16336}
16337
// Combine step for ISD::TRUNCATE: folds truncates of truncates, extends,
// saturating clamps, loads, selects, shifts, build_vectors, splats,
// concats, bitcasts and averaging/absolute-difference patterns into
// narrower equivalents.
// NOTE(review): this chunk is a source listing with per-line numbers glued
// to the code, and several (hyperlinked) lines were dropped by the
// extraction — gaps in the embedded numbering below (e.g. 16531, 16563,
// 16579-80, 16589, 16620, 16633, 16655, 16666/16668, 16696-97, 16712,
// 16720) mark the elided lines; confirm against the full source.
16338 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
16339 SDValue N0 = N->getOperand(0);
16340 EVT VT = N->getValueType(0);
16341 EVT SrcVT = N0.getValueType();
16342 bool isLE = DAG.getDataLayout().isLittleEndian();
16343 SDLoc DL(N);
16344
16345 // trunc(undef) = undef
16346 if (N0.isUndef())
16347 return DAG.getUNDEF(VT);
16348
16349 // fold (truncate (truncate x)) -> (truncate x)
16350 if (N0.getOpcode() == ISD::TRUNCATE)
16351 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16352
16353 // fold saturated truncate
16354 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
16355 return SaturatedTR;
16356
16357 // fold (truncate c1) -> c1
16358 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
16359 return C;
16360
16361 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
16362 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
16363 N0.getOpcode() == ISD::SIGN_EXTEND ||
16364 N0.getOpcode() == ISD::ANY_EXTEND) {
16365 // if the source is smaller than the dest, we still need an extend.
16366 if (N0.getOperand(0).getValueType().bitsLT(VT)) {
16367 SDNodeFlags Flags;
16368 if (N0.getOpcode() == ISD::ZERO_EXTEND)
16369 Flags.setNonNeg(N0->getFlags().hasNonNeg());
16370 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
16371 }
16372 // if the source is larger than the dest, than we just need the truncate.
16373 if (N0.getOperand(0).getValueType().bitsGT(VT))
16374 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16375 // if the source and dest are the same type, we can drop both the extend
16376 // and the truncate.
16377 return N0.getOperand(0);
16378 }
16379
16380 // Try to narrow a truncate-of-sext_in_reg to the destination type:
16381 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
16382 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16383 N0.hasOneUse()) {
16384 SDValue X = N0.getOperand(0);
16385 SDValue ExtVal = N0.getOperand(1);
16386 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
16387 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
16388 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
16389 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
16390 }
16391 }
16392
16393 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
16394 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
16395 return SDValue();
16396
16397 // Fold extract-and-trunc into a narrow extract. For example:
16398 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
16399 // i32 y = TRUNCATE(i64 x)
16400 // -- becomes --
16401 // v16i8 b = BITCAST (v2i64 val)
16402 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
16403 //
16404 // Note: We only run this optimization after type legalization (which often
16405 // creates this pattern) and before operation legalization after which
16406 // we need to be more careful about the vector instructions that we generate.
16407 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
16408 N0->hasOneUse()) {
16409 EVT TrTy = N->getValueType(0);
16410 SDValue Src = N0;
16411
16412 // Check for cases where we shift down an upper element before truncation.
16413 int EltOffset = 0;
16414 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
16415 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
16416 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
16417 Src = Src.getOperand(0);
16418 EltOffset = *ShAmt / TrTy.getSizeInBits();
16419 }
16420 }
16421 }
16422
16423 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16424 EVT VecTy = Src.getOperand(0).getValueType();
16425 EVT ExTy = Src.getValueType();
16426
16427 auto EltCnt = VecTy.getVectorElementCount();
16428 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
16429 auto NewEltCnt = EltCnt * SizeRatio;
16430
16431 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
16432 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
16433
16434 SDValue EltNo = Src->getOperand(1);
16435 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
16436 int Elt = EltNo->getAsZExtVal();
16437 int Index = isLE ? (Elt * SizeRatio + EltOffset)
16438 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
16439 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
16440 DAG.getBitcast(NVT, Src.getOperand(0)),
16441 DAG.getVectorIdxConstant(Index, DL));
16442 }
16443 }
16444 }
16445
16446 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
16447 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
16448 TLI.isTruncateFree(SrcVT, VT)) {
16449 if (!LegalOperations ||
16450 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
16451 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
16452 SDLoc SL(N0);
16453 SDValue Cond = N0.getOperand(0);
16454 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
16455 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
16456 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
16457 }
16458 }
16459
16460 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
16461 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16462 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
16463 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
16464 SDValue Amt = N0.getOperand(1);
16465 KnownBits Known = DAG.computeKnownBits(Amt);
16466 unsigned Size = VT.getScalarSizeInBits();
16467 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
16468 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
16469 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16470 if (AmtVT != Amt.getValueType()) {
16471 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
16472 AddToWorklist(Amt.getNode());
16473 }
16474 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
16475 }
16476 }
16477
16478 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
16479 return V;
16480
16481 if (SDValue ABD = foldABSToABD(N, DL))
16482 return ABD;
16483
16484 // Attempt to pre-truncate BUILD_VECTOR sources.
16485 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
16486 N0.hasOneUse() &&
16487 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
16488 // Avoid creating illegal types if running after type legalizer.
16489 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
16490 EVT SVT = VT.getScalarType();
16491 SmallVector<SDValue, 8> TruncOps;
16492 for (const SDValue &Op : N0->op_values()) {
16493 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
16494 TruncOps.push_back(TruncOp);
16495 }
16496 return DAG.getBuildVector(VT, DL, TruncOps);
16497 }
16498
16499 // trunc (splat_vector x) -> splat_vector (trunc x)
16500 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
16501 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
16502 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
16503 EVT SVT = VT.getScalarType();
16504 return DAG.getSplatVector(
16505 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
16506 }
16507
16508 // Fold a series of buildvector, bitcast, and truncate if possible.
16509 // For example fold
16510 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
16511 // (2xi32 (buildvector x, y)).
16512 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
16513 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16515 N0.getOperand(0).hasOneUse()) {
16516 SDValue BuildVect = N0.getOperand(0);
16517 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
16518 EVT TruncVecEltTy = VT.getVectorElementType();
16519
16520 // Check that the element types match.
16521 if (BuildVectEltTy == TruncVecEltTy) {
16522 // Now we only need to compute the offset of the truncated elements.
16523 unsigned BuildVecNumElts = BuildVect.getNumOperands();
16524 unsigned TruncVecNumElts = VT.getVectorNumElements();
16525 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
16526 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
16527
16528 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
16529 "Invalid number of elements");
16530
// NOTE(review): file line 16531 elided — presumably declares the Opnds
// vector used below; confirm against the full source.
16532 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
16533 i += TruncEltOffset)
16534 Opnds.push_back(BuildVect.getOperand(i));
16535
16536 return DAG.getBuildVector(VT, DL, Opnds);
16537 }
16538 }
16539
16540 // fold (truncate (load x)) -> (smaller load x)
16541 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
16542 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
16543 if (SDValue Reduced = reduceLoadWidth(N))
16544 return Reduced;
16545
16546 // Handle the case where the truncated result is at least as wide as the
16547 // loaded type.
16548 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
16549 auto *LN0 = cast<LoadSDNode>(N0);
16550 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
16551 SDValue NewLoad = DAG.getExtLoad(
16552 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
16553 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
16554 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
16555 return NewLoad;
16556 }
16557 }
16558 }
16559
16560 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
16561 // where ... are all 'undef'.
16562 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
16564 SDValue V;
16565 unsigned Idx = 0;
16566 unsigned NumDefs = 0;
16567
16568 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
16569 SDValue X = N0.getOperand(i);
16570 if (!X.isUndef()) {
16571 V = X;
16572 Idx = i;
16573 NumDefs++;
16574 }
16575 // Stop if more than one members are non-undef.
16576 if (NumDefs > 1)
16577 break;
16578
// NOTE(review): file lines 16579-16580 elided — presumably push the
// per-operand truncated vector types into VTs; confirm against the full
// source.
16581 X.getValueType().getVectorElementCount()));
16582 }
16583
16584 if (NumDefs == 0)
16585 return DAG.getUNDEF(VT);
16586
16587 if (NumDefs == 1) {
16588 assert(V.getNode() && "The single defined operand is empty!");
16590 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
16591 if (i != Idx) {
16592 Opnds.push_back(DAG.getUNDEF(VTs[i]));
16593 continue;
16594 }
16595 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
16596 AddToWorklist(NV.getNode());
16597 Opnds.push_back(NV);
16598 }
16599 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
16600 }
16601 }
16602
16603 // Fold truncate of a bitcast of a vector to an extract of the low vector
16604 // element.
16605 //
16606 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
16607 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
16608 SDValue VecSrc = N0.getOperand(0);
16609 EVT VecSrcVT = VecSrc.getValueType();
16610 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
16611 (!LegalOperations ||
16612 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
16613 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
16614 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
16615 DAG.getVectorIdxConstant(Idx, DL));
16616 }
16617 }
16618
16619 // Simplify the operands using demanded-bits information.
16621 return SDValue(N, 0);
16622
16623 // fold (truncate (extract_subvector(ext x))) ->
16624 // (extract_subvector x)
16625 // TODO: This can be generalized to cover cases where the truncate and extract
16626 // do not fully cancel each other out.
16627 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
16628 SDValue N00 = N0.getOperand(0);
16629 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
16630 N00.getOpcode() == ISD::ZERO_EXTEND ||
16631 N00.getOpcode() == ISD::ANY_EXTEND) {
16632 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
16634 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
16635 N00.getOperand(0), N0.getOperand(1));
16636 }
16637 }
16638
16639 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16640 return NewVSel;
16641
16642 // Narrow a suitable binary operation with a non-opaque constant operand by
16643 // moving it ahead of the truncate. This is limited to pre-legalization
16644 // because targets may prefer a wider type during later combines and invert
16645 // this transform.
16646 switch (N0.getOpcode()) {
16647 case ISD::ADD:
16648 case ISD::SUB:
16649 case ISD::MUL:
16650 case ISD::AND:
16651 case ISD::OR:
16652 case ISD::XOR:
16653 if (!LegalOperations && N0.hasOneUse() &&
16654 (N0.getOperand(0) == N0.getOperand(1) ||
16656 isConstantOrConstantVector(N0.getOperand(1), true))) {
16657 // TODO: We already restricted this to pre-legalization, but for vectors
16658 // we are extra cautious to not create an unsupported operation.
16659 // Target-specific changes are likely needed to avoid regressions here.
16660 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
16661 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16662 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16663 SDNodeFlags Flags;
16664 // Propagate nuw for sub.
16665 if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
16667 N0->getOperand(0),
16669 VT.getScalarSizeInBits())))
16670 Flags.setNoUnsignedWrap(true);
16671 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
16672 }
16673 }
16674 break;
16675 case ISD::ADDE:
16676 case ISD::UADDO_CARRY:
16677 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
16678 // (trunc uaddo_carry(X, Y, Carry)) ->
16679 // (uaddo_carry trunc(X), trunc(Y), Carry)
16680 // When the adde's carry is not used.
16681 // We only do for uaddo_carry before legalize operation
16682 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
16683 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
16684 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
16685 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16686 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16687 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
16688 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
16689 }
16690 break;
16691 case ISD::USUBSAT:
16692 // Truncate the USUBSAT only if LHS is a known zero-extension, its not
16693 // enough to know that the upper bits are zero we must ensure that we don't
16694 // introduce an extra truncate.
16695 if (!LegalOperations && N0.hasOneUse() &&
16698 VT.getScalarSizeInBits() &&
16699 hasOperation(N0.getOpcode(), VT)) {
16700 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
16701 DAG, DL);
16702 }
16703 break;
16704 case ISD::AVGCEILS:
16705 case ISD::AVGCEILU:
16706 // trunc (avgceilu (sext (x), sext (y))) -> avgceils(x, y)
16707 // trunc (avgceils (zext (x), zext (y))) -> avgceilu(x, y)
16708 if (N0.hasOneUse()) {
16709 SDValue Op0 = N0.getOperand(0);
16710 SDValue Op1 = N0.getOperand(1);
16711 if (N0.getOpcode() == ISD::AVGCEILU) {
16713 Op0.getOpcode() == ISD::SIGN_EXTEND &&
16714 Op1.getOpcode() == ISD::SIGN_EXTEND &&
16715 Op0.getOperand(0).getValueType() == VT &&
16716 Op1.getOperand(0).getValueType() == VT)
16717 return DAG.getNode(ISD::AVGCEILS, DL, VT, Op0.getOperand(0),
16718 Op1.getOperand(0));
16719 } else {
16721 Op0.getOpcode() == ISD::ZERO_EXTEND &&
16722 Op1.getOpcode() == ISD::ZERO_EXTEND &&
16723 Op0.getOperand(0).getValueType() == VT &&
16724 Op1.getOperand(0).getValueType() == VT)
16725 return DAG.getNode(ISD::AVGCEILU, DL, VT, Op0.getOperand(0),
16726 Op1.getOperand(0));
16727 }
16728 }
16729 [[fallthrough]];
16730 case ISD::AVGFLOORS:
16731 case ISD::AVGFLOORU:
16732 case ISD::ABDS:
16733 case ISD::ABDU:
16734 // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
16735 // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
16736 if (!LegalOperations && N0.hasOneUse() &&
16737 TLI.isOperationLegal(N0.getOpcode(), VT)) {
16738 EVT TruncVT = VT;
16739 unsigned SrcBits = SrcVT.getScalarSizeInBits();
16740 unsigned TruncBits = TruncVT.getScalarSizeInBits();
16741
16742 SDValue A = N0.getOperand(0);
16743 SDValue B = N0.getOperand(1);
16744 bool CanFold = false;
16745
16746 if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
16747 N0.getOpcode() == ISD::ABDU) {
16748 APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
16749 CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
16750 DAG.MaskedValueIsZero(A, UpperBits);
16751 } else {
16752 unsigned NeededBits = SrcBits - TruncBits;
16753 CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
16754 DAG.ComputeNumSignBits(A) > NeededBits;
16755 }
16756
16757 if (CanFold) {
16758 SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
16759 SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
16760 return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
16761 }
16762 }
16763 break;
16764 }
16765
16766 return SDValue();
16767 }
16768
16769static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
16770 SDValue Elt = N->getOperand(i);
16771 if (Elt.getOpcode() != ISD::MERGE_VALUES)
16772 return Elt.getNode();
16773 return Elt.getOperand(Elt.getResNo()).getNode();
16774}
16775
16776/// build_pair (load, load) -> load
16777/// if load locations are consecutive.
16778SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
16779 assert(N->getOpcode() == ISD::BUILD_PAIR);
16780
16781 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
16782 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
16783
16784 // A BUILD_PAIR is always having the least significant part in elt 0 and the
16785 // most significant part in elt 1. So when combining into one large load, we
16786 // need to consider the endianness.
16787 if (DAG.getDataLayout().isBigEndian())
16788 std::swap(LD1, LD2);
16789
16790 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
16791 !LD1->hasOneUse() || !LD2->hasOneUse() ||
16792 LD1->getAddressSpace() != LD2->getAddressSpace())
16793 return SDValue();
16794
16795 unsigned LD1Fast = 0;
16796 EVT LD1VT = LD1->getValueType(0);
16797 unsigned LD1Bytes = LD1VT.getStoreSize();
16798 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
16799 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
16800 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16801 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
16802 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
16803 LD1->getPointerInfo(), LD1->getAlign());
16804
16805 return SDValue();
16806}
16807
16808static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
16809 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
16810 // and Lo parts; on big-endian machines it doesn't.
16811 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
16812}
16813
16814SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
16815 const TargetLowering &TLI) {
16816 // If this is not a bitcast to an FP type or if the target doesn't have
16817 // IEEE754-compliant FP logic, we're done.
16818 EVT VT = N->getValueType(0);
16819 SDValue N0 = N->getOperand(0);
16820 EVT SourceVT = N0.getValueType();
16821
16822 if (!VT.isFloatingPoint())
16823 return SDValue();
16824
16825 // TODO: Handle cases where the integer constant is a different scalar
16826 // bitwidth to the FP.
16827 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
16828 return SDValue();
16829
16830 unsigned FPOpcode;
16831 APInt SignMask;
16832 switch (N0.getOpcode()) {
16833 case ISD::AND:
16834 FPOpcode = ISD::FABS;
16835 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
16836 break;
16837 case ISD::XOR:
16838 FPOpcode = ISD::FNEG;
16839 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16840 break;
16841 case ISD::OR:
16842 FPOpcode = ISD::FABS;
16843 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16844 break;
16845 default:
16846 return SDValue();
16847 }
16848
16849 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
16850 return SDValue();
16851
16852 // This needs to be the inverse of logic in foldSignChangeInBitcast.
16853 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
16854 // removing this would require more changes.
16855 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
16856 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
16857 return true;
16858
16859 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
16860 };
16861
16862 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
16863 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
16864 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
16865 // fneg (fabs X)
16866 SDValue LogicOp0 = N0.getOperand(0);
16867 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
16868 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
16869 IsBitCastOrFree(LogicOp0, VT)) {
16870 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
16871 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
16872 NumFPLogicOpsConv++;
16873 if (N0.getOpcode() == ISD::OR)
16874 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
16875 return FPOp;
16876 }
16877
16878 return SDValue();
16879}
16880
16881SDValue DAGCombiner::visitBITCAST(SDNode *N) {
16882 SDValue N0 = N->getOperand(0);
16883 EVT VT = N->getValueType(0);
16884
16885 if (N0.isUndef())
16886 return DAG.getUNDEF(VT);
16887
16888 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
16889 // Only do this before legalize types, unless both types are integer and the
16890 // scalar type is legal. Only do this before legalize ops, since the target
16891 // maybe depending on the bitcast.
16892 // First check to see if this is all constant.
16893 // TODO: Support FP bitcasts after legalize types.
16894 if (VT.isVector() &&
16895 (!LegalTypes ||
16896 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
16897 TLI.isTypeLegal(VT.getVectorElementType()))) &&
16898 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
16899 cast<BuildVectorSDNode>(N0)->isConstant())
16900 return DAG.FoldConstantBuildVector(cast<BuildVectorSDNode>(N0), SDLoc(N),
16902
16903 // If the input is a constant, let getNode fold it.
16904 if (isIntOrFPConstant(N0)) {
16905 // If we can't allow illegal operations, we need to check that this is just
16906 // a fp -> int or int -> conversion and that the resulting operation will
16907 // be legal.
16908 if (!LegalOperations ||
16909 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
16911 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
16912 TLI.isOperationLegal(ISD::Constant, VT))) {
16913 SDValue C = DAG.getBitcast(VT, N0);
16914 if (C.getNode() != N)
16915 return C;
16916 }
16917 }
16918
16919 // (conv (conv x, t1), t2) -> (conv x, t2)
16920 if (N0.getOpcode() == ISD::BITCAST)
16921 return DAG.getBitcast(VT, N0.getOperand(0));
16922
16923 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
16924 // iff the current bitwise logicop type isn't legal
16925 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
16926 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
16927 auto IsFreeBitcast = [VT](SDValue V) {
16928 return (V.getOpcode() == ISD::BITCAST &&
16929 V.getOperand(0).getValueType() == VT) ||
16931 V->hasOneUse());
16932 };
16933 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
16934 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
16935 DAG.getBitcast(VT, N0.getOperand(0)),
16936 DAG.getBitcast(VT, N0.getOperand(1)));
16937 }
16938
16939 // fold (conv (load x)) -> (load (conv*)x)
16940 // fold (conv (freeze (load x))) -> (freeze (load (conv*)x))
16941 // If the resultant load doesn't need a higher alignment than the original!
16942 auto CastLoad = [this, &VT](SDValue N0, const SDLoc &DL) {
16943 if (!ISD::isNormalLoad(N0.getNode()) || !N0.hasOneUse())
16944 return SDValue();
16945
16946 // Do not remove the cast if the types differ in endian layout.
16949 return SDValue();
16950
16951 // If the load is volatile, we only want to change the load type if the
16952 // resulting load is legal. Otherwise we might increase the number of
16953 // memory accesses. We don't care if the original type was legal or not
16954 // as we assume software couldn't rely on the number of accesses of an
16955 // illegal type.
16956 auto *LN0 = cast<LoadSDNode>(N0);
16957 if ((LegalOperations || !LN0->isSimple()) &&
16958 !TLI.isOperationLegal(ISD::LOAD, VT))
16959 return SDValue();
16960
16961 if (!TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
16962 *LN0->getMemOperand()))
16963 return SDValue();
16964
16965 // If the range metadata type does not match the new memory
16966 // operation type, remove the range metadata.
16967 if (const MDNode *MD = LN0->getRanges()) {
16968 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
16969 if (Lower->getBitWidth() != VT.getScalarSizeInBits() || !VT.isInteger()) {
16970 LN0->getMemOperand()->clearRanges();
16971 }
16972 }
16973 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
16974 LN0->getMemOperand());
16975 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
16976 return Load;
16977 };
16978
16979 if (SDValue NewLd = CastLoad(N0, SDLoc(N)))
16980 return NewLd;
16981
16982 if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse())
16983 if (SDValue NewLd = CastLoad(N0.getOperand(0), SDLoc(N)))
16984 return DAG.getFreeze(NewLd);
16985
16986 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
16987 return V;
16988
16989 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16990 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16991 //
16992 // For ppc_fp128:
16993 // fold (bitcast (fneg x)) ->
16994 // flipbit = signbit
16995 // (xor (bitcast x) (build_pair flipbit, flipbit))
16996 //
16997 // fold (bitcast (fabs x)) ->
16998 // flipbit = (and (extract_element (bitcast x), 0), signbit)
16999 // (xor (bitcast x) (build_pair flipbit, flipbit))
17000 // This often reduces constant pool loads.
17001 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
17002 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
17003 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
17004 !N0.getValueType().isVector()) {
17005 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
17006 AddToWorklist(NewConv.getNode());
17007
17008 SDLoc DL(N);
17009 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
17010 assert(VT.getSizeInBits() == 128);
17011 SDValue SignBit = DAG.getConstant(
17012 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
17013 SDValue FlipBit;
17014 if (N0.getOpcode() == ISD::FNEG) {
17015 FlipBit = SignBit;
17016 AddToWorklist(FlipBit.getNode());
17017 } else {
17018 assert(N0.getOpcode() == ISD::FABS);
17019 SDValue Hi =
17020 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
17022 SDLoc(NewConv)));
17023 AddToWorklist(Hi.getNode());
17024 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
17025 AddToWorklist(FlipBit.getNode());
17026 }
17027 SDValue FlipBits =
17028 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
17029 AddToWorklist(FlipBits.getNode());
17030 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
17031 }
17032 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
17033 if (N0.getOpcode() == ISD::FNEG)
17034 return DAG.getNode(ISD::XOR, DL, VT,
17035 NewConv, DAG.getConstant(SignBit, DL, VT));
17036 assert(N0.getOpcode() == ISD::FABS);
17037 return DAG.getNode(ISD::AND, DL, VT,
17038 NewConv, DAG.getConstant(~SignBit, DL, VT));
17039 }
17040
17041 // fold (bitconvert (fcopysign cst, x)) ->
17042 // (or (and (bitconvert x), sign), (and cst, (not sign)))
17043 // Note that we don't handle (copysign x, cst) because this can always be
17044 // folded to an fneg or fabs.
17045 //
17046 // For ppc_fp128:
17047 // fold (bitcast (fcopysign cst, x)) ->
17048 // flipbit = (and (extract_element
17049 // (xor (bitcast cst), (bitcast x)), 0),
17050 // signbit)
17051 // (xor (bitcast cst) (build_pair flipbit, flipbit))
17052 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17054 !VT.isVector()) {
17055 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
17056 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
17057 if (isTypeLegal(IntXVT)) {
17058 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
17059 AddToWorklist(X.getNode());
17060
17061 // If X has a different width than the result/lhs, sext it or truncate it.
17062 unsigned VTWidth = VT.getSizeInBits();
17063 if (OrigXWidth < VTWidth) {
17064 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
17065 AddToWorklist(X.getNode());
17066 } else if (OrigXWidth > VTWidth) {
17067 // To get the sign bit in the right place, we have to shift it right
17068 // before truncating.
17069 SDLoc DL(X);
17070 X = DAG.getNode(ISD::SRL, DL,
17071 X.getValueType(), X,
17072 DAG.getConstant(OrigXWidth-VTWidth, DL,
17073 X.getValueType()));
17074 AddToWorklist(X.getNode());
17075 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
17076 AddToWorklist(X.getNode());
17077 }
17078
17079 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
17080 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
17081 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
17082 AddToWorklist(Cst.getNode());
17083 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
17084 AddToWorklist(X.getNode());
17085 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
17086 AddToWorklist(XorResult.getNode());
17087 SDValue XorResult64 = DAG.getNode(
17088 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
17090 SDLoc(XorResult)));
17091 AddToWorklist(XorResult64.getNode());
17092 SDValue FlipBit =
17093 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
17094 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
17095 AddToWorklist(FlipBit.getNode());
17096 SDValue FlipBits =
17097 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
17098 AddToWorklist(FlipBits.getNode());
17099 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
17100 }
17101 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
17102 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
17103 X, DAG.getConstant(SignBit, SDLoc(X), VT));
17104 AddToWorklist(X.getNode());
17105
17106 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
17107 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
17108 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
17109 AddToWorklist(Cst.getNode());
17110
17111 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
17112 }
17113 }
17114
17115 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
17116 if (N0.getOpcode() == ISD::BUILD_PAIR)
17117 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
17118 return CombineLD;
17119
17120 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
17121 // => int_vt (any_extend elt_vt:x)
17122 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
17123 SDValue SrcScalar = N0.getOperand(0);
17124 if (SrcScalar.getValueType().isScalarInteger())
17125 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
17126 }
17127
17128 // Remove double bitcasts from shuffles - this is often a legacy of
17129 // XformToShuffleWithZero being used to combine bitmaskings (of
17130 // float vectors bitcast to integer vectors) into shuffles.
17131 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
17132 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
17133 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
17136 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
17137
17138 // If operands are a bitcast, peek through if it casts the original VT.
17139 // If operands are a constant, just bitcast back to original VT.
17140 auto PeekThroughBitcast = [&](SDValue Op) {
17141 if (Op.getOpcode() == ISD::BITCAST &&
17142 Op.getOperand(0).getValueType() == VT)
17143 return SDValue(Op.getOperand(0));
17144 if (Op.isUndef() || isAnyConstantBuildVector(Op))
17145 return DAG.getBitcast(VT, Op);
17146 return SDValue();
17147 };
17148
17149 // FIXME: If either input vector is bitcast, try to convert the shuffle to
17150 // the result type of this bitcast. This would eliminate at least one
17151 // bitcast. See the transform in InstCombine.
17152 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
17153 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
17154 if (!(SV0 && SV1))
17155 return SDValue();
17156
17157 int MaskScale =
17159 SmallVector<int, 8> NewMask;
17160 for (int M : SVN->getMask())
17161 for (int i = 0; i != MaskScale; ++i)
17162 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
17163
17164 SDValue LegalShuffle =
17165 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
17166 if (LegalShuffle)
17167 return LegalShuffle;
17168 }
17169
17170 return SDValue();
17171}
17172
17173SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
17174 EVT VT = N->getValueType(0);
17175 return CombineConsecutiveLoads(N, VT);
17176}
17177
/// Combine a FREEZE node. Three things are attempted, in order:
///  1. Drop the freeze entirely when the operand is already known to be
///     neither undef nor poison.
///  2. Canonicalize mixed frozen/unfrozen users of the operand so everything
///     goes through this freeze.
///  3. Push the freeze down onto the maybe-poison operands of an operation
///     that merely propagates (but does not create) poison.
SDValue DAGCombiner::visitFREEZE(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // freeze(x) -> x when x can never be undef or poison.
  if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
    return N0;

  // If we have frozen and unfrozen users of N0, update so everything uses N.
  if (!N0.isUndef() && !N0.hasOneUse()) {
    SDValue FrozenN0(N, 0);
    // Unfreeze all uses of N to avoid double deleting N from the CSE map.
    DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
    DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
    // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
    // creating a cycle in a DAG. Let's undo that by mutating the freeze.
    assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
    DAG.UpdateNodeOperands(N, N0);
    return FrozenN0;
  }

  // We currently avoid folding freeze over SRA/SRL, due to the problems seen
  // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
  // example https://reviews.llvm.org/D136529#4120959.
  if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
    return SDValue();

  // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
  // Try to push freeze through instructions that propagate but don't produce
  // poison as far as possible. If an operand of freeze follows three
  // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
  // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
  // the freeze through to the operands that are not guaranteed non-poison.
  // NOTE: we will strip poison-generating flags, so ignore them here.
  if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
                                 /*ConsiderFlags*/ false) ||
      N0->getNumValues() != 1 || !N0->hasOneUse())
    return SDValue();

  // TOOD: we should always allow multiple operands, however this increases the
  // likelihood of infinite loops due to the ReplaceAllUsesOfValueWith call
  // below causing later nodes that share frozen operands to fold again and no
  // longer being able to confirm other operands are not poison due to recursion
  // depth limits on isGuaranteedNotToBeUndefOrPoison.
  bool AllowMultipleMaybePoisonOperands =
      N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
      N0.getOpcode() == ISD::BUILD_VECTOR ||
      N0.getOpcode() == ISD::BUILD_PAIR ||

  // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
  // ones" or "constant" into something that depends on FrozenUndef. We can
  // instead pick undef values to keep those properties, while at the same time
  // folding away the freeze.
  // If we implement a more general solution for folding away freeze(undef) in
  // the future, then this special handling can be removed.
  if (N0.getOpcode() == ISD::BUILD_VECTOR) {
    SDLoc DL(N0);
    EVT VT = N0.getValueType();
      return DAG.getAllOnesConstant(DL, VT);
      // Replace any undef lanes with zero so the result stays a constant
      // vector instead of depending on a frozen undef.
      for (const SDValue &Op : N0->op_values())
        NewVecC.push_back(
            Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
      return DAG.getBuildVector(VT, DL, NewVecC);
    }
  }

  // Collect the distinct operands of N0 that may be undef/poison, remembering
  // their operand numbers (the operand SDValues may be invalidated later).
  SmallSet<SDValue, 8> MaybePoisonOperands;
  SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
  for (auto [OpNo, Op] : enumerate(N0->ops())) {
    if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
      continue;
    bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
    bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
    if (IsNewMaybePoisonOperand)
      MaybePoisonOperandNumbers.push_back(OpNo);
    if (!HadMaybePoisonOperands)
      continue;
    if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
      // Multiple maybe-poison ops when not allowed - bail out.
      return SDValue();
    }
  }
  // NOTE: the whole op may be not guaranteed to not be undef or poison because
  // it could create undef or poison due to it's poison-generating flags.
  // So not finding any maybe-poison operands is fine.

  for (unsigned OpNo : MaybePoisonOperandNumbers) {
    // N0 can mutate during iteration, so make sure to refetch the maybe poison
    // operands via the operand numbers. The typical scenario is that we have
    // something like this
    //   t262: i32 = freeze t181
    //   t150: i32 = ctlz_zero_undef t262
    //   t184: i32 = ctlz_zero_undef t181
    //   t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
    // When freezing the t181 operand we get t262 back, and then the
    // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
    // also recursively replace t184 by t150.
    SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
    // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
    if (MaybePoisonOperand.isUndef())
      continue;
    // First, freeze each offending operand.
    SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
    // Then, change all other uses of unfrozen operand to use frozen operand.
    DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
    if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
        FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
      // But, that also updated the use in the freeze we just created, thus
      // creating a cycle in a DAG. Let's undo that by mutating the freeze.
      DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
                             MaybePoisonOperand);
    }

    // This node has been merged with another.
    if (N->getOpcode() == ISD::DELETED_NODE)
      return SDValue(N, 0);
  }

  assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");

  // The whole node may have been updated, so the value we were holding
  // may no longer be valid. Re-fetch the operand we're `freeze`ing.
  N0 = N->getOperand(0);

  // Finally, recreate the node, it's operands were updated to use
  // frozen operands, so we just need to use it's "original" operands.
  // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
  // leave for a future patch.
  for (SDValue &Op : Ops) {
    if (Op.isUndef())
      Op = DAG.getFreeze(Op);
  }

  SDLoc DL(N0);

  // Special case handling for ShuffleVectorSDNode nodes.
  if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0))
    return DAG.getVectorShuffle(N0.getValueType(), DL, Ops[0], Ops[1],
                                SVN->getMask());

  // NOTE: this strips poison generating flags.
  // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
  // ninf, nsz, or fast.
  // However, contract, reassoc, afn, and arcp should be preserved,
  // as these fast-math flags do not introduce poison values.
  SDNodeFlags SrcFlags = N0->getFlags();
  SDNodeFlags SafeFlags;
  SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
  SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
  SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
  SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
  return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags);
}
17336
// Returns true if floating point contraction is allowed on the FMUL-SDValue
// `N`, i.e. either fusion is globally enabled (AllowFPOpFusion ==
// FPOpFusion::Fast, the -fp-contract=fast mode) or `N` itself carries the
// 'contract' fast-math flag.
  assert(N.getOpcode() == ISD::FMUL);

  return Options.AllowFPOpFusion == FPOpFusion::Fast ||
         N->getFlags().hasAllowContract();
}
17345
/// Try to perform FMA combining on a given FADD node.
/// Attempts to contract fadd(fmul x, y, z) patterns into fused multiply-adds
/// (FMA or FMAD), including variants that look through FP_EXTEND and nested
/// FMA chains. Returns the fused node, or an empty SDValue if no combine
/// applies. \p MatchContextClass selects between plain and VP node matching.
template <class MatchContextClass>
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);
  MatchContextClass matcher(DAG, TLI, N);
  const TargetOptions &Options = DAG.getTarget().Options;

  bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;

  // Floating-point multiply-add with intermediate rounding.
  // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
  // FIXME: Add VP_FMAD opcode.
  bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Fusion is allowed for every fmul when -fp-contract=fast, or whenever the
  // target has FMAD (which performs intermediate rounding, so it is not a
  // behavior change for it to absorb an unflagged fmul).
  bool AllowFusionGlobally =
      Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
    return SDValue();

  // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
  // beneficial. It does not reduce latency. It increases register pressure. It
  // replaces an fadd with an fma which is a more complex instruction, so is
  // likely to have a larger encoding, use more functional units, etc.
  if (N0 == N1)
    return SDValue();

  // Some targets prefer to form FMAs late, in the machine combiner.
  if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;

  auto isFusedOp = [&](SDValue N) {
    return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
  };

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
    if (!matcher.match(N, ISD::FMUL))
      return false;
    return AllowFusionGlobally || N->getFlags().hasAllowContract();
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
    if (N0->use_size() > N1->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
                           N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
                           N1.getOperand(1), N0);
  }

  // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
  // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
  // This also works with nested fma instructions:
  // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
  // fma A, B, (fma C, D, fma (E, F, G))
  // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
  // fma A, B, (fma C, D, fma (E, F, G)).
  // This requires reassociation because it changes the order of operations.
  bool CanReassociate = N->getFlags().hasAllowReassociation();
  if (CanReassociate) {
    SDValue FMA, E;
    if (isFusedOp(N0) && N0.hasOneUse()) {
      FMA = N0;
      E = N1;
    } else if (isFusedOp(N1) && N1.hasOneUse()) {
      FMA = N1;
      E = N0;
    }

    // Walk down the chain of one-use fused ops looking for a trailing
    // contractable fmul to absorb E into.
    SDValue TmpFMA = FMA;
    while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
      SDValue FMul = TmpFMA->getOperand(2);
      if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
        SDValue C = FMul.getOperand(0);
        SDValue D = FMul.getOperand(1);
        SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
        // Replacing the inner FMul could cause the outer FMA to be simplified
        // away.
        return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
      }

      TmpFMA = TmpFMA->getOperand(2);
    }
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (matcher.match(N0, ISD::FP_EXTEND)) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N00.getValueType())) {
      return matcher.getNode(
          PreferredFusedOpcode, SL, VT,
          matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
          matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (matcher.match(N1, ISD::FP_EXTEND)) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                            N10.getValueType())) {
      return matcher.getNode(
          PreferredFusedOpcode, SL, VT,
          matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
          matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
                                    SDValue Z) {
      return matcher.getNode(
          PreferredFusedOpcode, SL, VT, X, Y,
          matcher.getNode(PreferredFusedOpcode, SL, VT,
                          matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
                          matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
    };
    if (isFusedOp(N0)) {
      SDValue N02 = N0.getOperand(2);
      if (matcher.match(N02, ISD::FP_EXTEND)) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
                                    SDValue Z) {
      return matcher.getNode(
          PreferredFusedOpcode, SL, VT,
          matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
          matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
          matcher.getNode(PreferredFusedOpcode, SL, VT,
                          matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
                          matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (isFusedOp(N00)) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (isFusedOp(N1)) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (isFusedOp(N10)) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
                                N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0);
        }
      }
    }
  }

  return SDValue();
}
17581
17582/// Try to perform FMA combining on a given FSUB node.
17583template <class MatchContextClass>
17584SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
17585 SDValue N0 = N->getOperand(0);
17586 SDValue N1 = N->getOperand(1);
17587 EVT VT = N->getValueType(0);
17588 SDLoc SL(N);
17589 MatchContextClass matcher(DAG, TLI, N);
17590 const TargetOptions &Options = DAG.getTarget().Options;
17591
17592 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17593
17594 // Floating-point multiply-add with intermediate rounding.
17595 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17596 // FIXME: Add VP_FMAD opcode.
17597 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17598
17599 // Floating-point multiply-add without intermediate rounding.
17600 bool HasFMA =
17601 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17603
17604 // No valid opcode, do not combine.
17605 if (!HasFMAD && !HasFMA)
17606 return SDValue();
17607
17608 const SDNodeFlags Flags = N->getFlags();
17609 bool AllowFusionGlobally =
17610 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
17611
17612 // If the subtraction is not contractable, do not combine.
17613 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17614 return SDValue();
17615
17616 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17617 return SDValue();
17618
17619 // Always prefer FMAD to FMA for precision.
17620 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17622 bool NoSignedZero = Flags.hasNoSignedZeros();
17623
17624 // Is the node an FMUL and contractable either due to global flags or
17625 // SDNodeFlags.
17626 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17627 if (!matcher.match(N, ISD::FMUL))
17628 return false;
17629 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17630 };
17631
17632 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17633 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
17634 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
17635 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
17636 XY.getOperand(1),
17637 matcher.getNode(ISD::FNEG, SL, VT, Z));
17638 }
17639 return SDValue();
17640 };
17641
17642 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17643 // Note: Commutes FSUB operands.
17644 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
17645 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
17646 return matcher.getNode(
17647 PreferredFusedOpcode, SL, VT,
17648 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
17649 YZ.getOperand(1), X);
17650 }
17651 return SDValue();
17652 };
17653
17654 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
17655 // prefer to fold the multiply with fewer uses.
17656 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
17657 (N0->use_size() > N1->use_size())) {
17658 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
17659 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17660 return V;
17661 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
17662 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17663 return V;
17664 } else {
17665 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17666 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17667 return V;
17668 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17669 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17670 return V;
17671 }
17672
17673 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
17674 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
17675 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
17676 SDValue N00 = N0.getOperand(0).getOperand(0);
17677 SDValue N01 = N0.getOperand(0).getOperand(1);
17678 return matcher.getNode(PreferredFusedOpcode, SL, VT,
17679 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
17680 matcher.getNode(ISD::FNEG, SL, VT, N1));
17681 }
17682
17683 // Look through FP_EXTEND nodes to do more combining.
17684
17685 // fold (fsub (fpext (fmul x, y)), z)
17686 // -> (fma (fpext x), (fpext y), (fneg z))
17687 if (matcher.match(N0, ISD::FP_EXTEND)) {
17688 SDValue N00 = N0.getOperand(0);
17689 if (isContractableFMUL(N00) &&
17690 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17691 N00.getValueType())) {
17692 return matcher.getNode(
17693 PreferredFusedOpcode, SL, VT,
17694 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17695 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17696 matcher.getNode(ISD::FNEG, SL, VT, N1));
17697 }
17698 }
17699
17700 // fold (fsub x, (fpext (fmul y, z)))
17701 // -> (fma (fneg (fpext y)), (fpext z), x)
17702 // Note: Commutes FSUB operands.
17703 if (matcher.match(N1, ISD::FP_EXTEND)) {
17704 SDValue N10 = N1.getOperand(0);
17705 if (isContractableFMUL(N10) &&
17706 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17707 N10.getValueType())) {
17708 return matcher.getNode(
17709 PreferredFusedOpcode, SL, VT,
17710 matcher.getNode(
17711 ISD::FNEG, SL, VT,
17712 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
17713 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17714 }
17715 }
17716
17717 // fold (fsub (fpext (fneg (fmul, x, y))), z)
17718 // -> (fneg (fma (fpext x), (fpext y), z))
17719 // Note: This could be removed with appropriate canonicalization of the
17720 // input expression into (fneg (fadd (fpext (fmul, x, y)), z)). However, the
17721 // command line flag -fp-contract=fast and fast-math flag contract prevent
17722 // from implementing the canonicalization in visitFSUB.
17723 if (matcher.match(N0, ISD::FP_EXTEND)) {
17724 SDValue N00 = N0.getOperand(0);
17725 if (matcher.match(N00, ISD::FNEG)) {
17726 SDValue N000 = N00.getOperand(0);
17727 if (isContractableFMUL(N000) &&
17728 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17729 N00.getValueType())) {
17730 return matcher.getNode(
17731 ISD::FNEG, SL, VT,
17732 matcher.getNode(
17733 PreferredFusedOpcode, SL, VT,
17734 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17735 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17736 N1));
17737 }
17738 }
17739 }
17740
17741 // fold (fsub (fneg (fpext (fmul, x, y))), z)
17742 // -> (fneg (fma (fpext x)), (fpext y), z)
17743 // Note: This could be removed with appropriate canonicalization of the
17744 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
17745 // command line flag -fp-contract=fast and fast-math flag contract prevent
17746 // from implementing the canonicalization in visitFSUB.
17747 if (matcher.match(N0, ISD::FNEG)) {
17748 SDValue N00 = N0.getOperand(0);
17749 if (matcher.match(N00, ISD::FP_EXTEND)) {
17750 SDValue N000 = N00.getOperand(0);
17751 if (isContractableFMUL(N000) &&
17752 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17753 N000.getValueType())) {
17754 return matcher.getNode(
17755 ISD::FNEG, SL, VT,
17756 matcher.getNode(
17757 PreferredFusedOpcode, SL, VT,
17758 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17759 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17760 N1));
17761 }
17762 }
17763 }
17764
17765 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17766 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
17767 };
17768
17769 auto isFusedOp = [&](SDValue N) {
17770 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17771 };
17772
17773 // More folding opportunities when target permits.
17774 if (Aggressive && N->getFlags().hasAllowReassociation()) {
17775 bool CanFuse = N->getFlags().hasAllowContract();
17776 // fold (fsub (fma x, y, (fmul u, v)), z)
17777 // -> (fma x, y (fma u, v, (fneg z)))
17778 if (CanFuse && isFusedOp(N0) &&
17779 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
17780 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
17781 return matcher.getNode(
17782 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17783 matcher.getNode(PreferredFusedOpcode, SL, VT,
17784 N0.getOperand(2).getOperand(0),
17785 N0.getOperand(2).getOperand(1),
17786 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17787 }
17788
17789 // fold (fsub x, (fma y, z, (fmul u, v)))
17790 // -> (fma (fneg y), z, (fma (fneg u), v, x))
17791 if (CanFuse && isFusedOp(N1) &&
17792 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
17793 N1->hasOneUse() && NoSignedZero) {
17794 SDValue N20 = N1.getOperand(2).getOperand(0);
17795 SDValue N21 = N1.getOperand(2).getOperand(1);
17796 return matcher.getNode(
17797 PreferredFusedOpcode, SL, VT,
17798 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17799 N1.getOperand(1),
17800 matcher.getNode(PreferredFusedOpcode, SL, VT,
17801 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
17802 }
17803
17804 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
17805 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
17806 if (isFusedOp(N0) && N0->hasOneUse()) {
17807 SDValue N02 = N0.getOperand(2);
17808 if (matcher.match(N02, ISD::FP_EXTEND)) {
17809 SDValue N020 = N02.getOperand(0);
17810 if (isContractableAndReassociableFMUL(N020) &&
17811 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17812 N020.getValueType())) {
17813 return matcher.getNode(
17814 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17815 matcher.getNode(
17816 PreferredFusedOpcode, SL, VT,
17817 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
17818 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
17819 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17820 }
17821 }
17822 }
17823
17824 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
17825 // -> (fma (fpext x), (fpext y),
17826 // (fma (fpext u), (fpext v), (fneg z)))
17827 // FIXME: This turns two single-precision and one double-precision
17828 // operation into two double-precision operations, which might not be
17829 // interesting for all targets, especially GPUs.
17830 if (matcher.match(N0, ISD::FP_EXTEND)) {
17831 SDValue N00 = N0.getOperand(0);
17832 if (isFusedOp(N00)) {
17833 SDValue N002 = N00.getOperand(2);
17834 if (isContractableAndReassociableFMUL(N002) &&
17835 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17836 N00.getValueType())) {
17837 return matcher.getNode(
17838 PreferredFusedOpcode, SL, VT,
17839 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17840 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17841 matcher.getNode(
17842 PreferredFusedOpcode, SL, VT,
17843 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
17844 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
17845 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17846 }
17847 }
17848 }
17849
17850 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
17851 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
17852 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
17853 N1->hasOneUse()) {
17854 SDValue N120 = N1.getOperand(2).getOperand(0);
17855 if (isContractableAndReassociableFMUL(N120) &&
17856 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17857 N120.getValueType())) {
17858 SDValue N1200 = N120.getOperand(0);
17859 SDValue N1201 = N120.getOperand(1);
17860 return matcher.getNode(
17861 PreferredFusedOpcode, SL, VT,
17862 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17863 N1.getOperand(1),
17864 matcher.getNode(
17865 PreferredFusedOpcode, SL, VT,
17866 matcher.getNode(ISD::FNEG, SL, VT,
17867 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
17868 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
17869 }
17870 }
17871
17872 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
17873 // -> (fma (fneg (fpext y)), (fpext z),
17874 // (fma (fneg (fpext u)), (fpext v), x))
17875 // FIXME: This turns two single-precision and one double-precision
17876 // operation into two double-precision operations, which might not be
17877 // interesting for all targets, especially GPUs.
17878 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
17879 SDValue CvtSrc = N1.getOperand(0);
17880 SDValue N100 = CvtSrc.getOperand(0);
17881 SDValue N101 = CvtSrc.getOperand(1);
17882 SDValue N102 = CvtSrc.getOperand(2);
17883 if (isContractableAndReassociableFMUL(N102) &&
17884 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17885 CvtSrc.getValueType())) {
17886 SDValue N1020 = N102.getOperand(0);
17887 SDValue N1021 = N102.getOperand(1);
17888 return matcher.getNode(
17889 PreferredFusedOpcode, SL, VT,
17890 matcher.getNode(ISD::FNEG, SL, VT,
17891 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
17892 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
17893 matcher.getNode(
17894 PreferredFusedOpcode, SL, VT,
17895 matcher.getNode(ISD::FNEG, SL, VT,
17896 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
17897 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
17898 }
17899 }
17900 }
17901
17902 return SDValue();
17903}
17904
17905/// Try to perform FMA combining on a given FMUL node based on the distributive
17906/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
17907/// subtraction instead of addition).
/// Returns the fused node on success, or an empty SDValue when no fold applies.
17908 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
17909 SDValue N0 = N->getOperand(0);
17910 SDValue N1 = N->getOperand(1);
17911 EVT VT = N->getValueType(0);
17912 SDLoc SL(N);
17913
17914 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
17915
 // NOTE(review): Options is not referenced in the lines visible in this
 // extraction; presumably it feeds the (missing) HasFMA conjuncts below —
 // confirm against upstream.
17916 const TargetOptions &Options = DAG.getTarget().Options;
17917
17918 // The transforms below are incorrect when x == 0 and y == inf, because the
17919 // intermediate multiplication produces a nan.
17920 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
17921 if (!FAdd->getFlags().hasNoInfs())
17922 return SDValue();
17923
17924 // Floating-point multiply-add without intermediate rounding.
 // NOTE(review): original lines 17926 and 17928 (further conjuncts of HasFMA)
 // are missing from this extraction; only the legal-or-custom check for
 // ISD::FMA is visible here.
17925 bool HasFMA =
17927 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17929
17930 // Floating-point multiply-add with intermediate rounding. This can result
17931 // in a less precise result due to the changed rounding order.
17932 bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N)
17933
17934 // No valid opcode, do not combine.
17935 if (!HasFMAD && !HasFMA)
17936 return SDValue();
17937
17938 // Always prefer FMAD to FMA for precision.
 // NOTE(review): original line 17940 is missing from this extraction.
17939 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17941
17942 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
17943 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
 // Requires the FADD to be single-use unless aggressive fusion is enabled.
17944 auto FuseFADD = [&](SDValue X, SDValue Y) {
17945 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
 // Second argument 'true' permits undef lanes in a splat constant.
17946 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
17947 if (C->isExactlyValue(+1.0))
17948 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17949 Y);
17950 if (C->isExactlyValue(-1.0))
17951 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17952 DAG.getNode(ISD::FNEG, SL, VT, Y));
17953 }
17954 }
17955 return SDValue();
17956 };
17957
 // FMUL is commutative: try the FADD pattern on either operand.
17958 if (SDValue FMA = FuseFADD(N0, N1))
17959 return FMA;
17960 if (SDValue FMA = FuseFADD(N1, N0))
17961 return FMA;
17962
17963 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
17964 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
17965 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
17966 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
17967 auto FuseFSUB = [&](SDValue X, SDValue Y) {
17968 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
 // Constant on the LHS of the FSUB: (fsub +/-1.0, x1).
17969 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
17970 if (C0->isExactlyValue(+1.0))
17971 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17972 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17973 Y);
17974 if (C0->isExactlyValue(-1.0))
17975 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17976 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17977 DAG.getNode(ISD::FNEG, SL, VT, Y));
17978 }
 // Constant on the RHS of the FSUB: (fsub x0, +/-1.0).
17979 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
17980 if (C1->isExactlyValue(+1.0))
17981 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17982 DAG.getNode(ISD::FNEG, SL, VT, Y));
17983 if (C1->isExactlyValue(-1.0))
17984 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17985 Y);
17986 }
17987 }
17988 return SDValue();
17989 };
17990
17991 if (SDValue FMA = FuseFSUB(N0, N1))
17992 return FMA;
17993 if (SDValue FMA = FuseFSUB(N1, N0))
17994 return FMA;
17995
17996 return SDValue();
17997}
17998
17999SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
18000 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18001
18002 // FADD -> FMA combines:
18003 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
18004 if (Fused.getOpcode() != ISD::DELETED_NODE)
18005 AddToWorklist(Fused.getNode());
18006 return Fused;
18007 }
18008 return SDValue();
18009}
18010
/// Visitor for ISD::FADD: constant folding, canonicalization, negation-based
/// rewrites, and (flag-guarded) reassociation folds. Returns the replacement
/// value or an empty SDValue when no combine applies.
18011 SDValue DAGCombiner::visitFADD(SDNode *N) {
18012 SDValue N0 = N->getOperand(0);
18013 SDValue N1 = N->getOperand(1);
18014 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
18015 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
18016 EVT VT = N->getValueType(0);
18017 SDLoc DL(N);
18018 SDNodeFlags Flags = N->getFlags();
18019 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18020
18021 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18022 return R;
18023
18024 // fold (fadd c1, c2) -> c1 + c2
18025 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
18026 return C;
18027
18028 // canonicalize constant to RHS
18029 if (N0CFP && !N1CFP)
18030 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
18031
18032 // fold vector ops
18033 if (VT.isVector())
18034 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18035 return FoldedVOp;
18036
18037 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
18038 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
18039 if (N1C && N1C->isZero())
 // NOTE(review): original line 18041 (the last disjunct of this condition)
 // is missing from this extraction — confirm against upstream.
18040 if (N1C->isNegative() || Flags.hasNoSignedZeros() ||
18042 return N0;
18043
18044 if (SDValue NewSel = foldBinOpIntoSelect(N))
18045 return NewSel;
18046
18047 // fold (fadd A, (fneg B)) -> (fsub A, B)
18048 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
18049 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
18050 N1, DAG, LegalOperations, ForCodeSize))
18051 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
18052
18053 // fold (fadd (fneg A), B) -> (fsub B, A)
18054 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
18055 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
18056 N0, DAG, LegalOperations, ForCodeSize))
18057 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
18058
 // Matches a single-use (fmul B, -2.0) with a (possibly splat) constant RHS.
18059 auto isFMulNegTwo = [](SDValue FMul) {
18060 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
18061 return false;
18062 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
18063 return C && C->isExactlyValue(-2.0);
18064 };
18065
18066 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
18067 if (isFMulNegTwo(N0)) {
18068 SDValue B = N0.getOperand(0);
18069 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
18070 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
18071 }
18072 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
18073 if (isFMulNegTwo(N1)) {
18074 SDValue B = N1.getOperand(0);
18075 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
18076 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
18077 }
18078
18079 // No FP constant should be created after legalization as Instruction
18080 // Selection pass has a hard time dealing with FP constants.
18081 bool AllowNewConst = (Level < AfterLegalizeDAG);
18082
18083 // If nnan is enabled, fold lots of things.
18084 if (Flags.hasNoNaNs() && AllowNewConst) {
18085 // If allowed, fold (fadd (fneg x), x) -> 0.0
18086 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
18087 return DAG.getConstantFP(0.0, DL, VT);
18088
18089 // If allowed, fold (fadd x, (fneg x)) -> 0.0
18090 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
18091 return DAG.getConstantFP(0.0, DL, VT);
18092 }
18093
18094 // If reassoc and nsz, fold lots of things.
18095 // TODO: break out portions of the transformations below for which Unsafe is
18096 // considered and which do not require both nsz and reassoc
18097 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
18098 AllowNewConst) {
18099 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
 // NOTE(review): original line 18101 (the final conjunct of this condition,
 // presumably a constant check on N0.getOperand(1)) is missing from this
 // extraction — confirm against upstream.
18100 if (N1CFP && N0.getOpcode() == ISD::FADD &&
18102 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
18103 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
18104 }
18105
18106 // We can fold chains of FADD's of the same value into multiplications.
18107 // This transform is not safe in general because we are reducing the number
18108 // of rounding steps.
18109 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
18110 if (N0.getOpcode() == ISD::FMUL) {
18111 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
18112 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
18113
18114 // (fadd (fmul x, c), x) -> (fmul x, c+1)
18115 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
18116 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
18117 DAG.getConstantFP(1.0, DL, VT));
18118 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
18119 }
18120
18121 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
18122 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
18123 N1.getOperand(0) == N1.getOperand(1) &&
18124 N0.getOperand(0) == N1.getOperand(0)) {
18125 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
18126 DAG.getConstantFP(2.0, DL, VT));
18127 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
18128 }
18129 }
18130
 // Mirror image of the two folds above, with the FMUL on the RHS.
18131 if (N1.getOpcode() == ISD::FMUL) {
18132 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
18133 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
18134
18135 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
18136 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
18137 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
18138 DAG.getConstantFP(1.0, DL, VT));
18139 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
18140 }
18141
18142 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
18143 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
18144 N0.getOperand(0) == N0.getOperand(1) &&
18145 N1.getOperand(0) == N0.getOperand(0)) {
18146 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
18147 DAG.getConstantFP(2.0, DL, VT));
18148 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
18149 }
18150 }
18151
18152 if (N0.getOpcode() == ISD::FADD) {
18153 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
18154 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
18155 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
18156 (N0.getOperand(0) == N1)) {
18157 return DAG.getNode(ISD::FMUL, DL, VT, N1,
18158 DAG.getConstantFP(3.0, DL, VT));
18159 }
18160 }
18161
18162 if (N1.getOpcode() == ISD::FADD) {
18163 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
18164 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
18165 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
18166 N1.getOperand(0) == N0) {
18167 return DAG.getNode(ISD::FMUL, DL, VT, N0,
18168 DAG.getConstantFP(3.0, DL, VT));
18169 }
18170 }
18171
18172 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
18173 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
18174 N0.getOperand(0) == N0.getOperand(1) &&
18175 N1.getOperand(0) == N1.getOperand(1) &&
18176 N0.getOperand(0) == N1.getOperand(0)) {
18177 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
18178 DAG.getConstantFP(4.0, DL, VT));
18179 }
18180 }
18181 } // reassoc && nsz && AllowNewConst
18182
18183 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()) {
18184 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
18185 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
18186 VT, N0, N1, Flags))
18187 return SD;
18188 }
18189
18190 // FADD -> FMA combines:
18191 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
 // Only revisit the fused node when the combine did not delete it.
18192 if (Fused.getOpcode() != ISD::DELETED_NODE)
18193 AddToWorklist(Fused.getNode());
18194 return Fused;
18195 }
18196 return SDValue();
18197}
18198
18199SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
18200 SDValue Chain = N->getOperand(0);
18201 SDValue N0 = N->getOperand(1);
18202 SDValue N1 = N->getOperand(2);
18203 EVT VT = N->getValueType(0);
18204 EVT ChainVT = N->getValueType(1);
18205 SDLoc DL(N);
18206 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18207
18208 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
18209 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
18210 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
18211 N1, DAG, LegalOperations, ForCodeSize)) {
18212 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
18213 {Chain, N0, NegN1});
18214 }
18215
18216 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
18217 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
18218 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
18219 N0, DAG, LegalOperations, ForCodeSize)) {
18220 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
18221 {Chain, N1, NegN0});
18222 }
18223 return SDValue();
18224}
18225
18226SDValue DAGCombiner::visitFSUB(SDNode *N) {
18227 SDValue N0 = N->getOperand(0);
18228 SDValue N1 = N->getOperand(1);
18229 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
18230 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18231 EVT VT = N->getValueType(0);
18232 SDLoc DL(N);
18233 const SDNodeFlags Flags = N->getFlags();
18234 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18235
18236 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18237 return R;
18238
18239 // fold (fsub c1, c2) -> c1-c2
18240 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
18241 return C;
18242
18243 // fold vector ops
18244 if (VT.isVector())
18245 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18246 return FoldedVOp;
18247
18248 if (SDValue NewSel = foldBinOpIntoSelect(N))
18249 return NewSel;
18250
18251 // (fsub A, 0) -> A
18252 if (N1CFP && N1CFP->isZero()) {
18253 if (!N1CFP->isNegative() || Flags.hasNoSignedZeros() ||
18254 DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
18255 return N0;
18256 }
18257 }
18258
18259 if (N0 == N1) {
18260 // (fsub x, x) -> 0.0
18261 if (Flags.hasNoNaNs())
18262 return DAG.getConstantFP(0.0f, DL, VT);
18263 }
18264
18265 // (fsub -0.0, N1) -> -N1
18266 if (N0CFP && N0CFP->isZero()) {
18267 if (N0CFP->isNegative() || Flags.hasNoSignedZeros() ||
18268 DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
18269 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
18270 // flushed to zero, unless all users treat denorms as zero (DAZ).
18271 // FIXME: This transform will change the sign of a NaN and the behavior
18272 // of a signaling NaN. It is only valid when a NoNaN flag is present.
18273 DenormalMode DenormMode = DAG.getDenormalMode(VT);
18274 if (DenormMode == DenormalMode::getIEEE()) {
18275 if (SDValue NegN1 =
18276 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18277 return NegN1;
18278 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
18279 return DAG.getNode(ISD::FNEG, DL, VT, N1);
18280 }
18281 }
18282 }
18283
18284 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
18285 N1.getOpcode() == ISD::FADD) {
18286 // X - (X + Y) -> -Y
18287 if (N0 == N1->getOperand(0))
18288 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
18289 // X - (Y + X) -> -Y
18290 if (N0 == N1->getOperand(1))
18291 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
18292 }
18293
18294 // fold (fsub A, (fneg B)) -> (fadd A, B)
18295 if (SDValue NegN1 =
18296 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18297 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
18298
18299 // FSUB -> FMA combines:
18300 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
18301 AddToWorklist(Fused.getNode());
18302 return Fused;
18303 }
18304
18305 return SDValue();
18306}
18307
18308// Transform IEEE Floats:
18309// (fmul C, (uitofp Pow2))
18310// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
18311// (fdiv C, (uitofp Pow2))
18312// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
18313//
18314// The rationale is fmul/fdiv by a power of 2 is just change the exponent, so
18315// there is no need for more than an add/sub.
18316//
18317// This is valid under the following circumstances:
18318// 1) We are dealing with IEEE floats
18319// 2) C is normal
18320// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
18321// TODO: Much of this could also be used for generating `ldexp` on targets that
18322// prefer it.
/// Rewrite (fmul/fdiv C, (uitofp Pow2)) as an integer add/sub of the shifted
/// log2 to C's exponent bits (see the block comment above). Returns the new
/// value or an empty SDValue when the transform does not apply.
18323 SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
18324 EVT VT = N->getValueType(0);
 // NOTE(review): original line 18325 (the `if (...)` guard whose body is the
 // early return below) is missing from this extraction — confirm against
 // upstream.
18326 return SDValue();
18327
18328 SDValue ConstOp, Pow2Op;
18329
 // Shared mantissa width across all lanes; set by the first valid constant.
18330 std::optional<int> Mantissa;
 // Tries to interpret operand ConstOpIdx as the FP constant and the other
 // operand as (u/s)int_to_fp of a power-of-two integer. On success, fills in
 // ConstOp/Pow2Op/Mantissa and returns true.
18331 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
 // FDIV is not commutative: only C / Pow2 is handled, not Pow2 / C.
18332 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
18333 return false;
18334
18335 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
18336 Pow2Op = N->getOperand(1 - ConstOpIdx);
 // SINT_TO_FP is acceptable only when the input is provably non-negative,
 // in which case it behaves like UINT_TO_FP.
18337 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
18338 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
18339 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
18340 return false;
18341
18342 Pow2Op = Pow2Op.getOperand(0);
18343
18344 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
18345 // TODO: We could use knownbits to make this bound more precise.
18346 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
18347
18348 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
18349 if (CFP == nullptr)
18350 return false;
18351
18352 const APFloat &APF = CFP->getValueAPF();
18353
18354 // Make sure we have normal constant.
18355 if (!APF.isNormal())
18356 return false;
18357
18358 // Make sure the floats exponent is within the bounds that this transform
18359 // produces bitwise equals value.
18360 int CurExp = ilogb(APF);
18361 // FMul by pow2 will only increase exponent.
18362 int MinExp =
18363 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
18364 // FDiv by pow2 will only decrease exponent.
18365 int MaxExp =
18366 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
 // NOTE(review): original line 18368 (the second disjunct, presumably the
 // MaxExp-vs-semanticsMaxExponent bound) is missing from this extraction —
 // confirm against upstream.
18367 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
18369 return false;
18370
18371 // Finally make sure we actually know the mantissa for the float type.
18372 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
18373 if (!Mantissa)
18374 Mantissa = ThisMantissa;
18375
 // All lanes must share one mantissa width for a single shift amount.
18376 return *Mantissa == ThisMantissa && ThisMantissa > 0;
18377 };
18378
18379 // TODO: We may be able to include undefs.
18380 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
18381 };
18382
18383 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
18384 return SDValue();
18385
 // Let the target veto the integer add/sub + bitcast form.
18386 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
18387 return SDValue();
18388
18389 // Get log2 after all other checks have taken place. This is because
18390 // BuildLogBase2 may create a new node.
18391 SDLoc DL(N);
18392 // Get Log2 type with same bitwidth as the float type (VT).
 // NOTE(review): original line 18395 (the integer element-type argument to
 // changeElementType) is missing from this extraction — confirm against
 // upstream.
18393 EVT NewIntVT = VT.changeElementType(
18394 *DAG.getContext(),
18396
18397 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
18398 /*InexpensiveOnly*/ true, NewIntVT);
18399 if (!Log2)
18400 return SDValue();
18401
18402 // Perform actual transform.
18403 SDValue MantissaShiftCnt =
18404 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
18405 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
18406 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
18407 // cast. We could implement that by handle here to handle the casts.
18408 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
 // ADD adjusts the exponent up (fmul); SUB adjusts it down (fdiv).
18409 SDValue ResAsInt =
18410 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
18411 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
18412 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
18413 return ResAsFP;
18414}
18415
/// Visitor for ISD::FMUL: constant folding, canonicalization, negation and
/// select-based rewrites, FMA distribution, and the int-pow2 exponent trick.
/// Returns the replacement value or an empty SDValue.
18416 SDValue DAGCombiner::visitFMUL(SDNode *N) {
18417 SDValue N0 = N->getOperand(0);
18418 SDValue N1 = N->getOperand(1);
18419 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18420 EVT VT = N->getValueType(0);
18421 SDLoc DL(N);
18422 const SDNodeFlags Flags = N->getFlags();
18423 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18424
18425 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18426 return R;
18427
18428 // fold (fmul c1, c2) -> c1*c2
18429 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
18430 return C;
18431
18432 // canonicalize constant to RHS
 // NOTE(review): original lines 18433-18434 (the `if (...)` condition that
 // guards this swap) are missing from this extraction — confirm against
 // upstream.
18435 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
18436
18437 // fold vector ops
18438 if (VT.isVector())
18439 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18440 return FoldedVOp;
18441
18442 if (SDValue NewSel = foldBinOpIntoSelect(N))
18443 return NewSel;
18444
18445 if (Flags.hasAllowReassociation()) {
18446 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
 // NOTE(review): original line 18447 (the leading conjunct of this
 // condition) is missing from this extraction — confirm against upstream.
18448 N0.getOpcode() == ISD::FMUL) {
18449 SDValue N00 = N0.getOperand(0);
18450 SDValue N01 = N0.getOperand(1);
18451 // Avoid an infinite loop by making sure that N00 is not a constant
18452 // (the inner multiply has not been constant folded yet).
 // NOTE(review): original lines 18453-18454 (the constant checks described
 // by the comment above) are missing from this extraction — confirm
 // against upstream.
18455 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
18456 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
18457 }
18458 }
18459
18460 // Match a special-case: we convert X * 2.0 into fadd.
18461 // fmul (fadd X, X), C -> fmul X, 2.0 * C
18462 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
18463 N0.getOperand(0) == N0.getOperand(1)) {
18464 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
18465 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
18466 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
18467 }
18468
18469 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
18470 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
18471 VT, N0, N1, Flags))
18472 return SD;
18473 }
18474
18475 // fold (fmul X, 2.0) -> (fadd X, X)
18476 if (N1CFP && N1CFP->isExactlyValue(+2.0))
18477 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
18478
18479 // fold (fmul X, -1.0) -> (fsub -0.0, X)
18480 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
18481 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
18482 return DAG.getNode(ISD::FSUB, DL, VT,
18483 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
18484 }
18485 }
18486
18487 // -N0 * -N1 --> N0 * N1
 // NOTE(review): original lines 18488-18491 (presumably the
 // TargetLowering::NegatibleCost CostN0/CostN1 declarations read below) are
 // missing from this extraction — confirm against upstream.
18492 SDValue NegN0 =
18493 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18494 if (NegN0) {
 // Keep NegN0 alive while computing NegN1, which may CSE/delete nodes.
18495 HandleSDNode NegN0Handle(NegN0);
18496 SDValue NegN1 =
18497 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
 // NOTE(review): original line 18499 (the second disjunct, presumably the
 // CostN1 cheaper-check) is missing from this extraction.
18498 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18500 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
18501 }
18502
18503 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
18504 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
18505 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
18506 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
18507 TLI.isOperationLegal(ISD::FABS, VT)) {
18508 SDValue Select = N0, X = N1;
18509 if (Select.getOpcode() != ISD::SELECT)
18510 std::swap(Select, X);
18511
18512 SDValue Cond = Select.getOperand(0);
18513 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
18514 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
18515
 // Only handle (setcc X, 0.0, cc) as the select condition.
18516 if (TrueOpnd && FalseOpnd &&
18517 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
18518 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
18519 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
18520 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18521 switch (CC) {
18522 default: break;
 // Less-than flavors: swap the select arms so the greater-than logic
 // below handles both orientations.
18523 case ISD::SETOLT:
18524 case ISD::SETULT:
18525 case ISD::SETOLE:
18526 case ISD::SETULE:
18527 case ISD::SETLT:
18528 case ISD::SETLE:
18529 std::swap(TrueOpnd, FalseOpnd);
18530 [[fallthrough]];
18531 case ISD::SETOGT:
18532 case ISD::SETUGT:
18533 case ISD::SETOGE:
18534 case ISD::SETUGE:
18535 case ISD::SETGT:
18536 case ISD::SETGE:
18537 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
18538 TLI.isOperationLegal(ISD::FNEG, VT))
18539 return DAG.getNode(ISD::FNEG, DL, VT,
18540 DAG.getNode(ISD::FABS, DL, VT, X));
18541 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
18542 return DAG.getNode(ISD::FABS, DL, VT, X);
18543
18544 break;
18545 }
18546 }
18547 }
18548
18549 // FMUL -> FMA combines:
18550 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
18551 AddToWorklist(Fused.getNode());
18552 return Fused;
18553 }
18554
18555 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
18556 // able to run.
18557 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18558 return R;
18559
18560 return SDValue();
18561}
18562
// Generic FMA combiner; MatchContextClass abstracts over plain FMA vs. the
// VP (vector-predicated) form so both share the folds below.
template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  // Scalar FP constant operands (null if the operand is not constant).
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  // FMA nodes have flags that propagate to the created nodes.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
  MatchContextClass matcher(DAG, TLI, N);

  // Constant fold FMA.
  if (SDValue C =
          DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
    return C;

  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
  // NOTE(review): the declarations of CostN0/CostN1 (negation-cost out
  // parameters for getNegatedExpression) are elided in this excerpt —
  // verify against the upstream file.
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  if (NegN0) {
    // Keep NegN0 alive while building NegN1 so it cannot be deleted.
    HandleSDNode NegN0Handle(NegN0);
    SDValue NegN1 =
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
    // Only negate both when at least one negation is cheaper than the
    // original operand.
    // NOTE(review): the second half of this condition is elided here.
    if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
      return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
  }

  // With nnan+ninf (and nsz, or an addend known not to be -0.0),
  // fma(0, x, z) and fma(x, 0, z) fold to z.
  if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
    if (N->getFlags().hasNoSignedZeros() ||
        (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
      if (N0CFP && N0CFP->isZero())
        return N2;
      if (N1CFP && N1CFP->isZero())
        return N2;
    }
  }

  // fma(1.0, x, y) -> fadd(x, y); fma(x, 1.0, y) -> fadd(x, y)
  // FIXME: Support splat of constant.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return matcher.getNode(ISD::FADD, DL, VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  // NOTE(review): the `if` condition guarding this return is elided here.
    return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);

  bool CanReassociate = N->getFlags().hasAllowReassociation();
  if (CanReassociate) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    // NOTE(review): the remaining conjuncts of this condition are elided.
    if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
      return matcher.getNode(
          ISD::FMUL, DL, VT, N0,
          matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    // NOTE(review): the remaining conjuncts of this condition are elided.
    if (matcher.match(N0, ISD::FMUL) &&
      return matcher.getNode(
          ISD::FMA, DL, VT, N0.getOperand(0),
          matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
    }
  }

  // (fma x, -1, y) -> (fadd (fneg x), y)
  // FIXME: Support splat of constant.
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      return matcher.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x -K, y
    // NOTE(review): one line of this condition is elided in this excerpt.
    if (matcher.match(N0, ISD::FNEG) &&
        (N1.hasOneUse() &&
         !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
      return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                             matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
    }
  }

  // FIXME: Support splat of constant.
  if (CanReassociate) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return matcher.getNode(ISD::FMUL, DL, VT, N0,
                             matcher.getNode(ISD::FADD, DL, VT, N1,
                                             DAG.getConstantFP(1.0, DL, VT)));
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
      return matcher.getNode(ISD::FMUL, DL, VT, N0,
                             matcher.getNode(ISD::FADD, DL, VT, N1,
                                             DAG.getConstantFP(-1.0, DL, VT)));
    }
  }

  // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
  // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
  // NOTE(review): the call that computes `Neg` is elided in this excerpt.
  if (!TLI.isFNegFree(VT))
        SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
      return matcher.getNode(ISD::FNEG, DL, VT, Neg);
  return SDValue();
}
18687
18688SDValue DAGCombiner::visitFMAD(SDNode *N) {
18689 SDValue N0 = N->getOperand(0);
18690 SDValue N1 = N->getOperand(1);
18691 SDValue N2 = N->getOperand(2);
18692 EVT VT = N->getValueType(0);
18693 SDLoc DL(N);
18694
18695 // Constant fold FMAD.
18696 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
18697 return C;
18698
18699 return SDValue();
18700}
18701
18702SDValue DAGCombiner::visitFMULADD(SDNode *N) {
18703 SDValue N0 = N->getOperand(0);
18704 SDValue N1 = N->getOperand(1);
18705 SDValue N2 = N->getOperand(2);
18706 EVT VT = N->getValueType(0);
18707 SDLoc DL(N);
18708
18709 // Constant fold FMULADD.
18710 if (SDValue C =
18711 DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2}))
18712 return C;
18713
18714 return SDValue();
18715}
18716
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // TODO: Limit this transform based on optsize/minsize - it always creates at
  // least 1 extra instruction. But the perf win may be substantial enough
  // that only minsize should restrict this.
  const SDNodeFlags Flags = N->getFlags();
  // Only valid with the 'arcp' (allow-reciprocal) flag, and not after the
  // whole DAG has been legalized.
  if (LegalDAG || !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal/fneg-reciprocal: a (+/-)1.0 / D
  // dividend is itself the reciprocal this combine would create.
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  unsigned MinUses = TLI.combineRepeatedFPDivisors();

  // For splat vectors, scale the number of uses by the splat factor. If we can
  // convert the division into a scalar op, that will likely be much faster.
  unsigned NumElts = 1;
  EVT VT = N->getValueType(0);
  if (VT.isVector() && DAG.isSplatValue(N1))
    NumElts = VT.getVectorMinNumElements();

  // Cheap upper-bound check before walking the user list.
  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->users()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
      if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
          U->getOperand(0) == U->getOperand(1).getOperand(0) &&
          U->getFlags().hasAllowReassociation() &&
          U->getFlags().hasNoSignedZeros())
        continue;

      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if ((Users.size() * NumElts) < MinUses)
    return SDValue();

  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0); // N was replaced.
}
18795
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDNodeFlags Flags = N->getFlags();
  // Nodes created below inherit this FDIV's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (fdiv c1, c2) -> c1/c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
    return C;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // NOTE(review): the `if` guarding this return is elided in this excerpt —
  // presumably the call defining `V`; verify against the upstream file.
    return V;

  // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
  // the loss is acceptable with AllowReciprocal.
  if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
    // Compute the reciprocal 1.0 / c2.
    const APFloat &N1APF = N1CFP->getValueAPF();
    APFloat Recip = APFloat::getOne(N1APF.getSemantics());
    // NOTE(review): the division producing the status `st` is elided here.
    // Only do the transform if the reciprocal is a legal fp immediate that
    // isn't too nasty (eg NaN, denormal, ...).
    if (((st == APFloat::opOK && !Recip.isDenormal()) ||
         (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
        (!LegalOperations ||
         // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
         // backend)... we should handle this gracefully after Legalize.
         // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
         // NOTE(review): one line of this condition is elided here.
         TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getConstantFP(Recip, DL, VT));
  }

  if (Flags.hasAllowReciprocal()) {
    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    bool N1AllowReciprocal = N1->getFlags().hasAllowReciprocal();
    if (N1.getOpcode() == ISD::FSQRT) {
      // X / sqrt(Y) --> X * rsqrt-estimate(Y)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT &&
               N1AllowReciprocal) {
      // X / fpext(sqrt(Y)) --> X * fpext(rsqrt-estimate(Y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpround(sqrt(Y)) --> X * fpround(rsqrt-estimate(Y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue Sqrt, Y;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(0);
        Y = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(1);
        Y = N1.getOperand(0);
      }
      if (Sqrt.getNode()) {
        // If the other multiply operand is known positive, pull it into the
        // sqrt. That will eliminate the division if we convert to an estimate.
        if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
            N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
          SDValue A;
          if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
            A = Y.getOperand(0);
          else if (Y == Sqrt.getOperand(0))
            A = Y;
          if (A) {
            // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
            // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
            SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
            SDValue AAZ =
                DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
            if (SDValue Rsqrt = buildRsqrtEstimate(AAZ))
              return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);

            // Estimate creation failed. Clean up speculatively created nodes.
            recursivelyDeleteUnusedNodes(AAZ.getNode());
          }
        }

        // We found a FSQRT, so try to make this fold:
        // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
        if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) {
          SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
          AddToWorklist(Div.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (Flags.hasNoInfs())
      if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
        return RV;
  }

  // Fold X/Sqrt(X) -> Sqrt(X)
  if ((Flags.hasNoSignedZeros() || DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) &&
      Flags.hasAllowReassociation())
    if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
      return N1;

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  // NOTE(review): the declarations of CostN0/CostN1 are elided here.
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  if (NegN0) {
    // Keep NegN0 alive while building NegN1 so it cannot be deleted.
    HandleSDNode NegN0Handle(NegN0);
    SDValue NegN1 =
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
    // NOTE(review): the second half of this condition is elided here.
    if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
      return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
  }

  // Shared FMUL/FDIV combine against integer-power-of-2 operands.
  if (SDValue R = combineFMulOrFDivWithIntPow2(N))
    return R;

  return SDValue();
}
18943
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDNodeFlags Flags = N->getFlags();
  // Nodes created below inherit this FREM's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
  SDLoc DL(N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
  // power of 2.
  // NOTE(review): several conjuncts of this condition are elided in this
  // excerpt — verify against the upstream file.
  if (!TLI.isOperationLegal(ISD::FREM, VT) &&
      DAG.isKnownToBeAPowerOfTwoFP(N1)) {
    // A copysign restores the sign of a zero result unless signed zeros
    // are known ignorable.
    // NOTE(review): the final conjunct of this initializer is elided here.
    bool NeedsCopySign = !Flags.hasNoSignedZeros() &&
                         !DAG.canIgnoreSignBitOfZero(SDValue(N, 0)) &&
    SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
    SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
    SDValue MLA;
    // NOTE(review): the `if` selecting the FMA path is elided here.
      MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
                        N1, N0);
    } else {
      // No fused path: plain multiply-subtract.
      SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
      MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
    }
    return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
  }

  return SDValue();
}
18987
18988SDValue DAGCombiner::visitFSQRT(SDNode *N) {
18989 SDNodeFlags Flags = N->getFlags();
18990
18991 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
18992 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
18993 if (!Flags.hasApproximateFuncs() || !Flags.hasNoInfs())
18994 return SDValue();
18995
18996 SDValue N0 = N->getOperand(0);
18997 if (TLI.isFsqrtCheap(N0, DAG))
18998 return SDValue();
18999
19000 // FSQRT nodes have flags that propagate to the created nodes.
19001 SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
19002 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
19003 // transform the fdiv, we may produce a sub-optimal estimate sequence
19004 // because the reciprocal calculation may not have to filter out a
19005 // 0.0 input.
19006 return buildSqrtEstimate(N0);
19007}
19008
19009/// copysign(x, fp_extend(y)) -> copysign(x, y)
19010/// copysign(x, fp_round(y)) -> copysign(x, y)
19011/// Operands to the functions are the type of X and Y respectively.
19012static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
19013 // Always fold no-op FP casts.
19014 if (XTy == YTy)
19015 return true;
19016
19017 // Do not optimize out type conversion of f128 type yet.
19018 // For some targets like x86_64, configuration is changed to keep one f128
19019 // value in one SSE register, but instruction selection cannot handle
19020 // FCOPYSIGN on SSE registers yet.
19021 if (YTy == MVT::f128)
19022 return false;
19023
19024 // Avoid mismatched vector operand types, for better instruction selection.
19025 return !YTy.isVector();
19026}
19027
// NOTE(review): the signature line of this overload is elided in this
// excerpt; from the body it takes the FCOPYSIGN node `N` and reports whether
// the FP cast feeding its sign operand can be looked through.
  SDValue N1 = N->getOperand(1);
  // Only FP_EXTEND / FP_ROUND sign operands are candidates.
  if (N1.getOpcode() != ISD::FP_EXTEND &&
      N1.getOpcode() != ISD::FP_ROUND)
    return false;
  EVT N1VT = N1->getValueType(0);
  EVT N1Op0VT = N1->getOperand(0).getValueType();
  // Defer to the type-based predicate defined above.
  return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
}
19037
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
    return C;

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  // NOTE(review): the `if` guarding this return is elided in this excerpt.
    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));

  // NOTE(review): the `if` guarding this return is elided in this excerpt.
    return SDValue(N, 0);

  // Remaining folds require matching operand/result types.
  if (VT != N1.getValueType())
    return SDValue();

  // If this is equivalent to a disjoint or, replace it with one. This can
  // happen if the sign operand is a sign mask (i.e., x << sign_bit_position).
  // NOTE(review): the remainder of this condition is elided in this excerpt.
  if (DAG.SignBitIsZeroFP(N0) &&
    // TODO: Just directly match the shift pattern. computeKnownBits is heavy
    // for a such a narrowly targeted case.
    EVT IntVT = VT.changeTypeToInteger();
    // TODO: It appears to be profitable in some situations to unconditionally
    // emit a fabs(n0) to perform this combine.
    SDValue CastSrc0 = DAG.getNode(ISD::BITCAST, DL, IntVT, N0);
    SDValue CastSrc1 = DAG.getNode(ISD::BITCAST, DL, IntVT, N1);

    // NOTE(review): the trailing flags argument of this call is elided here.
    SDValue SignOr = DAG.getNode(ISD::OR, DL, IntVT, CastSrc0, CastSrc1,
    return DAG.getNode(ISD::BITCAST, DL, VT, SignOr);
  }

  return SDValue();
}
19078
SDValue DAGCombiner::visitFPOW(SDNode *N) {
  // Only constant (or splat-constant) exponents are handled below.
  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExponentC)
    return SDValue();
  // Nodes created below inherit this FPOW's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  //       Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) = nan; cbrt(-val) = -num.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do not
    // turn a pow that has lowering support into a cbrt() libcall.
    // NOTE(review): the remaining conjuncts of this condition are elided in
    // this excerpt — verify against the upstream file.
    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
  }

  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  //       power-of-2 fractional exponents.
  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
  if (ExponentIs025 || ExponentIs075) {
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();

    // We only need no signed zeros for the 0.25 case.
    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
    // NOTE(review): the `if` condition guarding this return is elided here.
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (ForCodeSize)
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
    if (ExponentIs025)
      return SqrtSqrt;
    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
  }

  return SDValue();
}
19155
// NOTE(review): the first line of this function's signature (name and
// leading parameters; the body uses N, DL, and DAG) is elided in this
// excerpt — the visible text begins mid-signature.
                             const TargetLowering &TLI) {
  // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
  // Additionally, if there are clamps ([us]min or [us]max) around
  // the fpto[us]i, we can fold those into fminnum/fmaxnum around the ftrunc.
  // If NoSignedZerosFPMath is enabled, this is a direct replacement.
  // Otherwise, for strict math, we must handle edge cases:
  // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
  //    as example, it first becomes integer 0, and is converted back to +0.0.
  //    FTRUNC on its own could produce -0.0.

  // FIXME: We should be able to use node-level FMF here.
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegal(ISD::FTRUNC, VT))
    return SDValue();

  bool IsUnsigned = N->getOpcode() == ISD::UINT_TO_FP;
  bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
  assert(IsSigned || IsUnsigned);

  // NOTE(review): the second half of this initializer is elided here.
  bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
  // For signed conversions: The optimization changes signed zero behavior.
  if (IsSigned && !IsSignedZeroSafe)
    return SDValue();
  // For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0
  // (unless outputting a signed zero is OK).
  if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT))
    return SDValue();

  // Collect potential clamp operations (outermost to innermost) and peel.
  // NOTE(review): the struct's FP-constant member and the declaration of the
  // `Clamps` container are elided in this excerpt.
  struct ClampInfo {
    bool IsMin;
  };
  constexpr unsigned MaxClamps = 2;
  unsigned MinOp = IsUnsigned ? ISD::UMIN : ISD::SMIN;
  unsigned MaxOp = IsUnsigned ? ISD::UMAX : ISD::SMAX;
  SDValue IntVal = N->getOperand(0);
  for (unsigned Level = 0; Level < MaxClamps; ++Level) {
    // Stop peeling at the first node that is not a single-use min/max.
    if (!IntVal.hasOneUse() ||
        (IntVal.getOpcode() != MinOp && IntVal.getOpcode() != MaxOp))
      break;
    SDValue RHS = IntVal.getOperand(1);
    APInt IntConst;
    if (auto *IntConstNode = dyn_cast<ConstantSDNode>(RHS))
      IntConst = IntConstNode->getAPIntValue();
    else if (!ISD::isConstantSplatVector(RHS.getNode(), IntConst))
      return SDValue();
    APFloat FPConst(VT.getFltSemantics());
    FPConst.convertFromAPInt(IntConst, IsSigned, APFloat::rmNearestTiesToEven);
    // Verify roundtrip exactness: the clamp bound must be exactly
    // representable in VT, or the FP clamp is not equivalent.
    APSInt RoundTrip(IntConst.getBitWidth(), IsUnsigned);
    bool IsExact;
    if (FPConst.convertToInteger(RoundTrip, APFloat::rmTowardZero, &IsExact) !=
            APFloat::opOK ||
        !IsExact || static_cast<const APInt &>(RoundTrip) != IntConst)
      return SDValue();
    bool IsMin = IntVal.getOpcode() == MinOp;
    Clamps.push_back({IsMin, DAG.getConstantFP(FPConst, DL, VT)});
    IntVal = IntVal.getOperand(0);
  }

  // Check that the sequence ends with the correct kind of fpto[us]i.
  unsigned FPToIntOp = IsUnsigned ? ISD::FP_TO_UINT : ISD::FP_TO_SINT;
  if (IntVal.getOpcode() != FPToIntOp ||
      IntVal.getOperand(0).getValueType() != VT)
    return SDValue();

  SDValue Result = IntVal.getOperand(0);
  if (IsUnsigned && !IsSignedZeroSafe && TLI.isFAbsFree(VT))
    Result = DAG.getNode(ISD::FABS, DL, VT, Result);
  Result = DAG.getNode(ISD::FTRUNC, DL, VT, Result);
  // Apply clamps, if any, in reverse order (innermost first).
  for (const ClampInfo &Clamp : reverse(Clamps)) {
    unsigned FPClampOp =
        getMinMaxOpcodeForClamp(Clamp.IsMin, Result, Clamp.Constant, DAG, TLI);
    if (FPClampOp == ISD::DELETED_NODE)
      return SDValue();
    Result = DAG.getNode(FPClampOp, DL, VT, Result, Clamp.Constant);
  }
  return Result;
}
19240
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();
  SDLoc DL(N);

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (N0.isUndef())
    return DAG.getConstantFP(0.0, DL, VT);

  // fold (sint_to_fp c1) -> c1fp
  // ...but only if the target supports immediate floating-point values
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
      return C;

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
      hasOperation(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
  if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
      !VT.isVector() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
                         DAG.getConstantFP(0.0, DL, VT));

  // fold (sint_to_fp (zext (setcc x, y, cc))) ->
  //      (select (setcc x, y, cc), 1.0, 0.0)
  if (N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getSelect(DL, VT, N0.getOperand(0),
                         DAG.getConstantFP(1.0, DL, VT),
                         DAG.getConstantFP(0.0, DL, VT));

  // Try to fold fpto[us]i-back-to-fp chains into ftrunc (see foldFPToIntToFP).
  if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
    return FTrunc;

  // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
  // NOTE(review): one line of this condition is elided in this excerpt.
  if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
                           N0.getOperand(0).getValueType()))
    return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));

  return SDValue();
}
19294
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();
  SDLoc DL(N);

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (N0.isUndef())
    return DAG.getConstantFP(0.0, DL, VT);

  // fold (uint_to_fp c1) -> c1fp
  // ...but only if the target supports immediate floating-point values
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
      return C;

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
      hasOperation(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
  }

  // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
  if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
                         DAG.getConstantFP(0.0, DL, VT));

  // Try to fold fpto[us]i-back-to-fp chains into ftrunc (see foldFPToIntToFP).
  if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
    return FTrunc;

  // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
  // NOTE(review): one line of this condition is elided in this excerpt.
  if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
                           N0.getOperand(0).getValueType()))
    return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));

  return SDValue();
}
19337
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
// NOTE(review): this function's signature line is elided in this excerpt;
// the body below uses N, DL, and DAG as parameters.
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Only int -> fp -> int round trips are handled here.
  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits();
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &Sem = N0.getValueType().getFltSemantics();

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widening: sign-extend only when both sides are signed.
      unsigned ExtOp =
          IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, DL, VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
    // Same width: the round trip is an identity (module a bitcast).
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
19379
19380SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
19381 SDValue N0 = N->getOperand(0);
19382 EVT VT = N->getValueType(0);
19383 SDLoc DL(N);
19384
19385 // fold (fp_to_sint undef) -> undef
19386 if (N0.isUndef())
19387 return DAG.getUNDEF(VT);
19388
19389 // fold (fp_to_sint c1fp) -> c1
19390 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
19391 return C;
19392
19393 return FoldIntToFPToInt(N, DL, DAG);
19394}
19395
19396SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
19397 SDValue N0 = N->getOperand(0);
19398 EVT VT = N->getValueType(0);
19399 SDLoc DL(N);
19400
19401 // fold (fp_to_uint undef) -> undef
19402 if (N0.isUndef())
19403 return DAG.getUNDEF(VT);
19404
19405 // fold (fp_to_uint c1fp) -> c1
19406 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
19407 return C;
19408
19409 return FoldIntToFPToInt(N, DL, DAG);
19410}
19411
19412SDValue DAGCombiner::visitXROUND(SDNode *N) {
19413 SDValue N0 = N->getOperand(0);
19414 EVT VT = N->getValueType(0);
19415
19416 // fold (lrint|llrint undef) -> undef
19417 // fold (lround|llround undef) -> undef
19418 if (N0.isUndef())
19419 return DAG.getUNDEF(VT);
19420
19421 // fold (lrint|llrint c1fp) -> c1
19422 // fold (lround|llround c1fp) -> c1
19423 if (SDValue C =
19424 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
19425 return C;
19426
19427 return SDValue();
19428}
19429
19430 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// Combine FP_ROUND. Operand 0 is the value; operand 1 is the "is exact"
// flag (1 means the round is known to be value-preserving).
19431 SDValue N0 = N->getOperand(0);
19432 SDValue N1 = N->getOperand(1);
19433 EVT VT = N->getValueType(0);
19434 SDLoc DL(N);
19435
19436 // fold (fp_round c1fp) -> c1fp
19437 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
19438 return C;
19439
19440 // fold (fp_round (fp_extend x)) -> x
19441 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
19442 return N0.getOperand(0);
19443
19444 // fold (fp_round (fp_round x)) -> (fp_round x)
19445 if (N0.getOpcode() == ISD::FP_ROUND) {
19446 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
19447 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
19448
19449 // Avoid folding legal fp_rounds into non-legal ones.
19450 if (!hasOperation(ISD::FP_ROUND, VT))
19451 return SDValue();
19452
19453 // Skip this folding if it results in an fp_round from f80 to f16.
19454 //
19455 // f80 to f16 always generates an expensive (and as yet, unimplemented)
19456 // libcall to __truncxfhf2 instead of selecting native f16 conversion
19457 // instructions from f32 or f64. Moreover, the first (value-preserving)
19458 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
19459 // x86.
19460 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
19461 return SDValue();
19462
19463 // If the first fp_round isn't a value preserving truncation, it might
19464 // introduce a tie in the second fp_round, that wouldn't occur in the
19465 // single-step fp_round we want to fold to.
19466 // In other words, double rounding isn't the same as rounding.
19467 // Also, this is a value preserving truncation iff both fp_round's are.
19468 if ((N->getFlags().hasAllowContract() &&
19469 N0->getFlags().hasAllowContract()) ||
19470 N0IsTrunc)
19471 return DAG.getNode(
19472 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
19473 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
19474 }
19475
19476 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
19477 // Note: From a legality perspective, this is a two step transform. First,
19478 // we duplicate the fp_round to the arguments of the copysign, then we
19479 // eliminate the fp_round on Y. The second step requires an additional
19480 // predicate to match the implementation above.
19481 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
// NOTE(review): part of this predicate is not visible in this excerpt;
// presumably a check on the copysign's sign operand being a constant or
// fp_extended value of the result type — confirm against upstream source.
19483 N0.getValueType())) {
19484 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
19485 N0.getOperand(0), N1);
19486 AddToWorklist(Tmp.getNode());
19487 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
19488 }
19489
// Try to narrow a vselect-of-setcc feeding this round (helper handles the
// size/legality checks).
19490 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19491 return NewVSel;
19492
19493 return SDValue();
19494}
19495
19496 // Eliminate a floating-point widening of a narrowed value if the fast math
19497 // flags allow it.
// Handles FP16_TO_FP/BF16_TO_FP/FP_EXTEND whose operand is the matching
// narrowing cast back from the same type VT; returns the pre-narrow value.
19499 SDValue N0 = N->getOperand(0);
19500 EVT VT = N->getValueType(0);
19501
// Map the widening opcode to the narrowing opcode it would cancel.
19502 unsigned NarrowingOp;
19503 switch (N->getOpcode()) {
19504 case ISD::FP16_TO_FP:
19505 NarrowingOp = ISD::FP_TO_FP16;
19506 break;
19507 case ISD::BF16_TO_FP:
19508 NarrowingOp = ISD::FP_TO_BF16;
19509 break;
19510 case ISD::FP_EXTEND:
19511 NarrowingOp = ISD::FP_ROUND;
19512 break;
19513 default:
19514 llvm_unreachable("Expected widening FP cast");
19515 }
19516
// Only fold a true round trip: narrow(x : VT) widened back to VT.
19517 if (N0.getOpcode() == NarrowingOp && N0.getOperand(0).getValueType() == VT) {
19518 const SDNodeFlags NarrowFlags = N0->getFlags();
19519 const SDNodeFlags WidenFlags = N->getFlags();
19520 // Narrowing can introduce inf and change the encoding of a nan, so the
19521 // widen must have the nnan and ninf flags to indicate that we don't need to
19522 // care about that. We are also removing a rounding step, and that requires
19523 // both the narrow and widen to allow contraction.
19524 if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
19525 NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
19526 return N0.getOperand(0);
19527 }
19528 }
19529
19530 return SDValue();
19531}
19532
19533 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// Combine FP_EXTEND (fp widening). Any node created here inherits N's flags
// via the FlagInserter.
19534 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19535 SDValue N0 = N->getOperand(0);
19536 EVT VT = N->getValueType(0);
19537 SDLoc DL(N);
19538
// Vector casts may simplify as a whole (e.g. through a build_vector).
19539 if (VT.isVector())
19540 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
19541 return FoldedVOp;
19542
19543 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
19544 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
19545 return SDValue();
19546
19547 // fold (fp_extend c1fp) -> c1fp
19548 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
19549 return C;
19550
19551 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
19552 if (N0.getOpcode() == ISD::FP16_TO_FP &&
// NOTE(review): the rest of this condition is not visible in this excerpt;
// presumably a legality check for FP16_TO_FP at VT — confirm upstream.
19554 return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
19555
19556 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
19557 // value of X.
19558 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
19559 SDValue In = N0.getOperand(0);
19560 if (In.getValueType() == VT) return In;
19561 if (VT.bitsLT(In.getValueType()))
19562 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
19563 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
19564 }
19565
19566 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
19567 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// NOTE(review): the rest of this condition is elided here; presumably an
// EXTLOAD legality check for (VT, N0.getValueType()) — confirm upstream.
19569 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
19570 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
19571 LN0->getChain(),
19572 LN0->getBasePtr(), N0.getValueType(),
19573 LN0->getMemOperand());
// Replace N with the extending load, and replace the old load's value uses
// with a round of the extload so other users keep the narrow value; the
// chain use is redirected to the extload's chain result.
19574 CombineTo(N, ExtLoad);
19575 CombineTo(
19576 N0.getNode(),
19577 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
19578 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
19579 ExtLoad.getValue(1));
19580 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19581 }
19582
19583 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19584 return NewVSel;
19585
// Cancel a narrow-then-widen pair when fast-math flags permit.
19586 if (SDValue CastEliminated = eliminateFPCastPair(N))
19587 return CastEliminated;
19588
19589 return SDValue();
19590}
19591
19592SDValue DAGCombiner::visitFCEIL(SDNode *N) {
19593 SDValue N0 = N->getOperand(0);
19594 EVT VT = N->getValueType(0);
19595
19596 // fold (fceil c1) -> fceil(c1)
19597 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
19598 return C;
19599
19600 return SDValue();
19601}
19602
19603SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
19604 SDValue N0 = N->getOperand(0);
19605 EVT VT = N->getValueType(0);
19606
19607 // fold (ftrunc c1) -> ftrunc(c1)
19608 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
19609 return C;
19610
19611 // fold ftrunc (known rounded int x) -> x
19612 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
19613 // likely to be generated to extract integer from a rounded floating value.
19614 switch (N0.getOpcode()) {
19615 default: break;
19616 case ISD::FRINT:
19617 case ISD::FTRUNC:
19618 case ISD::FNEARBYINT:
19619 case ISD::FROUNDEVEN:
19620 case ISD::FFLOOR:
19621 case ISD::FCEIL:
19622 return N0;
19623 }
19624
19625 return SDValue();
19626}
19627
19628 SDValue DAGCombiner::visitFFREXP(SDNode *N) {
// FFREXP produces two results (fraction, exponent), hence getVTList() below.
19629 SDValue N0 = N->getOperand(0);
19630
19631 // fold (ffrexp c1) -> ffrexp(c1)
// NOTE(review): the guarding condition is not visible in this excerpt;
// presumably a constant-FP check on N0 — confirm against upstream source.
19633 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
19634 return SDValue();
19635}
19636
19637SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
19638 SDValue N0 = N->getOperand(0);
19639 EVT VT = N->getValueType(0);
19640
19641 // fold (ffloor c1) -> ffloor(c1)
19642 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
19643 return C;
19644
19645 return SDValue();
19646}
19647
19648 SDValue DAGCombiner::visitFNEG(SDNode *N) {
// Combine FNEG; nodes built here inherit N's flags via the FlagInserter.
19649 SDValue N0 = N->getOperand(0);
19650 EVT VT = N->getValueType(0);
19651 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19652
19653 // Constant fold FNEG.
19654 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
19655 return C;
19656
// Let the target fold the negation into the operand expression if profitable.
19657 if (SDValue NegN0 =
19658 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
19659 return NegN0;
19660
19661 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
19662 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
19663 // know it was called from a context with a nsz flag if the input fsub does
19664 // not.
19665 if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
19666 N0.hasOneUse()) {
19667 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
19668 N0.getOperand(0));
19669 }
19670
// NOTE(review): the condition above this return is not visible in this
// excerpt; presumably a SimplifyDemandedBits call — confirm upstream.
19672 return SDValue(N, 0);
19673
19674 if (SDValue Cast = foldSignChangeInBitcast(N))
19675 return Cast;
19676
19677 return SDValue();
19678}
19679
19680 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
// Shared combine for the fmin/fmax opcode family (FMINNUM/FMAXNUM,
// FMINIMUM/FMAXIMUM, FMINIMUMNUM/FMAXIMUMNUM); behavior differences are
// captured in the classification booleans below.
19681 SDValue N0 = N->getOperand(0);
19682 SDValue N1 = N->getOperand(1);
19683 EVT VT = N->getValueType(0);
19684 const SDNodeFlags Flags = N->getFlags();
19685 unsigned Opc = N->getOpcode();
// FMINIMUM/FMAXIMUM propagate any NaN operand as a quiet NaN.
19686 bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
// NOTE(review): the continuations of the next two initializers are not
// visible in this excerpt; presumably FMINIMUMNUM/FMAXIMUMNUM membership and
// min-opcode membership respectively — confirm against upstream source.
19687 bool ReturnsOtherForAllNaNs =
19689 bool IsMin =
19691 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19692
19693 // Constant fold.
19694 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
19695 return C;
19696
19697 // Canonicalize to constant on RHS.
// NOTE(review): the guarding condition here is elided; presumably a
// constant-on-LHS / non-constant-on-RHS check — confirm upstream.
19700 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
19701
19702 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
19703 const APFloat &AF = N1CFP->getValueAPF();
19704
19705 // minnum(X, qnan) -> X
19706 // maxnum(X, qnan) -> X
19707 // minimum(X, nan) -> qnan
19708 // maximum(X, nan) -> qnan
19709 // minimumnum(X, nan) -> X
19710 // maximumnum(X, nan) -> X
19711 if (AF.isNaN()) {
19712 if (PropAllNaNsToQNaNs) {
19713 if (AF.isSignaling())
19714 return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT);
19715 return N->getOperand(1);
19716 } else if (ReturnsOtherForAllNaNs || !AF.isSignaling()) {
19717 return N->getOperand(0);
19718 }
19719 return SDValue();
19720 }
19721
19722 // In the following folds, inf can be replaced with the largest finite
19723 // float, if the ninf flag is set.
19724 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19725 // minimum(X, -inf) -> -inf if nnan
19726 // maximum(X, +inf) -> +inf if nnan
19727 // minimumnum(X, -inf) -> -inf
19728 // maximumnum(X, +inf) -> +inf
19729 if (IsMin == AF.isNegative() &&
19730 (ReturnsOtherForAllNaNs || Flags.hasNoNaNs()))
19731 return N->getOperand(1);
19732
19733 // minnum(X, +inf) -> X if nnan
19734 // maxnum(X, -inf) -> X if nnan
19735 // minimum(X, +inf) -> X (ignoring quieting of sNaNs)
19736 // maximum(X, -inf) -> X (ignoring quieting of sNaNs)
19737 // minimumnum(X, +inf) -> X if nnan
19738 // maximumnum(X, -inf) -> X if nnan
19739 if (IsMin != AF.isNegative() && (PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
19740 return N->getOperand(0);
19741 }
19742 }
19743
19744 // There are no VECREDUCE variants of FMINIMUMNUM or FMAXIMUMNUM
// NOTE(review): the guard condition here is elided; presumably it bails for
// ReturnsOtherForAllNaNs opcodes — confirm upstream.
19746 return SDValue();
19747
// Reassociate min/max over a matching vector-reduction if profitable.
19748 if (SDValue SD = reassociateReduction(
19749 PropAllNaNsToQNaNs
// NOTE(review): the VECREDUCE opcode selection lines are elided here.
19752 Opc, SDLoc(N), VT, N0, N1, Flags))
19753 return SD;
19754
19755 return SDValue();
19756}
19757
19758 SDValue DAGCombiner::visitFABS(SDNode *N) {
// Combine FABS: constant folding, demanded-bits simplification, and
// sign-change-through-bitcast folding.
19759 SDValue N0 = N->getOperand(0);
19760 EVT VT = N->getValueType(0);
19761 SDLoc DL(N);
19762
19763 // fold (fabs c1) -> fabs(c1)
19764 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
19765 return C;
19766
// NOTE(review): the condition above this return is not visible in this
// excerpt; presumably a SimplifyDemandedBits call — confirm upstream.
19768 return SDValue(N, 0);
19769
19770 if (SDValue Cast = foldSignChangeInBitcast(N))
19771 return Cast;
19772
19773 return SDValue();
19774}
19775
19776 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// Combine BRCOND(chain, cond, dest): look through FREEZE, simplify SETCC
// conditions, and form BR_CC where legal.
19777 SDValue Chain = N->getOperand(0);
19778 SDValue N1 = N->getOperand(1);
19779 SDValue N2 = N->getOperand(2);
19780
19781 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
19782 // nondeterministic jumps).
19783 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
19784 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19785 N1->getOperand(0), N2, N->getFlags());
19786 }
19787
19788 // Variant of the previous fold where there is a SETCC in between:
19789 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
19790 // =>
19791 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
19792 // =>
19793 // BRCOND(SETCC(X, CONST, Cond))
19794 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
19795 // isn't equivalent to true or false.
19796 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
19797 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
19798 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
19799 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
// NOTE(review): the extraction of 'Cond' from the SETCC's condition-code
// operand is not visible in this excerpt — confirm upstream.
19801 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
19802 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
19803 bool Updated = false;
19804
19805 // Is 'X Cond C' always true or false?
19806 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
19807 bool False = (Cond == ISD::SETULT && C->isZero()) ||
19808 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
19809 (Cond == ISD::SETUGT && C->isAllOnes()) ||
19810 (Cond == ISD::SETGT && C->isMaxSignedValue());
19811 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
19812 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
19813 (Cond == ISD::SETUGE && C->isZero()) ||
19814 (Cond == ISD::SETGE && C->isMinSignedValue());
19815 return True || False;
19816 };
19817
// Strip FREEZE from either operand when the comparison cannot degenerate to
// a constant (which is the case where the fold would be unsound).
19818 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
19819 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
19820 S0 = S0->getOperand(0);
19821 Updated = true;
19822 }
19823 }
19824 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
19825 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
19826 S1 = S1->getOperand(0);
19827 Updated = true;
19828 }
19829 }
19830
19831 if (Updated)
19832 return DAG.getNode(
19833 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19834 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
19835 N->getFlags());
19836 }
19837
19838 // If N is a constant we could fold this into a fallthrough or unconditional
19839 // branch. However that doesn't happen very often in normal code, because
19840 // Instcombine/SimplifyCFG should have handled the available opportunities.
19841 // If we did this folding here, it would be necessary to update the
19842 // MachineBasicBlock CFG, which is awkward.
19843
19844 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
19845 // on the target, also copy fast math flags.
19846 if (N1.getOpcode() == ISD::SETCC &&
// NOTE(review): the rest of this condition is elided here; presumably a TLI
// legality query for BR_CC on the compared type — confirm upstream.
19848 N1.getOperand(0).getValueType())) {
19849 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain,
19850 N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2,
19851 N1->getFlags());
19852 }
19853
19854 if (N1.hasOneUse()) {
19855 // rebuildSetCC calls visitXor which may change the Chain when there is a
19856 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
19857 HandleSDNode ChainHandle(Chain);
19858 if (SDValue NewN1 = rebuildSetCC(N1))
19859 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
19860 ChainHandle.getValue(), NewN1, N2, N->getFlags());
19861 }
19862
19863 return SDValue();
19864}
19865
19866 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
// Rewrite a boolean condition (typically a BRCOND operand) into a SETCC
// when a simpler comparison form exists; returns SDValue() if no rewrite.
19867 if (N.getOpcode() == ISD::SRL ||
19868 (N.getOpcode() == ISD::TRUNCATE &&
19869 (N.getOperand(0).hasOneUse() &&
19870 N.getOperand(0).getOpcode() == ISD::SRL))) {
19871 // Look pass the truncate.
19872 if (N.getOpcode() == ISD::TRUNCATE)
19873 N = N.getOperand(0);
19874
19875 // Match this pattern so that we can generate simpler code:
19876 //
19877 // %a = ...
19878 // %b = and i32 %a, 2
19879 // %c = srl i32 %b, 1
19880 // brcond i32 %c ...
19881 //
19882 // into
19883 //
19884 // %a = ...
19885 // %b = and i32 %a, 2
19886 // %c = setcc eq %b, 0
19887 // brcond %c ...
19888 //
19889 // This applies only when the AND constant value has one bit set and the
19890 // SRL constant is equal to the log2 of the AND constant. The back-end is
19891 // smart enough to convert the result into a TEST/JMP sequence.
19892 SDValue Op0 = N.getOperand(0);
19893 SDValue Op1 = N.getOperand(1);
19894
19895 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
19896 SDValue AndOp1 = Op0.getOperand(1);
19897
19898 if (AndOp1.getOpcode() == ISD::Constant) {
19899 const APInt &AndConst = AndOp1->getAsAPIntVal();
19900
19901 if (AndConst.isPowerOf2() &&
19902 Op1->getAsAPIntVal() == AndConst.logBase2()) {
19903 SDLoc DL(N);
19904 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
19905 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
19906 ISD::SETNE);
19907 }
19908 }
19909 }
19910 }
19911
19912 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
19913 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
19914 if (N.getOpcode() == ISD::XOR) {
19915 // Because we may call this on a speculatively constructed
19916 // SimplifiedSetCC Node, we need to simplify this node first.
19917 // Ideally this should be folded into SimplifySetCC and not
19918 // here. For now, grab a handle to N so we don't lose it from
19919 // replacements interal to the visit.
19920 HandleSDNode XORHandle(N);
19921 while (N.getOpcode() == ISD::XOR) {
19922 SDValue Tmp = visitXOR(N.getNode());
19923 // No simplification done.
19924 if (!Tmp.getNode())
19925 break;
19926 // Returning N is form in-visit replacement that may invalidated
19927 // N. Grab value from Handle.
19928 if (Tmp.getNode() == N.getNode())
19929 N = XORHandle.getValue();
19930 else // Node simplified. Try simplifying again.
19931 N = Tmp;
19932 }
19933
19934 if (N.getOpcode() != ISD::XOR)
19935 return N;
19936
19937 SDValue Op0 = N->getOperand(0);
19938 SDValue Op1 = N->getOperand(1);
19939
19940 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
19941 bool Equal = false;
19942 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
19943 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
19944 Op0.getValueType() == MVT::i1) {
19945 N = Op0;
19946 Op0 = N->getOperand(0);
19947 Op1 = N->getOperand(1);
19948 Equal = true;
19949 }
19950
19951 EVT SetCCVT = N.getValueType();
19952 if (LegalTypes)
19953 SetCCVT = getSetCCResultType(SetCCVT);
19954 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
19955 // it would introduce illegal operations post-legalization as this can
19956 // result in infinite looping between converting xor->setcc here, and
19957 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
// NOTE(review): the selection of 'CC' (SETEQ when Equal, else SETNE) is not
// visible in this excerpt — confirm against upstream source.
19959 if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
19960 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
19961 }
19962 }
19963
19964 return SDValue();
19965}
19966
19967 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
19968 //
19969 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
// Try to simplify the comparison feeding a BR_CC; rebuilds the BR_CC if the
// comparison reduces to a simpler SETCC.
19970 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
19971 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
19972
19973 // If N is a constant we could fold this into a fallthrough or unconditional
19974 // branch. However that doesn't happen very often in normal code, because
19975 // Instcombine/SimplifyCFG should have handled the available opportunities.
19976 // If we did this folding here, it would be necessary to update the
19977 // MachineBasicBlock CFG, which is awkward.
19978
19979 // Use SimplifySetCC to simplify SETCC's.
// NOTE(review): the declaration of 'Simp' (a SimplifySetCC call) is not
// visible in this excerpt — confirm against upstream source.
19981 CondLHS, CondRHS, CC->get(), SDLoc(N),
19982 false);
19983 if (Simp.getNode()) AddToWorklist(Simp.getNode());
19984
19985 // fold to a simpler setcc
19986 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
19987 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
19988 N->getOperand(0), Simp.getOperand(2),
19989 Simp.getOperand(0), Simp.getOperand(1),
19990 N->getOperand(4));
19991
19992 return SDValue();
19993}
19994
19995 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
19996 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
19997 const TargetLowering &TLI) {
// Classify N as a (masked) load/store eligible for indexed addressing.
// Inc/Dec are the candidate indexed modes (e.g. PRE_INC/PRE_DEC). On
// success returns true and sets: Ptr to the base pointer, IsLoad=false for
// stores, IsMasked=true for masked ops. Already-indexed nodes are rejected.
19998 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19999 if (LD->isIndexed())
20000 return false;
20001 EVT VT = LD->getMemoryVT();
20002 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
20003 return false;
20004 Ptr = LD->getBasePtr();
20005 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20006 if (ST->isIndexed())
20007 return false;
20008 EVT VT = ST->getMemoryVT();
20009 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
20010 return false;
20011 Ptr = ST->getBasePtr();
20012 IsLoad = false;
20013 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
20014 if (LD->isIndexed())
20015 return false;
20016 EVT VT = LD->getMemoryVT();
20017 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
20018 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
20019 return false;
20020 Ptr = LD->getBasePtr();
20021 IsMasked = true;
// NOTE(review): the 'else if' header for the masked-store case is not
// visible in this excerpt; presumably dyn_cast<MaskedStoreSDNode> — confirm.
20023 if (ST->isIndexed())
20024 return false;
20025 EVT VT = ST->getMemoryVT();
20026 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
20027 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
20028 return false;
20029 Ptr = ST->getBasePtr();
20030 IsLoad = false;
20031 IsMasked = true;
20032 } else {
// Not a load or store at all.
20033 return false;
20034 }
20035 return true;
20036}
20037
20038/// Try turning a load/store into a pre-indexed load/store when the base
20039/// pointer is an add or subtract and it has other uses besides the load/store.
20040/// After the transformation, the new indexed load/store has effectively folded
20041/// the add/subtract in and all of its other uses are redirected to the
20042/// new load/store.
20043bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
20044 if (Level < AfterLegalizeDAG)
20045 return false;
20046
20047 bool IsLoad = true;
20048 bool IsMasked = false;
20049 SDValue Ptr;
20050 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
20051 Ptr, TLI))
20052 return false;
20053
20054 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
20055 // out. There is no reason to make this a preinc/predec.
20056 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
20057 Ptr->hasOneUse())
20058 return false;
20059
20060 // Ask the target to do addressing mode selection.
20064 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
20065 return false;
20066
20067 // Backends without true r+i pre-indexed forms may need to pass a
20068 // constant base with a variable offset so that constant coercion
20069 // will work with the patterns in canonical form.
20070 bool Swapped = false;
20071 if (isa<ConstantSDNode>(BasePtr)) {
20072 std::swap(BasePtr, Offset);
20073 Swapped = true;
20074 }
20075
20076 // Don't create a indexed load / store with zero offset.
20078 return false;
20079
20080 // Try turning it into a pre-indexed load / store except when:
20081 // 1) The new base ptr is a frame index.
20082 // 2) If N is a store and the new base ptr is either the same as or is a
20083 // predecessor of the value being stored.
20084 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
20085 // that would create a cycle.
20086 // 4) All uses are load / store ops that use it as old base ptr.
20087
20088 // Check #1. Preinc'ing a frame index would require copying the stack pointer
20089 // (plus the implicit offset) to a register to preinc anyway.
20090 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))